avro_turf 0.7.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +36 -0
  3. data/.github/workflows/ruby.yml +20 -0
  4. data/CHANGELOG.md +29 -0
  5. data/Gemfile +0 -3
  6. data/README.md +54 -16
  7. data/avro_turf.gemspec +13 -2
  8. data/lib/avro_turf.rb +14 -3
  9. data/lib/avro_turf/cached_confluent_schema_registry.rb +39 -0
  10. data/lib/avro_turf/cached_schema_registry.rb +4 -24
  11. data/lib/avro_turf/confluent_schema_registry.rb +106 -0
  12. data/lib/avro_turf/disk_cache.rb +83 -0
  13. data/lib/avro_turf/in_memory_cache.rb +38 -0
  14. data/lib/avro_turf/messaging.rb +77 -9
  15. data/lib/avro_turf/mutable_schema_store.rb +18 -0
  16. data/lib/avro_turf/schema_registry.rb +4 -77
  17. data/lib/avro_turf/schema_store.rb +36 -19
  18. data/lib/avro_turf/schema_to_avro_patch.rb +11 -0
  19. data/lib/avro_turf/test/fake_confluent_schema_registry_server.rb +141 -0
  20. data/lib/avro_turf/test/fake_schema_registry_server.rb +4 -82
  21. data/lib/avro_turf/version.rb +1 -1
  22. data/spec/cached_confluent_schema_registry_spec.rb +63 -0
  23. data/spec/confluent_schema_registry_spec.rb +9 -0
  24. data/spec/disk_cached_confluent_schema_registry_spec.rb +159 -0
  25. data/spec/messaging_spec.rb +208 -19
  26. data/spec/schema_store_spec.rb +36 -0
  27. data/spec/schema_to_avro_patch_spec.rb +42 -0
  28. data/spec/spec_helper.rb +8 -0
  29. data/spec/support/{schema_registry_context.rb → confluent_schema_registry_context.rb} +72 -8
  30. data/spec/test/fake_confluent_schema_registry_server_spec.rb +40 -0
  31. metadata +49 -16
  32. data/circle.yml +0 -4
  33. data/spec/cached_schema_registry_spec.rb +0 -41
  34. data/spec/schema_registry_spec.rb +0 -9
@@ -34,8 +34,19 @@ class AvroTurf
34
34
  end
35
35
  end
36
36
  end
37
+
38
+ module DatumReader
39
+ def read_default_value(field_schema, default_value)
40
+ if default_value == :no_default
41
+ raise Avro::AvroError, "Missing data for #{field_schema} with no default"
42
+ end
43
+
44
+ super
45
+ end
46
+ end
37
47
  end
38
48
  end
39
49
 
40
50
  Avro::Schema::RecordSchema.send(:prepend, AvroTurf::AvroGemPatch::RecordSchema)
41
51
  Avro::Schema::Field.send(:prepend, AvroTurf::AvroGemPatch::Field)
52
+ Avro::IO::DatumReader.send(:prepend, AvroTurf::AvroGemPatch::DatumReader)
@@ -0,0 +1,141 @@
1
+ require 'sinatra/base'
2
+
3
+ class FakeConfluentSchemaRegistryServer < Sinatra::Base
4
+ SUBJECTS = Hash.new { Array.new }
5
+ SCHEMAS = []
6
+ CONFIGS = Hash.new
7
+ SUBJECT_NOT_FOUND = { error_code: 40401, message: 'Subject not found' }.to_json.freeze
8
+ VERSION_NOT_FOUND = { error_code: 40402, message: 'Version not found' }.to_json.freeze
9
+ SCHEMA_NOT_FOUND = { error_code: 40403, message: 'Schema not found' }.to_json.freeze
10
+ DEFAULT_GLOBAL_CONFIG = { 'compatibility' => 'BACKWARD'.freeze }.freeze
11
+
12
+ @global_config = DEFAULT_GLOBAL_CONFIG.dup
13
+
14
+ class << self
15
+ attr_reader :global_config
16
+ end
17
+
18
+ helpers do
19
+ def parse_schema
20
+ request.body.rewind
21
+ JSON.parse(request.body.read).fetch("schema").tap do |schema|
22
+ Avro::Schema.parse(schema)
23
+ end
24
+ end
25
+
26
+ def parse_config
27
+ request.body.rewind
28
+ JSON.parse(request.body.read)
29
+ end
30
+
31
+ def global_config
32
+ self.class.global_config
33
+ end
34
+ end
35
+
36
+ post "/subjects/:subject/versions" do
37
+ schema = parse_schema
38
+ ids_for_subject = SUBJECTS[params[:subject]]
39
+
40
+ schemas_for_subject =
41
+ SCHEMAS.select
42
+ .with_index { |_, i| ids_for_subject.include?(i) }
43
+
44
+ if schemas_for_subject.include?(schema)
45
+ schema_id = SCHEMAS.index(schema)
46
+ else
47
+ SCHEMAS << schema
48
+ schema_id = SCHEMAS.size - 1
49
+ SUBJECTS[params[:subject]] = SUBJECTS[params[:subject]] << schema_id
50
+ end
51
+
52
+ { id: schema_id }.to_json
53
+ end
54
+
55
+ get "/schemas/ids/:schema_id" do
56
+ schema = SCHEMAS.at(params[:schema_id].to_i)
57
+ halt(404, SCHEMA_NOT_FOUND) unless schema
58
+ { schema: schema }.to_json
59
+ end
60
+
61
+ get "/subjects" do
62
+ SUBJECTS.keys.to_json
63
+ end
64
+
65
+ get "/subjects/:subject/versions" do
66
+ schema_ids = SUBJECTS[params[:subject]]
67
+ halt(404, SUBJECT_NOT_FOUND) if schema_ids.empty?
68
+ (1..schema_ids.size).to_a.to_json
69
+ end
70
+
71
+ get "/subjects/:subject/versions/:version" do
72
+ schema_ids = SUBJECTS[params[:subject]]
73
+ halt(404, SUBJECT_NOT_FOUND) if schema_ids.empty?
74
+
75
+ schema_id = if params[:version] == 'latest'
76
+ schema_ids.last
77
+ else
78
+ schema_ids.at(Integer(params[:version]) - 1)
79
+ end
80
+ halt(404, VERSION_NOT_FOUND) unless schema_id
81
+
82
+ schema = SCHEMAS.at(schema_id)
83
+
84
+ {
85
+ name: params[:subject],
86
+ version: schema_ids.index(schema_id) + 1,
87
+ id: schema_id,
88
+ schema: schema
89
+ }.to_json
90
+ end
91
+
92
+ post "/subjects/:subject" do
93
+ schema = parse_schema
94
+
95
+ # Note: this does not actually handle the same schema registered under
96
+ # multiple subjects
97
+ schema_id = SCHEMAS.index(schema)
98
+
99
+ halt(404, SCHEMA_NOT_FOUND) unless schema_id
100
+
101
+ {
102
+ subject: params[:subject],
103
+ id: schema_id,
104
+ version: SUBJECTS[params[:subject]].index(schema_id) + 1,
105
+ schema: schema
106
+ }.to_json
107
+ end
108
+
109
+ post "/compatibility/subjects/:subject/versions/:version" do
110
+ # The ruby avro gem does not yet include a compatibility check between schemas.
111
+ # See https://github.com/apache/avro/pull/170
112
+ raise NotImplementedError
113
+ end
114
+
115
+ get "/config" do
116
+ global_config.to_json
117
+ end
118
+
119
+ put "/config" do
120
+ global_config.merge!(parse_config).to_json
121
+ end
122
+
123
+ get "/config/:subject" do
124
+ CONFIGS.fetch(params[:subject], global_config).to_json
125
+ end
126
+
127
+ put "/config/:subject" do
128
+ config = parse_config
129
+ subject = params[:subject]
130
+ CONFIGS.fetch(subject) do
131
+ CONFIGS[subject] = {}
132
+ end.merge!(config).to_json
133
+ end
134
+
135
+ def self.clear
136
+ SUBJECTS.clear
137
+ SCHEMAS.clear
138
+ CONFIGS.clear
139
+ @global_config = DEFAULT_GLOBAL_CONFIG.dup
140
+ end
141
+ end
@@ -1,84 +1,6 @@
1
- require 'sinatra/base'
1
+ require 'avro_turf/test/fake_confluent_schema_registry_server'
2
2
 
3
- class FakeSchemaRegistryServer < Sinatra::Base
4
- SUBJECTS = Hash.new { Array.new }
5
- SCHEMAS = []
6
- SUBJECT_NOT_FOUND = { error_code: 40401, message: 'Subject not found' }.to_json.freeze
7
- VERSION_NOT_FOUND = { error_code: 40402, message: 'Version not found' }.to_json.freeze
8
- SCHEMA_NOT_FOUND = { error_code: 40403, message: 'Schema not found' }.to_json.freeze
3
+ # FakeSchemaRegistryServer is deprecated and will be removed in a future release.
4
+ # Use FakeConfluentSchemaRegistryServer instead.
9
5
 
10
- helpers do
11
- def parse_schema
12
- request.body.rewind
13
- JSON.parse(request.body.read).fetch("schema").tap do |schema|
14
- Avro::Schema.parse(schema)
15
- end
16
- end
17
- end
18
-
19
- post "/subjects/:subject/versions" do
20
- SCHEMAS << parse_schema
21
-
22
- schema_id = SCHEMAS.size - 1
23
- SUBJECTS[params[:subject]] = SUBJECTS[params[:subject]] << schema_id
24
- { id: schema_id }.to_json
25
- end
26
-
27
- get "/schemas/ids/:schema_id" do
28
- schema = SCHEMAS.at(params[:schema_id].to_i)
29
- halt(404, SCHEMA_NOT_FOUND) unless schema
30
- { schema: schema }.to_json
31
- end
32
-
33
- get "/subjects" do
34
- SUBJECTS.keys.to_json
35
- end
36
-
37
- get "/subjects/:subject/versions" do
38
- schema_ids = SUBJECTS[params[:subject]]
39
- halt(404, SUBJECT_NOT_FOUND) if schema_ids.empty?
40
- (1..schema_ids.size).to_a.to_json
41
- end
42
-
43
- get "/subjects/:subject/versions/:version" do
44
- schema_ids = SUBJECTS[params[:subject]]
45
- halt(404, SUBJECT_NOT_FOUND) if schema_ids.empty?
46
-
47
- schema_id = if params[:version] == 'latest'
48
- schema_ids.last
49
- else
50
- schema_ids.at(Integer(params[:version]) - 1)
51
- end
52
- halt(404, VERSION_NOT_FOUND) unless schema_id
53
-
54
- schema = SCHEMAS.at(schema_id)
55
-
56
- {
57
- name: params[:subject],
58
- version: schema_ids.index(schema_id) + 1,
59
- schema: schema
60
- }.to_json
61
- end
62
-
63
- post "/subjects/:subject" do
64
- schema = parse_schema
65
-
66
- # Note: this does not actually handle the same schema registered under
67
- # multiple subjects
68
- schema_id = SCHEMAS.index(schema)
69
-
70
- halt(404, SCHEMA_NOT_FOUND) unless schema_id
71
-
72
- {
73
- subject: params[:subject],
74
- id: schema_id,
75
- version: SUBJECTS[params[:subject]].index(schema_id) + 1,
76
- schema: schema
77
- }.to_json
78
- end
79
-
80
- def self.clear
81
- SUBJECTS.clear
82
- SCHEMAS.clear
83
- end
84
- end
6
+ FakeSchemaRegistryServer = FakeConfluentSchemaRegistryServer
@@ -1,3 +1,3 @@
1
1
  class AvroTurf
2
- VERSION = "0.7.1"
2
+ VERSION = "0.10.0"
3
3
  end
@@ -0,0 +1,63 @@
1
+ require 'webmock/rspec'
2
+ require 'avro_turf/cached_confluent_schema_registry'
3
+ require 'avro_turf/test/fake_confluent_schema_registry_server'
4
+
5
+ describe AvroTurf::CachedConfluentSchemaRegistry do
6
+ let(:upstream) { instance_double(AvroTurf::ConfluentSchemaRegistry) }
7
+ let(:registry) { described_class.new(upstream) }
8
+ let(:id) { rand(999) }
9
+ let(:schema) do
10
+ {
11
+ type: "record",
12
+ name: "person",
13
+ fields: [{ name: "name", type: "string" }]
14
+ }.to_json
15
+ end
16
+
17
+ describe "#fetch" do
18
+ it "caches the result of fetch" do
19
+ # multiple calls return same result, with only one upstream call
20
+ allow(upstream).to receive(:fetch).with(id).and_return(schema)
21
+ expect(registry.fetch(id)).to eq(schema)
22
+ expect(registry.fetch(id)).to eq(schema)
23
+ expect(upstream).to have_received(:fetch).exactly(1).times
24
+ end
25
+ end
26
+
27
+ describe "#register" do
28
+ let(:subject_name) { "a_subject" }
29
+
30
+ it "caches the result of register" do
31
+ # multiple calls return same result, with only one upstream call
32
+ allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
33
+ expect(registry.register(subject_name, schema)).to eq(id)
34
+ expect(registry.register(subject_name, schema)).to eq(id)
35
+ expect(upstream).to have_received(:register).exactly(1).times
36
+ end
37
+ end
38
+
39
+ describe '#subject_version' do
40
+ let(:subject_name) { 'a_subject' }
41
+ let(:version) { 1 }
42
+ let(:schema_with_meta) do
43
+ {
44
+ subject: subject_name,
45
+ id: 1,
46
+ version: 1,
47
+ schema: schema
48
+ }
49
+ end
50
+
51
+ it 'caches the result of subject_version' do
52
+ allow(upstream).to receive(:subject_version).with(subject_name, version).and_return(schema_with_meta)
53
+ registry.subject_version(subject_name, version)
54
+ registry.subject_version(subject_name, version)
55
+ expect(upstream).to have_received(:subject_version).exactly(1).times
56
+ end
57
+ end
58
+
59
+ it_behaves_like "a confluent schema registry client" do
60
+ let(:upstream) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
61
+ let(:registry) { described_class.new(upstream) }
62
+ end
63
+ end
@@ -0,0 +1,9 @@
1
+ require 'webmock/rspec'
2
+ require 'avro_turf/confluent_schema_registry'
3
+ require 'avro_turf/test/fake_confluent_schema_registry_server'
4
+
5
+ describe AvroTurf::ConfluentSchemaRegistry do
6
+ it_behaves_like "a confluent schema registry client" do
7
+ let(:registry) { described_class.new(registry_url, logger: logger) }
8
+ end
9
+ end
@@ -0,0 +1,159 @@
1
+ require 'webmock/rspec'
2
+ require 'avro_turf/cached_confluent_schema_registry'
3
+ require 'avro_turf/test/fake_confluent_schema_registry_server'
4
+
5
+ describe AvroTurf::CachedConfluentSchemaRegistry do
6
+ let(:upstream) { instance_double(AvroTurf::ConfluentSchemaRegistry) }
7
+ let(:cache) { AvroTurf::DiskCache.new("spec/cache")}
8
+ let(:registry) { described_class.new(upstream, cache: cache) }
9
+ let(:id) { rand(999) }
10
+ let(:schema) do
11
+ {
12
+ type: "record",
13
+ name: "person",
14
+ fields: [{ name: "name", type: "string" }]
15
+ }.to_json
16
+ end
17
+
18
+ let(:city_id) { rand(999) }
19
+ let(:city_schema) do
20
+ {
21
+ type: "record",
22
+ name: "city",
23
+ fields: [{ name: "name", type: "string" }]
24
+ }.to_json
25
+ end
26
+
27
+ let(:subject) { 'subject' }
28
+ let(:version) { rand(999) }
29
+ let(:subject_version_schema) do
30
+ {
31
+ subject: subject,
32
+ version: version,
33
+ id: id,
34
+ schema: {
35
+ type: "record",
36
+ name: "city",
37
+ fields: { name: "name", type: "string" }
38
+ }
39
+ }.to_json
40
+ end
41
+
42
+ before do
43
+ FileUtils.mkdir_p("spec/cache")
44
+ end
45
+
46
+ describe "#fetch" do
47
+ let(:cache_before) do
48
+ {
49
+ "#{id}" => "#{schema}"
50
+ }
51
+ end
52
+ let(:cache_after) do
53
+ {
54
+ "#{id}" => "#{schema}",
55
+ "#{city_id}" => "#{city_schema}"
56
+ }
57
+ end
58
+
59
+ # setup the disk cache to avoid performing the upstream fetch
60
+ before do
61
+ store_cache("schemas_by_id.json", cache_before)
62
+ end
63
+
64
+ it "uses preloaded disk cache" do
65
+ # multiple calls return same result, with zero upstream calls
66
+ allow(upstream).to receive(:fetch).with(id).and_return(schema)
67
+ expect(registry.fetch(id)).to eq(schema)
68
+ expect(registry.fetch(id)).to eq(schema)
69
+ expect(upstream).to have_received(:fetch).exactly(0).times
70
+ expect(load_cache("schemas_by_id.json")).to eq cache_before
71
+ end
72
+
73
+ it "writes thru to disk cache" do
74
+ # multiple calls return same result, with only one upstream call
75
+ allow(upstream).to receive(:fetch).with(city_id).and_return(city_schema)
76
+ expect(registry.fetch(city_id)).to eq(city_schema)
77
+ expect(registry.fetch(city_id)).to eq(city_schema)
78
+ expect(upstream).to have_received(:fetch).exactly(1).times
79
+ expect(load_cache("schemas_by_id.json")).to eq cache_after
80
+ end
81
+ end
82
+
83
+ describe "#register" do
84
+ let(:subject_name) { "a_subject" }
85
+ let(:cache_before) do
86
+ {
87
+ "#{subject_name}#{schema}" => id
88
+ }
89
+ end
90
+
91
+ let(:city_name) { "a_city" }
92
+ let(:cache_after) do
93
+ {
94
+ "#{subject_name}#{schema}" => id,
95
+ "#{city_name}#{city_schema}" => city_id
96
+ }
97
+ end
98
+
99
+ # setup the disk cache to avoid performing the upstream register
100
+ before do
101
+ store_cache("ids_by_schema.json", cache_before)
102
+ end
103
+
104
+ it "uses preloaded disk cache" do
105
+ # multiple calls return same result, with zero upstream calls
106
+ allow(upstream).to receive(:register).with(subject_name, schema).and_return(id)
107
+ expect(registry.register(subject_name, schema)).to eq(id)
108
+ expect(registry.register(subject_name, schema)).to eq(id)
109
+ expect(upstream).to have_received(:register).exactly(0).times
110
+ expect(load_cache("ids_by_schema.json")).to eq cache_before
111
+ end
112
+
113
+ it "writes thru to disk cache" do
114
+ # multiple calls return same result, with only one upstream call
115
+ allow(upstream).to receive(:register).with(city_name, city_schema).and_return(city_id)
116
+ expect(registry.register(city_name, city_schema)).to eq(city_id)
117
+ expect(registry.register(city_name, city_schema)).to eq(city_id)
118
+ expect(upstream).to have_received(:register).exactly(1).times
119
+ expect(load_cache("ids_by_schema.json")).to eq cache_after
120
+ end
121
+ end
122
+
123
+ describe "#subject_version" do
124
+ it "writes thru to disk cache" do
125
+ # multiple calls return same result, with only one upstream call
126
+ allow(upstream).to receive(:subject_version).with(subject, version).and_return(subject_version_schema)
127
+ expect(File).not_to exist("./spec/cache/schemas_by_subject_version.json")
128
+
129
+ expect(registry.subject_version(subject, version)).to eq(subject_version_schema)
130
+
131
+ json = JSON.parse(File.read("./spec/cache/schemas_by_subject_version.json"))["#{subject}#{version}"]
132
+ expect(json).to eq(subject_version_schema)
133
+
134
+ expect(registry.subject_version(subject, version)).to eq(subject_version_schema)
135
+ expect(upstream).to have_received(:subject_version).exactly(1).times
136
+ end
137
+
138
+ it "reads from disk cache and populates mem cache" do
139
+ allow(upstream).to receive(:subject_version).with(subject, version).and_return(subject_version_schema)
140
+ key = "#{subject}#{version}"
141
+ hash = {key => subject_version_schema}
142
+ cache.send(:write_to_disk_cache, "./spec/cache/schemas_by_subject_version.json", hash)
143
+
144
+ cached_schema = cache.instance_variable_get(:@schemas_by_subject_version)
145
+ expect(cached_schema).to eq({})
146
+
147
+ expect(registry.subject_version(subject, version)).to eq(subject_version_schema)
148
+ expect(upstream).to have_received(:subject_version).exactly(0).times
149
+
150
+ cached_schema = cache.instance_variable_get(:@schemas_by_subject_version)
151
+ expect(cached_schema).to eq({key => subject_version_schema})
152
+ end
153
+ end
154
+
155
+ it_behaves_like "a confluent schema registry client" do
156
+ let(:upstream) { AvroTurf::ConfluentSchemaRegistry.new(registry_url, logger: logger) }
157
+ let(:registry) { described_class.new(upstream) }
158
+ end
159
+ end