avro_turf 0.7.1 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +36 -0
  3. data/.github/workflows/ruby.yml +20 -0
  4. data/CHANGELOG.md +29 -0
  5. data/Gemfile +0 -3
  6. data/README.md +54 -16
  7. data/avro_turf.gemspec +13 -2
  8. data/lib/avro_turf.rb +14 -3
  9. data/lib/avro_turf/cached_confluent_schema_registry.rb +39 -0
  10. data/lib/avro_turf/cached_schema_registry.rb +4 -24
  11. data/lib/avro_turf/confluent_schema_registry.rb +106 -0
  12. data/lib/avro_turf/disk_cache.rb +83 -0
  13. data/lib/avro_turf/in_memory_cache.rb +38 -0
  14. data/lib/avro_turf/messaging.rb +77 -9
  15. data/lib/avro_turf/mutable_schema_store.rb +18 -0
  16. data/lib/avro_turf/schema_registry.rb +4 -77
  17. data/lib/avro_turf/schema_store.rb +36 -19
  18. data/lib/avro_turf/schema_to_avro_patch.rb +11 -0
  19. data/lib/avro_turf/test/fake_confluent_schema_registry_server.rb +141 -0
  20. data/lib/avro_turf/test/fake_schema_registry_server.rb +4 -82
  21. data/lib/avro_turf/version.rb +1 -1
  22. data/spec/cached_confluent_schema_registry_spec.rb +63 -0
  23. data/spec/confluent_schema_registry_spec.rb +9 -0
  24. data/spec/disk_cached_confluent_schema_registry_spec.rb +159 -0
  25. data/spec/messaging_spec.rb +208 -19
  26. data/spec/schema_store_spec.rb +36 -0
  27. data/spec/schema_to_avro_patch_spec.rb +42 -0
  28. data/spec/spec_helper.rb +8 -0
  29. data/spec/support/{schema_registry_context.rb → confluent_schema_registry_context.rb} +72 -8
  30. data/spec/test/fake_confluent_schema_registry_server_spec.rb +40 -0
  31. metadata +49 -16
  32. data/circle.yml +0 -4
  33. data/spec/cached_schema_registry_spec.rb +0 -41
  34. data/spec/schema_registry_spec.rb +0 -9
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 4973e043a05b5317719217586f8cee3b9f913d1a
4
- data.tar.gz: fb36f804b15c0bed61161d7b73242b7cb0a4cd83
2
+ SHA256:
3
+ metadata.gz: 622818bec3f2c9af6a801fe428b373c96ad38596bef90e6f24d86c855084a1a9
4
+ data.tar.gz: 386053febb649eb457fd0fca3685556e074bee28994be511944da5d7b75b594b
5
5
  SHA512:
6
- metadata.gz: 234fa5e60fa5d41ead791b6f48c0db08b2ae02a53848b0fbd31dcf13ee07610800c8407a760e86b9329da3f0b0a2d96ea6c44768f7db9438e18aebe72a3f7ab6
7
- data.tar.gz: deaa4c2afa94cabe4d50070d17c3cd455623461d1fb91397e2ed0a2ff473df9892e409836efe7e4bc13d55658de6b340955c1342a8251bee1a52aef2b8d875cb
6
+ metadata.gz: 35663cd1404da25e60c03bdfcca2dc20fd1af8fb5eb046bad57fb079f4e50901120e15ae3e6f9d275b068202fd113613b49b9e9237ca2705de3f1fd0e34c304a
7
+ data.tar.gz: a37f52541946c02c4858c73e974f78e3e88702fe31292807dd46760e9e2a4dabad8287b136df0ff95ba6912cbb15760881e9574f2837a3124242a00e9d6fac75
@@ -0,0 +1,36 @@
1
+ version: 2
2
+ jobs:
3
+ build:
4
+ environment:
5
+ CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
6
+ CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
7
+ docker:
8
+ - image: circleci/ruby:2.6.2
9
+ steps:
10
+ - checkout
11
+ - run: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
12
+ - restore_cache:
13
+ keys:
14
+ # This branch if available
15
+ - v1-dep-{{ .Branch }}-
16
+ # Default branch if not
17
+ - v1-dep-master-
18
+ # Any branch if there are none on the default branch - this should be unnecessary if you have your default branch configured correctly
19
+ - v1-dep-
20
+ - run: gem install bundler --no-document
21
+ - run: 'bundle check --path=vendor/bundle || bundle install --path=vendor/bundle --jobs=4 --retry=3'
22
+ # Save dependency cache
23
+ - save_cache:
24
+ key: v1-dep-{{ .Branch }}-{{ epoch }}
25
+ paths:
26
+ - vendor/bundle
27
+ - ~/.bundle
28
+ - run: mkdir -p $CIRCLE_TEST_REPORTS/rspec
29
+ - run:
30
+ command: bundle exec rspec --color --require spec_helper --format progress
31
+ - store_test_results:
32
+ path: /tmp/circleci-test-results
33
+ - store_artifacts:
34
+ path: /tmp/circleci-artifacts
35
+ - store_artifacts:
36
+ path: /tmp/circleci-test-results
@@ -0,0 +1,20 @@
1
+ name: Ruby
2
+
3
+ on: [push]
4
+
5
+ jobs:
6
+ build:
7
+
8
+ runs-on: ubuntu-latest
9
+
10
+ steps:
11
+ - uses: actions/checkout@v1
12
+ - name: Set up Ruby 2.6
13
+ uses: actions/setup-ruby@v1
14
+ with:
15
+ ruby-version: 2.6.x
16
+ - name: Build and test with RSpec
17
+ run: |
18
+ gem install bundler
19
+ bundle install --jobs 4 --retry 3
20
+ bundle exec rspec
@@ -0,0 +1,29 @@
1
+ # avro_turf
2
+
3
+ ## Unreleased
4
+
5
+ ## v0.10.0
6
+
7
+ - Add more disk caching (#103)
8
+ - Include schema information when decoding (#100, #101, #104)
9
+
10
+ ## v0.9.0
11
+
12
+ - Compatibility with Avro v1.9.0 (#94)
13
+ - Disable the auto registration of schema (#95)
14
+ - abstracted caching from CachedConfluentSchemaRegistry (#74)
15
+ - Load avro-patches if installed to silence deprecation errors (#85)
16
+ - Make the schema store thread safe (#92)
17
+
18
+ ## v0.8.1
19
+
20
+ - Allow accessing schema store from outside AvroTurf (#68).
21
+
22
+ ## v0.8.0
23
+
24
+ - The names `AvroTurf::SchemaRegistry`, `AvroTurf::CachedSchemaRegistry`, and
25
+ `FakeSchemaRegistryServer` are deprecated and will be removed in a future release.
26
+ Use `AvroTurf::ConfluentSchemaRegistry`, `AvroTurf::CachedConfluentSchemaRegistry`,
27
+ and `FakeConfluentSchemaRegistryServer` instead.
28
+ - Add support for the Config API (http://docs.confluent.io/3.1.2/schema-registry/docs/api.html#config)
29
+ to `AvroTurf::ConfluentSchemaRegistry`.
data/Gemfile CHANGED
@@ -2,6 +2,3 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in avro_turf.gemspec
4
4
  gemspec
5
-
6
- # Used by CircleCI to format RSpec results.
7
- gem 'rspec_junit_formatter', :git => 'git@github.com:circleci/rspec_junit_formatter.git'
data/README.md CHANGED
@@ -5,6 +5,17 @@ AvroTurf is a library that makes it easier to encode and decode data using the [
5
5
  * Provides an idiomatic Ruby interface.
6
6
  * Allows referencing schemas defined in another file.
7
7
 
8
+ ## Deprecation Notice
9
+
10
+ The `AvroTurf::SchemaRegistry`, `AvroTurf::CachedSchemaRegistry`,
11
+ and `FakeSchemaRegistryServer` names have been deprecated because the Avro spec recently
12
+ introduced an incompatible [single-message encoding format](https://github.com/apache/avro/commit/30408a9c192c5f4eaaf42f01f0ffbfffd705aa57).
13
+
14
+ These classes have been renamed to `AvroTurf::ConfluentSchemaRegistry`,
15
+ `AvroTurf::CachedConfluentSchemaRegistry`, and `FakeConfluentSchemaRegistryServer`.
16
+
17
+ The aliases for the original names will be removed in a future release.
18
+
8
19
  ## Installation
9
20
 
10
21
  Add this line to your application's Gemfile:
@@ -80,11 +91,8 @@ In the example above, the `person` schema references the `address` schema, even
80
91
  ```json
81
92
  // person_list.avsc
82
93
  {
83
- "name": "person_list",
84
- "type": {
85
- "type": "array",
86
- "items": "person"
87
- }
94
+ "type": "array",
95
+ "items": "person"
88
96
  }
89
97
  ```
90
98
 
@@ -99,7 +107,7 @@ By default, AvroTurf will encode data in the Avro data file format. This means t
99
107
 
100
108
  The Messaging API will automatically register schemas used for encoding data, and will fetch the corresponding schema when decoding. Instead of including the full schema in the output, only a schema id generated by the registry is included. Registering the same schema twice is idempotent, so no coordination is needed.
101
109
 
102
- **NOTE:** The Messaging format is _not_ compatible with the Avro data file API.
110
+ **NOTE:** [The Messaging format](https://github.com/confluentinc/schema-registry/blob/master/docs/serializer-formatter.rst#wire-format) is _not_ compatible with the Avro data file API.
103
111
 
104
112
  The Messaging API is not included by default, so you must require 'avro_turf/messaging' explicitly if you want to use it.
105
113
 
@@ -116,16 +124,39 @@ avro = AvroTurf::Messaging.new(registry_url: "http://my-registry:8081/")
116
124
  # time a schema is used.
117
125
  data = avro.encode({ "title" => "hello, world" }, schema_name: "greeting")
118
126
 
127
+ # If you don't want to automatically register new schemas, you can explicitly pass
128
+ # subject and version to specify which schema should be used for encoding.
129
+ # It will fetch that schema from the registry and cache it. Subsequent instances
130
+ # of the same schema version will be served by the cache.
131
+ data = avro.encode({ "title" => "hello, world" }, subject: 'greeting', version: 1)
132
+
133
+ # You can also explicitly pass schema_id to specify which schema
134
+ # should be used for encoding.
135
+ # It will fetch that schema from the registry and cache it. Subsequent instances
136
+ # of the same schema version will be served by the cache.
137
+ data = avro.encode({ "title" => "hello, world" }, schema_id: 2)
138
+
119
139
  # When decoding, the schema will be fetched from the registry and cached. Subsequent
120
140
  # instances of the same schema id will be served by the cache.
121
141
  avro.decode(data) #=> { "title" => "hello, world" }
142
+
143
+ # If you want to get the decoded message as well as the schema used to encode the message,
144
+ # you can use the `#decode_message` method.
145
+ result = avro.decode_message(data)
146
+ result.message #=> { "title" => "hello, world" }
147
+ result.schema_id #=> 3
148
+ result.writer_schema #=> #<Avro::Schema: ...>
149
+ result.reader_schema #=> nil
122
150
  ```
123
151
 
124
- In addition to encoding and decoding data, you can check whether a schema is compatible
125
- with a subject in the registry using the [Compatibility API](http://docs.confluent.io/2.0.0/schema-registry/docs/api.html#compatibility)
152
+ ### Confluent Schema Registry Client
153
+
154
+ The ConfluentSchemaRegistry client used by the Messaging API can also be used directly.
155
+ It can check whether a schema is compatible with a subject in the registry using the [Compatibility API](http://docs.confluent.io/3.1.2/schema-registry/docs/api.html#compatibility):
126
156
 
127
157
  ```ruby
128
- require 'avro_turf/messaging'
158
+ require 'avro_turf'
159
+ require 'avro_turf/confluent_schema_registry'
129
160
 
130
161
  schema = <<-JSON
131
162
  {
@@ -144,15 +175,22 @@ schema = <<-JSON
144
175
  }
145
176
  JSON
146
177
 
147
- avro = AvroTurf::Messaging.new(registry_url: "http://my-registry:8081/")
178
+ registry = AvroTurf::ConfluentSchemaRegistry.new("http://my-registry:8081/")
148
179
 
149
- # Returns true if the schema is compatible, false otherwise.
150
- avro.compatible?("person", schema)
180
+ # Returns true if the schema is compatible, nil if the subject or version is not registered, and false if incompatible.
181
+ registry.compatible?("person", schema)
182
+ ```
183
+
184
+ The ConfluentSchemaRegistry client can also change the global compatibility level or the compatibility level for an individual subject using the [Config API](http://docs.confluent.io/3.1.2/schema-registry/docs/api.html#config):
185
+
186
+ ```ruby
187
+ registry.update_global_config(compatibility: 'FULL')
188
+ registry.update_subject_config("person", compatibility: 'NONE')
151
189
  ```
152
190
 
153
191
  ### Testing Support
154
192
 
155
- AvroTurf includes a `FakeSchemaRegistryServer` that can be used in tests. The
193
+ AvroTurf includes a `FakeConfluentSchemaRegistryServer` that can be used in tests. The
156
194
  fake schema registry server depends on Sinatra but it is _not_ listed as a runtime
157
195
  dependency for AvroTurf. Sinatra must be added to your Gemfile or gemspec in order
158
196
  to use the fake server.
@@ -160,14 +198,14 @@ to use the fake server.
160
198
  Example using RSpec:
161
199
 
162
200
  ```ruby
163
- require 'avro_turf/test/fake_schema_registry_server'
201
+ require 'avro_turf/test/fake_confluent_schema_registry_server'
164
202
  require 'webmock/rspec'
165
203
 
166
204
  # within an example
167
205
  let(:registry_url) { "http://registry.example.com" }
168
206
  before do
169
- stub_request(:any, /^#{registry_url}/).to_rack(FakeSchemaRegistryServer)
170
- FakeSchemaRegistryServer.clear
207
+ stub_request(:any, /^#{registry_url}/).to_rack(FakeConfluentSchemaRegistryServer)
208
+ FakeConfluentSchemaRegistryServer.clear
171
209
  end
172
210
 
173
211
  # Messaging objects created with the same registry_url will now use the fake server.
@@ -17,14 +17,25 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ["lib"]
19
19
 
20
- spec.add_dependency "avro", ">= 1.7.7", "< 1.9"
20
+ spec.add_dependency "avro", ">= 1.7.7", "< 1.10"
21
21
  spec.add_dependency "excon", "~> 0.45"
22
22
 
23
- spec.add_development_dependency "bundler", "~> 1.7"
23
+ spec.add_development_dependency "bundler", "~> 2.0"
24
24
  spec.add_development_dependency "rake", "~> 10.0"
25
25
  spec.add_development_dependency "rspec", "~> 3.2.0"
26
26
  spec.add_development_dependency "fakefs", "~> 0.6.7"
27
27
  spec.add_development_dependency "webmock"
28
28
  spec.add_development_dependency "sinatra"
29
29
  spec.add_development_dependency "json_spec"
30
+ spec.add_development_dependency "rack-test"
31
+
32
+ spec.post_install_message = %{
33
+ avro_turf v0.8.0 deprecates the names AvroTurf::SchemaRegistry,
34
+ AvroTurf::CachedSchemaRegistry, and FakeSchemaRegistryServer.
35
+
36
+ Use AvroTurf::ConfluentSchemaRegistry, AvroTurf::CachedConfluentSchemaRegistry,
37
+ and FakeConfluentSchemaRegistryServer instead.
38
+
39
+ See https://github.com/dasch/avro_turf#deprecation-notice
40
+ }
30
41
  end
@@ -1,9 +1,18 @@
1
+ begin
2
+ require 'avro-patches'
3
+ rescue LoadError
4
+ false
5
+ end
1
6
  require 'avro_turf/version'
2
7
  require 'avro'
3
8
  require 'json'
4
9
  require 'avro_turf/schema_store'
5
10
  require 'avro_turf/core_ext'
6
- require 'avro_turf/schema_to_avro_patch'
11
+
12
+ # check for something that indicates Avro v1.9.0 or later
13
+ unless defined?(::Avro::LogicalTypes)
14
+ require 'avro_turf/schema_to_avro_patch'
15
+ end
7
16
 
8
17
  class AvroTurf
9
18
  class Error < StandardError; end
@@ -15,13 +24,15 @@ class AvroTurf
15
24
  # Create a new AvroTurf instance with the specified configuration.
16
25
  #
17
26
  # schemas_path - The String path to the root directory containing Avro schemas (default: "./schemas").
27
+ # schema_store - A schema store object that responds to #find(schema_name, namespace).
18
28
  # namespace - The String namespace that should be used to qualify schema names (optional).
19
29
  # codec - The String name of a codec that should be used to compress messages (optional).
20
30
  #
21
31
  # Currently, the only valid codec name is `deflate`.
22
- def initialize(schemas_path: nil, namespace: nil, codec: nil)
32
+ def initialize(schemas_path: nil, schema_store: nil, namespace: nil, codec: nil)
23
33
  @namespace = namespace
24
- @schema_store = SchemaStore.new(path: schemas_path || DEFAULT_SCHEMAS_PATH)
34
+ @schema_store = schema_store ||
35
+ SchemaStore.new(path: schemas_path || DEFAULT_SCHEMAS_PATH)
25
36
  @codec = codec
26
37
  end
27
38
 
@@ -0,0 +1,39 @@
1
+ require 'avro_turf/confluent_schema_registry'
2
+ require 'avro_turf/in_memory_cache'
3
+ require 'avro_turf/disk_cache'
4
+
5
+ # Caches registrations and lookups to the schema registry in memory.
6
+ class AvroTurf::CachedConfluentSchemaRegistry
7
+
8
+ # Instantiate a new CachedConfluentSchemaRegistry instance with the given configuration.
9
+ # By default, uses a provided InMemoryCache to prevent repeated calls to the upstream registry.
10
+ #
11
+ # upstream - The upstream schema registry object that fully responds to all methods in the
12
+ # AvroTurf::ConfluentSchemaRegistry interface.
13
+ # cache - Optional user provided Cache object that responds to all methods in the AvroTurf::InMemoryCache interface.
14
+ def initialize(upstream, cache: nil)
15
+ @upstream = upstream
16
+ @cache = cache || AvroTurf::InMemoryCache.new()
17
+ end
18
+
19
+ # Delegate the following methods to the upstream
20
+ %i(subjects subject_versions check compatible?
21
+ global_config update_global_config subject_config update_subject_config).each do |name|
22
+ define_method(name) do |*args|
23
+ instance_variable_get(:@upstream).send(name, *args)
24
+ end
25
+ end
26
+
27
+ def fetch(id)
28
+ @cache.lookup_by_id(id) || @cache.store_by_id(id, @upstream.fetch(id))
29
+ end
30
+
31
+ def register(subject, schema)
32
+ @cache.lookup_by_schema(subject, schema) || @cache.store_by_schema(subject, schema, @upstream.register(subject, schema))
33
+ end
34
+
35
+ def subject_version(subject, version = 'latest')
36
+ @cache.lookup_by_version(subject, version) ||
37
+ @cache.store_by_version(subject, version, @upstream.subject_version(subject, version))
38
+ end
39
+ end
@@ -1,26 +1,6 @@
1
- require 'avro_turf/schema_registry'
1
+ require 'avro_turf/cached_confluent_schema_registry'
2
2
 
3
- # Caches registrations and lookups to the schema registry in memory.
4
- class AvroTurf::CachedSchemaRegistry
3
+ # AvroTurf::CachedSchemaRegistry is deprecated and will be removed in a future release.
4
+ # Use AvroTurf::CachedConfluentSchemaRegistry instead.
5
5
 
6
- def initialize(upstream)
7
- @upstream = upstream
8
- @schemas_by_id = {}
9
- @ids_by_schema = {}
10
- end
11
-
12
- # Delegate the following methods to the upstream
13
- %i(subjects subject_versions subject_version check compatible?).each do |name|
14
- define_method(name) do |*args|
15
- instance_variable_get(:@upstream).send(name, *args)
16
- end
17
- end
18
-
19
- def fetch(id)
20
- @schemas_by_id[id] ||= @upstream.fetch(id)
21
- end
22
-
23
- def register(subject, schema)
24
- @ids_by_schema[subject + schema.to_s] ||= @upstream.register(subject, schema)
25
- end
26
- end
6
+ AvroTurf::CachedSchemaRegistry = AvroTurf::CachedConfluentSchemaRegistry
@@ -0,0 +1,106 @@
1
+ require 'excon'
2
+
3
+ class AvroTurf::ConfluentSchemaRegistry
4
+ CONTENT_TYPE = "application/vnd.schemaregistry.v1+json".freeze
5
+
6
+ def initialize(url, logger: Logger.new($stdout))
7
+ @logger = logger
8
+ @connection = Excon.new(url, headers: {
9
+ "Content-Type" => CONTENT_TYPE,
10
+ })
11
+ end
12
+
13
+ def fetch(id)
14
+ @logger.info "Fetching schema with id #{id}"
15
+ data = get("/schemas/ids/#{id}")
16
+ data.fetch("schema")
17
+ end
18
+
19
+ def register(subject, schema)
20
+ data = post("/subjects/#{subject}/versions", body: {
21
+ schema: schema.to_s
22
+ }.to_json)
23
+
24
+ id = data.fetch("id")
25
+
26
+ @logger.info "Registered schema for subject `#{subject}`; id = #{id}"
27
+
28
+ id
29
+ end
30
+
31
+ # List all subjects
32
+ def subjects
33
+ get('/subjects')
34
+ end
35
+
36
+ # List all versions for a subject
37
+ def subject_versions(subject)
38
+ get("/subjects/#{subject}/versions")
39
+ end
40
+
41
+ # Get a specific version for a subject
42
+ def subject_version(subject, version = 'latest')
43
+ get("/subjects/#{subject}/versions/#{version}")
44
+ end
45
+
46
+ # Check if a schema exists. Returns nil if not found.
47
+ def check(subject, schema)
48
+ data = post("/subjects/#{subject}",
49
+ expects: [200, 404],
50
+ body: { schema: schema.to_s }.to_json)
51
+ data unless data.has_key?("error_code")
52
+ end
53
+
54
+ # Check if a schema is compatible with the stored version.
55
+ # Returns:
56
+ # - true if compatible
57
+ # - nil if the subject or version does not exist
58
+ # - false if incompatible
59
+ # http://docs.confluent.io/3.1.2/schema-registry/docs/api.html#compatibility
60
+ def compatible?(subject, schema, version = 'latest')
61
+ data = post("/compatibility/subjects/#{subject}/versions/#{version}",
62
+ expects: [200, 404],
63
+ body: { schema: schema.to_s }.to_json)
64
+ data.fetch('is_compatible', false) unless data.has_key?('error_code')
65
+ end
66
+
67
+ # Get global config
68
+ def global_config
69
+ get("/config")
70
+ end
71
+
72
+ # Update global config
73
+ def update_global_config(config)
74
+ put("/config", { body: config.to_json })
75
+ end
76
+
77
+ # Get config for subject
78
+ def subject_config(subject)
79
+ get("/config/#{subject}")
80
+ end
81
+
82
+ # Update config for subject
83
+ def update_subject_config(subject, config)
84
+ put("/config/#{subject}", { body: config.to_json })
85
+ end
86
+
87
+ private
88
+
89
+ def get(path, **options)
90
+ request(path, method: :get, **options)
91
+ end
92
+
93
+ def put(path, **options)
94
+ request(path, method: :put, **options)
95
+ end
96
+
97
+ def post(path, **options)
98
+ request(path, method: :post, **options)
99
+ end
100
+
101
+ def request(path, **options)
102
+ options = { expects: 200 }.merge!(options)
103
+ response = @connection.request(path: path, **options)
104
+ JSON.parse(response.body)
105
+ end
106
+ end