avro_turf_enchanced 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +2 -0
  4. data/Gemfile +7 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +174 -0
  7. data/Rakefile +2 -0
  8. data/avro_turf.gemspec +30 -0
  9. data/circle.yml +4 -0
  10. data/lib/avro_turf.rb +105 -0
  11. data/lib/avro_turf/cached_schema_registry.rb +26 -0
  12. data/lib/avro_turf/core_ext.rb +10 -0
  13. data/lib/avro_turf/core_ext/date.rb +5 -0
  14. data/lib/avro_turf/core_ext/enumerable.rb +5 -0
  15. data/lib/avro_turf/core_ext/false_class.rb +5 -0
  16. data/lib/avro_turf/core_ext/hash.rb +7 -0
  17. data/lib/avro_turf/core_ext/nil_class.rb +5 -0
  18. data/lib/avro_turf/core_ext/numeric.rb +5 -0
  19. data/lib/avro_turf/core_ext/string.rb +5 -0
  20. data/lib/avro_turf/core_ext/symbol.rb +5 -0
  21. data/lib/avro_turf/core_ext/time.rb +5 -0
  22. data/lib/avro_turf/core_ext/true_class.rb +5 -0
  23. data/lib/avro_turf/messaging.rb +102 -0
  24. data/lib/avro_turf/schema_registry.rb +79 -0
  25. data/lib/avro_turf/schema_store.rb +58 -0
  26. data/lib/avro_turf/schema_to_avro_patch.rb +52 -0
  27. data/lib/avro_turf/test/fake_schema_registry_server.rb +84 -0
  28. data/lib/avro_turf/version.rb +3 -0
  29. data/perf/address.avsc +14 -0
  30. data/perf/encoding_size.rb +26 -0
  31. data/perf/encoding_speed.rb +30 -0
  32. data/perf/person.avsc +14 -0
  33. data/spec/avro_turf_spec.rb +161 -0
  34. data/spec/cached_schema_registry_spec.rb +41 -0
  35. data/spec/core_ext/date_spec.rb +6 -0
  36. data/spec/core_ext/enumerable_spec.rb +12 -0
  37. data/spec/core_ext/false_class_spec.rb +5 -0
  38. data/spec/core_ext/hash_spec.rb +8 -0
  39. data/spec/core_ext/nil_class_spec.rb +5 -0
  40. data/spec/core_ext/numeric_spec.rb +6 -0
  41. data/spec/core_ext/string_spec.rb +5 -0
  42. data/spec/core_ext/symbol_spec.rb +5 -0
  43. data/spec/core_ext/time_spec.rb +6 -0
  44. data/spec/core_ext/true_class_spec.rb +5 -0
  45. data/spec/messaging_spec.rb +112 -0
  46. data/spec/schema_registry_spec.rb +9 -0
  47. data/spec/schema_store_spec.rb +253 -0
  48. data/spec/schema_to_avro_patch_spec.rb +66 -0
  49. data/spec/spec_helper.rb +20 -0
  50. data/spec/support/schema_registry_context.rb +190 -0
  51. metadata +244 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 14a7499b6aebb21573dbee95446815f677762fc5
4
+ data.tar.gz: b387dede993698644fa5a1f9b2da31a40621574b
5
+ SHA512:
6
+ metadata.gz: 93d80249a49cb7d4213664946e383d39fe57764c8cc069a210b835be760f7fb3c540f8a0bcda10e025ac2cf9ca77f1e95d2457b5060c4199aed616938647ac62
7
+ data.tar.gz: 1f6d8d33c4728f3a54fd323f134431ef8fa693dacdb7a5ebb8e50b4ccd86837a73ee03d813318bbae9436f19f3caa9092f043602a1d16d2d8b55073bdef4a871
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in avro_turf.gemspec
4
+ gemspec
5
+
6
+ # Used by CircleCI to format RSpec results.
7
+ gem 'rspec_junit_formatter', :git => 'git@github.com:circleci/rspec_junit_formatter.git'
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Daniel Schierbeck
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,174 @@
1
+ # AvroTurf
2
+
3
+ AvroTurf is a library that makes it easier to encode and decode data using the [Apache Avro](http://avro.apache.org/) serialization format. It adds a layer on top of the official Avro gem which makes it easier to integrate Avro into your application:
4
+
5
+ * Provides an idiomatic Ruby interface.
6
+ * Allows referencing schemas defined in another file.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ ```ruby
13
+ gem 'avro_turf'
14
+ ```
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install avro_turf
23
+
24
+ ## Usage
25
+
26
+ Using AvroTurf is quite simple:
27
+
28
+ ```ruby
29
+ # Schemas will be looked up from the specified directory.
30
+ avro = AvroTurf.new(schemas_path: "app/schemas/")
31
+
32
+ # Decode some data using a named schema. The schema file should exist in the
33
+ # schemas directory with the file name `<name>.avsc`.
34
+ avro.decode(encoded_data, schema_name: "person")
35
+
36
+ # Encode some data using the named schema.
37
+ avro.encode({ "name" => "Jane", "age" => 28 }, schema_name: "person")
38
+ ```
39
+
40
+ ### Inter-schema references
41
+
42
+ Unlike the official Avro library, AvroTurf allows schemas to reference each other. As an example:
43
+
44
+ ```json
45
+ // person.avsc
46
+ {
47
+ "name": "person",
48
+ "type": "record",
49
+ "fields": [
50
+ {
51
+ "name": "full_name",
52
+ "type": "string"
53
+ },
54
+ {
55
+ "name": "address",
56
+ "type": "address"
57
+ }
58
+ ]
59
+ }
60
+
61
+ // address.avsc
62
+ {
63
+ "name": "address",
64
+ "type": "record",
65
+ "fields": [
66
+ {
67
+ "name": "street",
68
+ "type": "string"
69
+ },
70
+ {
71
+ "name": "city",
72
+ "type": "string"
73
+ }
74
+ ]
75
+ }
76
+ ```
77
+
78
+ In the example above, the `person` schema references the `address` schema, even though the latter is defined in another file. This makes it possible to share types across schemas, e.g.
79
+
80
+ ```json
81
+ // person_list.avsc
82
+ {
83
+ "name": "person_list",
84
+ "type": {
85
+ "type": "array",
86
+ "items": "person"
87
+ }
88
+ }
89
+ ```
90
+
91
+ There's no reason to copy-paste the `person` schema into the `person_list` schema, as you can reference it directly.
92
+
93
+ This feature helps avoid subtle errors when the same type is represented using slightly different schemas.
94
+
95
+
96
+ ### Using a Schema Registry
97
+
98
+ By default, AvroTurf will encode data in the Avro data file format. This means that the schema used to encode the data is prepended to the output. If you want to decrease the size of the output, e.g. when storing data in a log such as Apache Kafka or in a database, you can use the `AvroTurf::Messaging` API. This top-level API requires the use of [Schema Registry](https://github.com/confluentinc/schema-registry), a service which allows registering and fetching Avro schemas.
99
+
100
+ The Messaging API will automatically register schemas used for encoding data, and will fetch the corresponding schema when decoding. Instead of including the full schema in the output, only a schema id generated by the registry is included. Registering the same schema twice is idempotent, so no coordination is needed.
101
+
102
+ **NOTE:** [The Messaging format](https://github.com/confluentinc/schema-registry/blob/master/docs/serializer-formatter.rst#wire-format) is _not_ compatible with the Avro data file API.
103
+
104
+ The Messaging API is not included by default, so you must require 'avro_turf/messaging' explicitly if you want to use it.
105
+
106
+ Using the Messaging API is simple once you have set up a Schema Registry service:
107
+
108
+ ```ruby
109
+ require 'avro_turf/messaging'
110
+
111
+ # You need to pass the URL of your Schema Registry.
112
+ avro = AvroTurf::Messaging.new(registry_url: "http://my-registry:8081/")
113
+
114
+ # The API for encoding and decoding data is similar to the default one. Encoding
115
+ # data has the side effect of registering the schema. This only happens the first
116
+ # time a schema is used.
117
+ data = avro.encode({ "title" => "hello, world" }, schema_name: "greeting")
118
+
119
+ # When decoding, the schema will be fetched from the registry and cached. Subsequent
120
+ # instances of the same schema id will be served by the cache.
121
+ avro.decode(data) #=> { "title" => "hello, world" }
122
+ ```
123
+
124
+ In addition to encoding and decoding data, you can check whether a schema is compatible
125
+ with a subject in the registry using the [Compatibility API](http://docs.confluent.io/2.0.0/schema-registry/docs/api.html#compatibility):
126
+
127
+ ```ruby
128
+ require 'avro_turf/messaging'
129
+
130
+ schema = <<-JSON
131
+ {
132
+ "name": "person",
133
+ "type": "record",
134
+ "fields": [
135
+ {
136
+ "name": "full_name",
137
+ "type": "string"
138
+ },
139
+ {
140
+ "name": "address",
141
+ "type": "address"
142
+ }
143
+ ]
144
+ }
145
+ JSON
146
+
147
+ avro = AvroTurf::Messaging.new(registry_url: "http://my-registry:8081/")
148
+
149
+ # Returns true if the schema is compatible, false otherwise.
150
+ avro.compatible?("person", schema)
151
+ ```
152
+
153
+ ### Testing Support
154
+
155
+ AvroTurf includes a `FakeSchemaRegistryServer` that can be used in tests. The
156
+ fake schema registry server depends on Sinatra but it is _not_ listed as a runtime
157
+ dependency for AvroTurf. Sinatra must be added to your Gemfile or gemspec in order
158
+ to use the fake server.
159
+
160
+ Example using RSpec:
161
+
162
+ ```ruby
163
+ require 'avro_turf/test/fake_schema_registry_server'
164
+ require 'webmock/rspec'
165
+
166
+ # within an example
167
+ let(:registry_url) { "http://registry.example.com" }
168
+ before do
169
+ stub_request(:any, /^#{registry_url}/).to_rack(FakeSchemaRegistryServer)
170
+ FakeSchemaRegistryServer.clear
171
+ end
172
+
173
+ # Messaging objects created with the same registry_url will now use the fake server.
174
+ ```
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/avro_turf.gemspec ADDED
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'avro_turf/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "avro_turf_enchanced"
8
+ spec.version = AvroTurf::VERSION
9
+ spec.authors = ["Daniel Schierbeck"]
10
+ spec.email = ["dasch@zendesk.com"]
11
+ spec.summary = "A library that makes it easier to use the Avro serialization format from Ruby"
12
+ spec.homepage = "https://github.com/dasch/avro_turf"
13
+ spec.license = "MIT"
14
+
15
+ spec.files = `git ls-files -z`.split("\x0")
16
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ["lib"]
19
+
20
+ spec.add_dependency "avro", ">= 1.7.7", "< 1.9"
21
+ spec.add_dependency "excon", "~> 0.45"
22
+
23
+ spec.add_development_dependency "bundler", "~> 1.7"
24
+ spec.add_development_dependency "rake", "~> 10.0"
25
+ spec.add_development_dependency "rspec", "~> 3.2.0"
26
+ spec.add_development_dependency "fakefs", "~> 0.6.7"
27
+ spec.add_development_dependency "webmock"
28
+ spec.add_development_dependency "sinatra"
29
+ spec.add_development_dependency "json_spec"
30
+ end
data/circle.yml ADDED
@@ -0,0 +1,4 @@
1
+ machine:
2
+ ruby:
3
+ version: 2.2.0
4
+ version: 2.0.0
data/lib/avro_turf.rb ADDED
@@ -0,0 +1,105 @@
1
+ require 'avro_turf/version'
2
+ require 'avro'
3
+ require 'json'
4
+ require 'avro_turf/schema_store'
5
+ require 'avro_turf/core_ext'
6
+ require 'avro_turf/schema_to_avro_patch'
7
+
8
+ class AvroTurf
9
+ class Error < StandardError; end
10
+ class SchemaError < Error; end
11
+ class SchemaNotFoundError < Error; end
12
+
13
+ DEFAULT_SCHEMAS_PATH = "./schemas"
14
+
15
+ # Create a new AvroTurf instance with the specified configuration.
16
+ #
17
+ # schemas_path - The String path to the root directory containing Avro schemas (default: "./schemas").
18
+ # namespace - The String namespace that should be used to qualify schema names (optional).
19
+ # codec - The String name of a codec that should be used to compress messages (optional).
20
+ #
21
+ # Currently, the only valid codec name is `deflate`.
22
+ def initialize(schemas_path: nil, namespace: nil, codec: nil)
23
+ @namespace = namespace
24
+ @schema_store = SchemaStore.new(path: schemas_path || DEFAULT_SCHEMAS_PATH)
25
+ @codec = codec
26
+ end
27
+
28
+ # Encodes data to Avro using the specified schema.
29
+ #
30
+ # data - The data that should be encoded.
31
+ # schema_name - The name of a schema in the `schemas_path`.
32
+ #
33
+ # Returns a String containing the encoded data.
34
+ def encode(data, schema_name: nil, namespace: @namespace)
35
+ stream = StringIO.new
36
+
37
+ encode_to_stream(data, stream: stream, schema_name: schema_name, namespace: namespace)
38
+
39
+ stream.string
40
+ end
41
+
42
+ # Encodes data to Avro using the specified schema and writes it to the
43
+ # specified stream.
44
+ #
45
+ # data - The data that should be encoded.
46
+ # schema_name - The name of a schema in the `schemas_path`.
47
+ # stream - An IO object that the encoded data should be written to (optional).
48
+ #
49
+ # Returns nothing.
50
+ def encode_to_stream(data, schema_name: nil, stream: nil, namespace: @namespace)
51
+ schema = @schema_store.find(schema_name, namespace)
52
+ writer = Avro::IO::DatumWriter.new(schema)
53
+
54
+ dw = Avro::DataFile::Writer.new(stream, writer, schema, @codec)
55
+ dw << data.as_avro
56
+ dw.close
57
+ end
58
+
59
+ # Decodes Avro data.
60
+ #
61
+ # encoded_data - A String containing Avro-encoded data.
62
+ # schema_name - The String name of the schema that should be used to read
63
+ # the data. If nil, the writer schema will be used.
64
+ # namespace - The namespace of the Avro schema used to decode the data.
65
+ #
66
+ # Returns whatever is encoded in the data.
67
+ def decode(encoded_data, schema_name: nil, namespace: @namespace)
68
+ stream = StringIO.new(encoded_data)
69
+ decode_stream(stream, schema_name: schema_name, namespace: namespace)
70
+ end
71
+
72
+ # Decodes Avro data from an IO stream.
73
+ #
74
+ # stream - An IO object containing Avro data.
75
+ # schema_name - The String name of the schema that should be used to read
76
+ # the data. If nil, the writer schema will be used.
77
+ # namespace - The namespace of the Avro schema used to decode the data.
78
+ #
79
+ # Returns the first datum encoded in the stream.
80
+ def decode_stream(stream, schema_name: nil, namespace: @namespace)
81
+ schema = schema_name && @schema_store.find(schema_name, namespace)
82
+ reader = Avro::IO::DatumReader.new(nil, schema)
83
+ dr = Avro::DataFile::Reader.new(stream, reader)
84
+ dr.first
85
+ end
86
+
87
+ # Validates data against an Avro schema.
88
+ #
89
+ # data - The data that should be validated.
90
+ # schema_name - The String name of the schema that should be used to validate
91
+ # the data.
92
+ # namespace - The namespace of the Avro schema (optional).
93
+ #
94
+ # Returns true if the data is valid, false otherwise.
95
+ def valid?(data, schema_name: nil, namespace: @namespace)
96
+ schema = schema_name && @schema_store.find(schema_name, namespace)
97
+
98
+ Avro::Schema.validate(schema, data.as_avro)
99
+ end
100
+
101
+ # Loads all schema definition files in the `schemas_path`.
102
+ def load_schemas!
103
+ @schema_store.load_schemas!
104
+ end
105
+ end
@@ -0,0 +1,26 @@
1
+ require 'avro_turf/schema_registry'
2
+
3
+ # Caches registrations and lookups to the schema registry in memory.
4
+ class AvroTurf::CachedSchemaRegistry
5
+
6
+ def initialize(upstream)
7
+ @upstream = upstream
8
+ @schemas_by_id = {}
9
+ @ids_by_schema = {}
10
+ end
11
+
12
+ # Delegate the following methods to the upstream
13
+ %i(subjects subject_versions subject_version check compatible?).each do |name|
14
+ define_method(name) do |*args|
15
+ instance_variable_get(:@upstream).send(name, *args)
16
+ end
17
+ end
18
+
19
+ def fetch(id)
20
+ @schemas_by_id[id] ||= @upstream.fetch(id)
21
+ end
22
+
23
+ def register(subject, schema)
24
+ @ids_by_schema[subject + schema.to_s] ||= @upstream.register(subject, schema)
25
+ end
26
+ end
@@ -0,0 +1,10 @@
1
+ require 'avro_turf/core_ext/string'
2
+ require 'avro_turf/core_ext/numeric'
3
+ require 'avro_turf/core_ext/enumerable'
4
+ require 'avro_turf/core_ext/hash'
5
+ require 'avro_turf/core_ext/time'
6
+ require 'avro_turf/core_ext/date'
7
+ require 'avro_turf/core_ext/symbol'
8
+ require 'avro_turf/core_ext/nil_class'
9
+ require 'avro_turf/core_ext/true_class'
10
+ require 'avro_turf/core_ext/false_class'
@@ -0,0 +1,5 @@
1
+ class Date
2
+ def as_avro
3
+ iso8601
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module Enumerable
2
+ def as_avro
3
+ map(&:as_avro)
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ class FalseClass
2
+ def as_avro
3
+ self
4
+ end
5
+ end