avro-patches 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4501694b4e822ab5d2f9b08cd4d2406474b7efa6
4
- data.tar.gz: 7cb86acf0bb9cb38486d59263904e1eac8eb4be7
3
+ metadata.gz: 28f04e46e3ccd4a1d9400d32fed8474f1d4b9edc
4
+ data.tar.gz: 412323d266beed3d397ee861775dc340e2d12020
5
5
  SHA512:
6
- metadata.gz: 7d2aa4dc274fef6e4727ac3d3842c984223b0ab8ab2fdaed522490d4a4a28d15ae636fb7844aea8f5ff20f0983e698c9d8aec42c3e7e3ba7b2b08e9e1a692761
7
- data.tar.gz: 03b0e275a36bfbf88e18638661bbc59bcad4a423a6767743a5560c9b2092e8a25a829128c941d8bb3b79693926b478361d78b47826e3b37939e0a94a7f2c172c
6
+ metadata.gz: d95df0967c75e5f5aba614785db7f7407e9e5b88f35c2c524ed230555425d4dc846cde3ae15cfe8f8923c6f1792d8724e01ca9e8dfda73a230304a6e56ad6714
7
+ data.tar.gz: 568746bb7f634a1168a1b109f59170f7ad79e3ed1cbcf99f6325913b559283c235c66d1c8e3e08ec5e7032bc610dcb62b4dc84e4ce3eb366e77fc78ba759003b
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # avro-patches
2
2
 
3
+ ## v0.3.0
4
+ - Further performance improvements for `Avro::SchemaValidator` and encoding.
5
+ - Ensure that strings are encoded as UTF-8.
6
+
3
7
  ## v0.2.0
4
8
  - Performance improvements for `Avro::SchemaValidator`.
5
9
 
data/lib/avro-patches.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'avro-patches/version'
2
2
 
3
3
  require 'avro'
4
+ require 'avro-patches/ensure_encoding'
4
5
  require 'avro-patches/schema_validator'
5
6
  require 'avro-patches/logical_types'
6
7
  require 'avro-patches/schema_compatibility'
@@ -0,0 +1,5 @@
1
+ # Change from "AVRO-1783: Ruby: Ensure correct binary encoding for byte strings"
2
+ # https://github.com/apache/avro/commit/315d842148d57590a58fafecf6e5ea378e9e0d74
3
+
4
+ # Only part of the above commit is included because we do not use protocols or RPC.
5
+ require_relative 'ensure_encoding/io'
@@ -0,0 +1,12 @@
1
+ Avro::IO::DatumWriter.class_eval do
2
+ # A string is encoded as a long followed by that many bytes of
3
+ # UTF-8 encoded character data
4
+ def write_string(datum)
5
+ # The original commit used:
6
+ # datum = datum.encode('utf-8') if datum.respond_to? :encode
7
+ # This always allocated a new string even if the string was already UTF-8 encoded.
8
+ # The form below is slightly more efficient.
9
+ datum = datum.encode(Encoding::UTF_8) if datum.respond_to?(:encode) && datum.encoding != Encoding::UTF_8
10
+ write_bytes(datum)
11
+ end
12
+ end
@@ -2,7 +2,7 @@ Avro::IO::DatumWriter.class_eval do
2
2
  def write_data(writers_schema, logical_datum, encoder)
3
3
  datum = writers_schema.type_adapter.encode(logical_datum)
4
4
 
5
- unless Avro::Schema.validate(writers_schema, datum, encoded = true)
5
+ unless Avro::Schema.validate(writers_schema, datum, { recursive: false, encoded: true })
6
6
  raise Avro::IO::AvroTypeError.new(writers_schema, datum)
7
7
  end
8
8
 
@@ -57,8 +57,8 @@ Avro::Schema.class_eval do
57
57
  end
58
58
 
59
59
  # Determine if a ruby datum is an instance of a schema
60
- def self.validate(expected_schema, logical_datum, encoded = false)
61
- Avro::SchemaValidator.validate!(expected_schema, logical_datum, encoded)
60
+ def self.validate(expected_schema, logical_datum, options = { recursive: true, encoded: false })
61
+ Avro::SchemaValidator.validate!(expected_schema, logical_datum, options)
62
62
  true
63
63
  rescue Avro::SchemaValidator::ValidationError
64
64
  false
@@ -1,9 +1,16 @@
1
1
  module AvroPatches
2
2
  module LogicalTypes
3
3
  module SchemaValidatorPatch
4
- def validate!(expected_schema, logical_datum, encoded = false)
4
+ def validate!(expected_schema, logical_datum, options = { recursive: true, encoded: false })
5
+ options ||= {}
6
+ options[:recursive] = true unless options.key?(:recursive)
7
+
5
8
  result = Avro::SchemaValidator::Result.new
6
- validate_recursive(expected_schema, logical_datum, Avro::SchemaValidator::ROOT_IDENTIFIER, result, encoded)
9
+ if options[:recursive]
10
+ validate_recursive(expected_schema, logical_datum, Avro::SchemaValidator::ROOT_IDENTIFIER, result, options[:encoded])
11
+ else
12
+ validate_simple(expected_schema, logical_datum, Avro::SchemaValidator::ROOT_IDENTIFIER, result, options[:encoded])
13
+ end
7
14
  fail Avro::SchemaValidator::ValidationError, result if result.failure?
8
15
  result
9
16
  end
@@ -11,14 +18,41 @@ module AvroPatches
11
18
  private
12
19
 
13
20
  def validate_recursive(expected_schema, logical_datum, path, result, encoded = false)
14
- datum = if encoded
15
- logical_datum
16
- else
17
- expected_schema.type_adapter.encode(logical_datum) rescue nil
18
- end
21
+ datum = resolve_datum(expected_schema, logical_datum, encoded)
22
+
23
+ # The entire method is overridden so that encoded: true can be passed to validate_simple here (datum was already resolved above)
24
+ validate_simple(expected_schema, datum, path, result, true)
25
+
26
+ case expected_schema.type_sym
27
+ when :array
28
+ validate_array(expected_schema, datum, path, result)
29
+ when :map
30
+ validate_map(expected_schema, datum, path, result)
31
+ when :union
32
+ validate_union(expected_schema, datum, path, result)
33
+ when :record, :error, :request
34
+ fail Avro::SchemaValidator::TypeMismatchError unless datum.is_a?(Hash)
35
+ expected_schema.fields.each do |field|
36
+ deeper_path = deeper_path_for_hash(field.name, path)
37
+ validate_recursive(field.type, datum[field.name], deeper_path, result)
38
+ end
39
+ end
40
+ rescue Avro::SchemaValidator::TypeMismatchError
41
+ result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
42
+ end
19
43
 
44
+ def validate_simple(expected_schema, logical_datum, path, result, encoded = false)
45
+ datum = resolve_datum(expected_schema, logical_datum, encoded)
20
46
  super(expected_schema, datum, path, result)
21
47
  end
48
+
49
+ def resolve_datum(expected_schema, logical_datum, encoded)
50
+ if encoded
51
+ logical_datum
52
+ else
53
+ expected_schema.type_adapter.encode(logical_datum) rescue nil
54
+ end
55
+ end
22
56
  end
23
57
  end
24
58
  end
@@ -1,7 +1,7 @@
1
1
  Avro::Schema.class_eval do
2
2
  # Determine if a ruby datum is an instance of a schema
3
- def self.validate(expected_schema, datum)
4
- Avro::SchemaValidator.validate!(expected_schema, datum)
3
+ def self.validate(expected_schema, datum, options = { recursive: true })
4
+ Avro::SchemaValidator.validate!(expected_schema, datum, options)
5
5
  true
6
6
  rescue Avro::SchemaValidator::ValidationError
7
7
  false
@@ -63,16 +63,56 @@ module Avro
63
63
  TypeMismatchError = Class.new(ValidationError)
64
64
 
65
65
  class << self
66
- def validate!(expected_schema, datum)
67
- result = Result.new
68
- validate_recursive(expected_schema, datum, ROOT_IDENTIFIER, result)
69
- fail ValidationError, result if result.failure?
66
+ # This method is replaced by code in AvroPatches::LogicalTypes::SchemaValidatorPatch.
67
+ def validate!(expected_schema, datum, options = { recursive: true })
68
+ options ||= {}
69
+ options[:recursive] = true unless options.key?(:recursive)
70
+
71
+ result = Avro::SchemaValidator::Result.new
72
+ if options[:recursive]
73
+ validate_recursive(expected_schema, datum, ROOT_IDENTIFIER, result)
74
+ else
75
+ validate_simple(expected_schema, datum, ROOT_IDENTIFIER, result)
76
+ end
77
+ fail Avro::SchemaValidator::ValidationError, result if result.failure?
70
78
  result
71
79
  end
72
80
 
73
81
  private
74
82
 
83
+ def validate_type(expected_schema)
84
+ unless Avro::Schema::VALID_TYPES_SYM.include?(expected_schema.type_sym)
85
+ fail "Unexpected schema type #{expected_schema.type_sym} #{expected_schema.inspect}"
86
+ end
87
+ end
88
+
89
+ # This method is replaced by code in AvroPatches::LogicalTypes::SchemaValidatorPatch.
90
+ # The patches are layered this way because SchemaValidator exists on
91
+ # avro's master branch, but logical type support is still in a pull request.
75
92
  def validate_recursive(expected_schema, datum, path, result)
93
+ validate_simple(expected_schema, datum, path, result)
94
+
95
+ case expected_schema.type_sym
96
+ when :array
97
+ validate_array(expected_schema, datum, path, result)
98
+ when :map
99
+ validate_map(expected_schema, datum, path, result)
100
+ when :union
101
+ validate_union(expected_schema, datum, path, result)
102
+ when :record, :error, :request
103
+ fail TypeMismatchError unless datum.is_a?(Hash)
104
+ expected_schema.fields.each do |field|
105
+ deeper_path = deeper_path_for_hash(field.name, path)
106
+ validate_recursive(field.type, datum[field.name], deeper_path, result)
107
+ end
108
+ end
109
+ rescue TypeMismatchError
110
+ result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
111
+ end
112
+
113
+ def validate_simple(expected_schema, datum, path, result)
114
+ validate_type(expected_schema)
115
+
76
116
  case expected_schema.type_sym
77
117
  when :null
78
118
  fail TypeMismatchError unless datum.nil?
@@ -96,20 +136,6 @@ module Avro
96
136
  end
97
137
  when :enum
98
138
  result.add_error(path, enum_message(expected_schema.symbols, datum)) unless expected_schema.symbols.include?(datum)
99
- when :array
100
- validate_array(expected_schema, datum, path, result)
101
- when :map
102
- validate_map(expected_schema, datum, path, result)
103
- when :union
104
- validate_union(expected_schema, datum, path, result)
105
- when :record, :error, :request
106
- fail TypeMismatchError unless datum.is_a?(Hash)
107
- expected_schema.fields.each do |field|
108
- deeper_path = deeper_path_for_hash(field.name, path)
109
- validate_recursive(field.type, datum[field.name], deeper_path, result)
110
- end
111
- else
112
- fail "Unexpected schema type #{expected_schema.type_sym} #{expected_schema.inspect}"
113
139
  end
114
140
  rescue TypeMismatchError
115
141
  result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
@@ -1,3 +1,3 @@
1
1
  module AvroPatches
2
- VERSION = '0.2.0'.freeze
2
+ VERSION = '0.3.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro-patches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Salsify, Inc
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-05 00:00:00.000000000 Z
11
+ date: 2017-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -102,6 +102,8 @@ files:
102
102
  - bin/console
103
103
  - bin/setup
104
104
  - lib/avro-patches.rb
105
+ - lib/avro-patches/ensure_encoding.rb
106
+ - lib/avro-patches/ensure_encoding/io.rb
105
107
  - lib/avro-patches/logical_types.rb
106
108
  - lib/avro-patches/logical_types/io.rb
107
109
  - lib/avro-patches/logical_types/logical_types.rb