avro-patches 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4501694b4e822ab5d2f9b08cd4d2406474b7efa6
4
- data.tar.gz: 7cb86acf0bb9cb38486d59263904e1eac8eb4be7
3
+ metadata.gz: 28f04e46e3ccd4a1d9400d32fed8474f1d4b9edc
4
+ data.tar.gz: 412323d266beed3d397ee861775dc340e2d12020
5
5
  SHA512:
6
- metadata.gz: 7d2aa4dc274fef6e4727ac3d3842c984223b0ab8ab2fdaed522490d4a4a28d15ae636fb7844aea8f5ff20f0983e698c9d8aec42c3e7e3ba7b2b08e9e1a692761
7
- data.tar.gz: 03b0e275a36bfbf88e18638661bbc59bcad4a423a6767743a5560c9b2092e8a25a829128c941d8bb3b79693926b478361d78b47826e3b37939e0a94a7f2c172c
6
+ metadata.gz: d95df0967c75e5f5aba614785db7f7407e9e5b88f35c2c524ed230555425d4dc846cde3ae15cfe8f8923c6f1792d8724e01ca9e8dfda73a230304a6e56ad6714
7
+ data.tar.gz: 568746bb7f634a1168a1b109f59170f7ad79e3ed1cbcf99f6325913b559283c235c66d1c8e3e08ec5e7032bc610dcb62b4dc84e4ce3eb366e77fc78ba759003b
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # avro-patches
2
2
 
3
+ ## v0.3.0
4
+ - Further performance improvements for `Avro::SchemaValidator` and encoding.
5
+ - Ensure that strings are encoded as UTF-8.
6
+
3
7
  ## v0.2.0
4
8
  - Performance improvements for `Avro::SchemaValidator`.
5
9
 
data/lib/avro-patches.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'avro-patches/version'
2
2
 
3
3
  require 'avro'
4
+ require 'avro-patches/ensure_encoding'
4
5
  require 'avro-patches/schema_validator'
5
6
  require 'avro-patches/logical_types'
6
7
  require 'avro-patches/schema_compatibility'
@@ -0,0 +1,5 @@
1
+ # Change from "AVRO-1783: Ruby: Ensure correct binary encoding for byte strings"
2
+ # https://github.com/apache/avro/commit/315d842148d57590a58fafecf6e5ea378e9e0d74
3
+
4
+ # Only part of the above commit is included as we are not using protocols and RPC
5
+ require_relative 'ensure_encoding/io'
@@ -0,0 +1,12 @@
1
+ Avro::IO::DatumWriter.class_eval do
2
+ # A string is encoded as a long followed by that many bytes of
3
+ # UTF-8 encoded character data
4
+ def write_string(datum)
5
+ # The original commit used:
6
+ # datum = datum.encode('utf-8') if datum.respond_to? :encode
7
+ # This always allocated a new string even if the string was already UTF-8 encoded.
8
+ # The form below is slightly more efficient.
9
+ datum = datum.encode(Encoding::UTF_8) if datum.respond_to?(:encode) && datum.encoding != Encoding::UTF_8
10
+ write_bytes(datum)
11
+ end
12
+ end
@@ -2,7 +2,7 @@ Avro::IO::DatumWriter.class_eval do
2
2
  def write_data(writers_schema, logical_datum, encoder)
3
3
  datum = writers_schema.type_adapter.encode(logical_datum)
4
4
 
5
- unless Avro::Schema.validate(writers_schema, datum, encoded = true)
5
+ unless Avro::Schema.validate(writers_schema, datum, { recursive: false, encoded: true })
6
6
  raise Avro::IO::AvroTypeError.new(writers_schema, datum)
7
7
  end
8
8
 
@@ -57,8 +57,8 @@ Avro::Schema.class_eval do
57
57
  end
58
58
 
59
59
  # Determine if a ruby datum is an instance of a schema
60
- def self.validate(expected_schema, logical_datum, encoded = false)
61
- Avro::SchemaValidator.validate!(expected_schema, logical_datum, encoded)
60
+ def self.validate(expected_schema, logical_datum, options = { recursive: true, encoded: false })
61
+ Avro::SchemaValidator.validate!(expected_schema, logical_datum, options)
62
62
  true
63
63
  rescue Avro::SchemaValidator::ValidationError
64
64
  false
@@ -1,9 +1,16 @@
1
1
  module AvroPatches
2
2
  module LogicalTypes
3
3
  module SchemaValidatorPatch
4
- def validate!(expected_schema, logical_datum, encoded = false)
4
+ def validate!(expected_schema, logical_datum, options = { recursive: true, encoded: false })
5
+ options ||= {}
6
+ options[:recursive] = true unless options.key?(:recursive)
7
+
5
8
  result = Avro::SchemaValidator::Result.new
6
- validate_recursive(expected_schema, logical_datum, Avro::SchemaValidator::ROOT_IDENTIFIER, result, encoded)
9
+ if options[:recursive]
10
+ validate_recursive(expected_schema, logical_datum, Avro::SchemaValidator::ROOT_IDENTIFIER, result, options[:encoded])
11
+ else
12
+ validate_simple(expected_schema, logical_datum, Avro::SchemaValidator::ROOT_IDENTIFIER, result, options[:encoded])
13
+ end
7
14
  fail Avro::SchemaValidator::ValidationError, result if result.failure?
8
15
  result
9
16
  end
@@ -11,14 +18,41 @@ module AvroPatches
11
18
  private
12
19
 
13
20
  def validate_recursive(expected_schema, logical_datum, path, result, encoded = false)
14
- datum = if encoded
15
- logical_datum
16
- else
17
- expected_schema.type_adapter.encode(logical_datum) rescue nil
18
- end
21
+ datum = resolve_datum(expected_schema, logical_datum, encoded)
22
+
23
+ # The entire method is overridden so that encoded: true can be passed here
24
+ validate_simple(expected_schema, datum, path, result, true)
25
+
26
+ case expected_schema.type_sym
27
+ when :array
28
+ validate_array(expected_schema, datum, path, result)
29
+ when :map
30
+ validate_map(expected_schema, datum, path, result)
31
+ when :union
32
+ validate_union(expected_schema, datum, path, result)
33
+ when :record, :error, :request
34
+ fail Avro::SchemaValidator::TypeMismatchError unless datum.is_a?(Hash)
35
+ expected_schema.fields.each do |field|
36
+ deeper_path = deeper_path_for_hash(field.name, path)
37
+ validate_recursive(field.type, datum[field.name], deeper_path, result)
38
+ end
39
+ end
40
+ rescue Avro::SchemaValidator::TypeMismatchError
41
+ result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
42
+ end
19
43
 
44
+ def validate_simple(expected_schema, logical_datum, path, result, encoded = false)
45
+ datum = resolve_datum(expected_schema, logical_datum, encoded)
20
46
  super(expected_schema, datum, path, result)
21
47
  end
48
+
49
+ def resolve_datum(expected_schema, logical_datum, encoded)
50
+ if encoded
51
+ logical_datum
52
+ else
53
+ expected_schema.type_adapter.encode(logical_datum) rescue nil
54
+ end
55
+ end
22
56
  end
23
57
  end
24
58
  end
@@ -1,7 +1,7 @@
1
1
  Avro::Schema.class_eval do
2
2
  # Determine if a ruby datum is an instance of a schema
3
- def self.validate(expected_schema, datum)
4
- Avro::SchemaValidator.validate!(expected_schema, datum)
3
+ def self.validate(expected_schema, datum, options = { recursive: true })
4
+ Avro::SchemaValidator.validate!(expected_schema, datum, options)
5
5
  true
6
6
  rescue Avro::SchemaValidator::ValidationError
7
7
  false
@@ -63,16 +63,56 @@ module Avro
63
63
  TypeMismatchError = Class.new(ValidationError)
64
64
 
65
65
  class << self
66
- def validate!(expected_schema, datum)
67
- result = Result.new
68
- validate_recursive(expected_schema, datum, ROOT_IDENTIFIER, result)
69
- fail ValidationError, result if result.failure?
66
+ # This method is replaced by code in AvroPatches::LogicalTypes::SchemaValidatorPatch.
67
+ def validate!(expected_schema, datum, options = { recursive: true })
68
+ options ||= {}
69
+ options[:recursive] = true unless options.key?(:recursive)
70
+
71
+ result = Avro::SchemaValidator::Result.new
72
+ if options[:recursive]
73
+ validate_recursive(expected_schema, datum, ROOT_IDENTIFIER, result)
74
+ else
75
+ validate_simple(expected_schema, datum, ROOT_IDENTIFIER, result)
76
+ end
77
+ fail Avro::SchemaValidator::ValidationError, result if result.failure?
70
78
  result
71
79
  end
72
80
 
73
81
  private
74
82
 
83
+ def validate_type(expected_schema)
84
+ unless Avro::Schema::VALID_TYPES_SYM.include?(expected_schema.type_sym)
85
+ fail "Unexpected schema type #{expected_schema.type_sym} #{expected_schema.inspect}"
86
+ end
87
+ end
88
+
89
+ # This method is replaced by code in AvroPatches::LogicalTypes::SchemaValidatorPatch.
90
+ # The patches are layered this way because SchemaValidator exists on
91
+ # avro's master branch but logical type support is still in PR.
75
92
  def validate_recursive(expected_schema, datum, path, result)
93
+ validate_simple(expected_schema, datum, path, result)
94
+
95
+ case expected_schema.type_sym
96
+ when :array
97
+ validate_array(expected_schema, datum, path, result)
98
+ when :map
99
+ validate_map(expected_schema, datum, path, result)
100
+ when :union
101
+ validate_union(expected_schema, datum, path, result)
102
+ when :record, :error, :request
103
+ fail TypeMismatchError unless datum.is_a?(Hash)
104
+ expected_schema.fields.each do |field|
105
+ deeper_path = deeper_path_for_hash(field.name, path)
106
+ validate_recursive(field.type, datum[field.name], deeper_path, result)
107
+ end
108
+ end
109
+ rescue TypeMismatchError
110
+ result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
111
+ end
112
+
113
+ def validate_simple(expected_schema, datum, path, result)
114
+ validate_type(expected_schema)
115
+
76
116
  case expected_schema.type_sym
77
117
  when :null
78
118
  fail TypeMismatchError unless datum.nil?
@@ -96,20 +136,6 @@ module Avro
96
136
  end
97
137
  when :enum
98
138
  result.add_error(path, enum_message(expected_schema.symbols, datum)) unless expected_schema.symbols.include?(datum)
99
- when :array
100
- validate_array(expected_schema, datum, path, result)
101
- when :map
102
- validate_map(expected_schema, datum, path, result)
103
- when :union
104
- validate_union(expected_schema, datum, path, result)
105
- when :record, :error, :request
106
- fail TypeMismatchError unless datum.is_a?(Hash)
107
- expected_schema.fields.each do |field|
108
- deeper_path = deeper_path_for_hash(field.name, path)
109
- validate_recursive(field.type, datum[field.name], deeper_path, result)
110
- end
111
- else
112
- fail "Unexpected schema type #{expected_schema.type_sym} #{expected_schema.inspect}"
113
139
  end
114
140
  rescue TypeMismatchError
115
141
  result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
@@ -1,3 +1,3 @@
1
1
  module AvroPatches
2
- VERSION = '0.2.0'.freeze
2
+ VERSION = '0.3.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro-patches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Salsify, Inc
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-05 00:00:00.000000000 Z
11
+ date: 2017-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -102,6 +102,8 @@ files:
102
102
  - bin/console
103
103
  - bin/setup
104
104
  - lib/avro-patches.rb
105
+ - lib/avro-patches/ensure_encoding.rb
106
+ - lib/avro-patches/ensure_encoding/io.rb
105
107
  - lib/avro-patches/logical_types.rb
106
108
  - lib/avro-patches/logical_types/io.rb
107
109
  - lib/avro-patches/logical_types/logical_types.rb