typed_data 0.1.1 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa8bfbb2363bcf13407a8bb1fdeed0099f16a5cda9296dd77489fb2b2d7bb024
4
- data.tar.gz: 7c446db80a65cc04d22584f38d53c313092432e71ba64321c22d43967a976f8a
3
+ metadata.gz: 8328e9cdeaee3bbab6b796988ff1436fc913aa8be6078b99848fe96a4d109156
4
+ data.tar.gz: f3b471b2a4cbc86c8a8a3b07d038c373ca7272f5417f7dd4301cd25a187d4383
5
5
  SHA512:
6
- metadata.gz: 8e783163ba498a893cacff761e9d502acf0e524e8f57790cf3500ee8e29b2310133dfacd76f017d47cf81bde4f9f0550d4bc78523e7213e7dbd80adae8864215
7
- data.tar.gz: 31376d90ff56ab09b0a004b0d0bc8efaed10a6ec4d89cf8eee7c4fd2532c5f533507b96f29af365d6d413475f75d2be1a206d157ce3b19bc847ab24d2c52a17f
6
+ metadata.gz: 15cf75777af9064e4804709b3c4aef5b3f29d867d29eefa4c93e20a5216b75c1e988809ecf985bf285178e2f1e36ff513316aeb69f179c695f196a511ce439e6
7
+ data.tar.gz: a00c6feb6091ed05d0e28c1afb26d386cab86aa3c110792423bd17f03c8637fb8fab500ea0e220c2c6dc8942f3b6fdc8a86f3668eb23c817d53c847de9495afe
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # TypedData
2
2
 
3
- ![](https://github.com/abicky/ecsmec/workflows/test/badge.svg?branch=master)
3
+ ![](https://github.com/abicky/typed_data/workflows/CI/badge.svg?branch=master)
4
4
 
5
5
  TypedData is a library that converts hash objects managed by an Avro schema so that the objects can be loaded into BigQuery.
6
6
 
@@ -79,36 +79,42 @@ converter.convert({
79
79
  },
80
80
  })
81
81
  #=> {"int_field"=>1,
82
- # "int_or_string_field"=>{"int_value"=>nil, "string_value"=>"string"},
82
+ # "int_or_string_field"=>{"string_value"=>"string"},
83
83
  # "array_field"=>[1, 2],
84
- # "union_type_array_field"=>
85
- # [{"int_value"=>"1", "string_value"=>nil},
86
- # {"int_value"=>nil, "string_value"=>"2"}],
84
+ # "union_type_array_field"=>[{"int_value"=>"1"}, {"string_value"=>"2"}],
87
85
  # "nested_map_field"=>
88
86
  # [{"key"=>"nested_map",
89
87
  # "value"=>
90
- # [{"key"=>"key1", "value"=>{"int_value"=>"1", "string_value"=>nil}},
91
- # {"key"=>"key2", "value"=>{"int_value"=>nil, "string_value"=>"2"}}]}]}
88
+ # [{"key"=>"key1", "value"=>{"int_value"=>"1"}},
89
+ # {"key"=>"key2", "value"=>{"string_value"=>"2"}}]}]}
92
90
  ```
93
91
 
94
92
  You can specify a formatter for the union type keys. For example, a formatter for tables managed by the [Google BigQuery Sink Connector](https://docs.confluent.io/current/connect/kafka-connect-bigquery/index.html) looks like this:
95
93
 
96
94
  ```ruby
97
- schema = {
98
- "name" => "Record",
99
- "type" => "record",
100
- "fields" => [
101
- {
102
- "name" => "int_or_string_field",
103
- "type" => ["int", "string"],
104
- },
105
- ],
106
- }
107
-
108
95
  converter = TypedData::Converter.new(schema)
109
96
  converter.union_type_key_formatter = ->(type) { type.split("_").first }
110
- converter.convert({ "int_or_string_field" => "string" })
111
- #=> {"int_or_string_field"=>{"int"=>nil, "string"=>"string"}}
97
+ converter.convert({
98
+ "int_field" => 1,
99
+ "int_or_string_field" => "string",
100
+ "array_field" => [1, 2],
101
+ "union_type_array_field" => [1, "2"],
102
+ "nested_map_field" => {
103
+ "nested_map" => {
104
+ "key1" => 1,
105
+ "key2" => "2",
106
+ },
107
+ },
108
+ })
109
+ #=> {"int_field"=>1,
110
+ # "int_or_string_field"=>{"string"=>"string"},
111
+ # "array_field"=>[1, 2],
112
+ # "union_type_array_field"=>[{"int"=>"1"}, {"string"=>"2"}],
113
+ # "nested_map_field"=>
114
+ # [{"key"=>"nested_map",
115
+ # "value"=>
116
+ # [{"key"=>"key1", "value"=>{"int"=>"1"}},
117
+ # {"key"=>"key2", "value"=>{"string"=>"2"}}]}]}
112
118
  ```
113
119
 
114
120
 
@@ -120,7 +126,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
120
126
 
121
127
  ## Contributing
122
128
 
123
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/typed_data.
129
+ Bug reports and pull requests are welcome on GitHub at https://github.com/abicky/typed_data.
124
130
 
125
131
 
126
132
  ## License
@@ -31,9 +31,9 @@ module TypedData
31
31
  when Schema::RecordType
32
32
  converted[key] = convert_record(subtype, value)
33
33
  when Schema::UnionType
34
- converted[key] = convert_union(subtype, value, as_record_field: true)
34
+ converted[key] = convert_union(subtype, value)
35
35
  else
36
- converted[key] = subtype.coerce(value, formatter: union_type_key_formatter)
36
+ converted[key] = subtype.coerce(value)
37
37
  end
38
38
  end
39
39
  end
@@ -55,7 +55,7 @@ module TypedData
55
55
  when Schema::UnionType
56
56
  ret << convert_union(subtype, value)
57
57
  else
58
- ret << type.coerce(value, formatter: union_type_key_formatter)
58
+ ret << subtype.coerce(value)
59
59
  end
60
60
  end
61
61
  end
@@ -75,16 +75,15 @@ module TypedData
75
75
  when Schema::UnionType
76
76
  value = convert_union(subtype, value)
77
77
  else
78
- value = type.coerce(value, formatter: union_type_key_formatter)
78
+ value = subtype.coerce(value)
79
79
  end
80
80
  ret << { "key" => key, "value" => value }
81
81
  end
82
82
  end
83
83
 
84
84
  # @param type [UnionType]
85
- # @param as_record_field [Boolean]
86
85
  # @param value [Object]
87
- def convert_union(type, value, as_record_field: false)
86
+ def convert_union(type, value)
88
87
  subtype = type.find_match(value)
89
88
  case subtype
90
89
  when Schema::ArrayType
@@ -95,15 +94,18 @@ module TypedData
95
94
  converted_value = convert_record(subtype, value)
96
95
  when Schema::UnionType
97
96
  converted_value = convert_union(subtype, value)
97
+ when Schema::NullType
98
+ converted_value = nil
98
99
  else
99
- return type.coerce(value, formatter: union_type_key_formatter)
100
+ converted_value = subtype.coerce(value)
100
101
  end
101
102
 
102
- if as_record_field
103
+ if type.nullable_single?
103
104
  converted_value
105
+ elsif subtype.is_a?(Schema::NullType)
106
+ {}
104
107
  else
105
- type.default_value(union_type_key_formatter)
106
- .merge!(union_type_key_formatter.call(subtype.to_s) => converted_value)
108
+ { union_type_key_formatter.call(subtype.to_s) => converted_value }
107
109
  end
108
110
  end
109
111
  end
@@ -5,17 +5,16 @@ require "typed_data/schema/bytes_type"
5
5
  require "typed_data/schema/enum_type"
6
6
  require "typed_data/schema/float_type"
7
7
  require "typed_data/schema/int_type"
8
+ require "typed_data/schema/long_type"
8
9
  require "typed_data/schema/map_type"
9
10
  require "typed_data/schema/null_type"
10
11
  require "typed_data/schema/record_type"
11
12
  require "typed_data/schema/string_type"
12
13
  require "typed_data/schema/union_type"
14
+ require "typed_data/schema/errors"
13
15
 
14
16
  module TypedData
15
17
  class Schema
16
- class UnknownField < StandardError; end
17
- class UnsupportedType < StandardError; end
18
-
19
18
  class << self
20
19
  def build_type(type, logical_type = nil)
21
20
  type = type.first if type.is_a?(Array) && type.size == 1
@@ -42,14 +41,16 @@ module TypedData
42
41
  values = type["values"] || type[:values]
43
42
  MapType.new(values.is_a?(Array) ? values : [values])
44
43
  when "record"
45
- RecordType.new(type["fields"] || type[:fields])
44
+ RecordType.new(type["name"] || type[:name], type["fields"] || type[:fields])
46
45
  else
47
46
  raise UnsupportedType, "Unknown type: #{subtype}"
48
47
  end
49
48
  when "boolean"
50
49
  BooleanType.new(type, logical_type)
51
- when "int", "long"
50
+ when "int"
52
51
  IntType.new(type, logical_type)
52
+ when "long"
53
+ LongType.new(type, logical_type)
53
54
  when "float", "double"
54
55
  FloatType.new(type, logical_type)
55
56
  when "bytes"
@@ -72,7 +73,7 @@ module TypedData
72
73
  if (schema["type"] || schema[:type]) != "record"
73
74
  raise UnsupportedType, 'The root type must be "record"'
74
75
  end
75
- @root_type = RecordType.new(schema["fields"] || schema[:fields])
76
+ @root_type = RecordType.new(schema["name"] || schema[:name], schema["fields"] || schema[:fields])
76
77
  end
77
78
  end
78
79
  end
@@ -15,10 +15,6 @@ module TypedData
15
15
  "array_#{@type}"
16
16
  end
17
17
 
18
- def coerce(value, formatter:)
19
- @type.coerce(value, formatter: formatter)
20
- end
21
-
22
18
  def primitive?
23
19
  false
24
20
  end
@@ -3,7 +3,7 @@
3
3
  module TypedData
4
4
  class Schema
5
5
  class BytesType < Type
6
- def coerce(value, formatter:)
6
+ def coerce(value)
7
7
  [value].pack("m0")
8
8
  end
9
9
 
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TypedData
4
+ class Schema
5
+ class UnknownField < StandardError; end
6
+ class UnsupportedType < StandardError; end
7
+ class InvalidValue < StandardError; end
8
+ end
9
+ end
@@ -3,6 +3,9 @@
3
3
  module TypedData
4
4
  class Schema
5
5
  class IntType < Type
6
+ VALUE_RANGE = -2**31 .. 2**31 - 1
7
+ SUPPORTED_LOGICAL_TYPES = %w[date time-millis]
8
+
6
9
  def to_s
7
10
  if @logical_type
8
11
  "#{@name}_#{@logical_type.gsub("-", "_")}"
@@ -11,18 +14,12 @@ module TypedData
11
14
  end
12
15
  end
13
16
 
14
- def coerce(value, formatter:)
17
+ def coerce(value)
15
18
  case @logical_type
16
19
  when "date"
17
20
  (Date.new(1970, 1, 1) + value).to_s
18
21
  when "time-millis"
19
22
  Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%T.%3N")
20
- when "time-micros"
21
- Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
22
- when "timestamp-millis"
23
- Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
24
- when "timestamp-micros"
25
- Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
26
23
  else
27
24
  value
28
25
  end
@@ -33,7 +30,7 @@ module TypedData
33
30
  end
34
31
 
35
32
  def match?(value)
36
- value.is_a?(Integer)
33
+ value.is_a?(Integer) && VALUE_RANGE.cover?(value)
37
34
  end
38
35
  end
39
36
  end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TypedData
4
+ class Schema
5
+ class LongType < Type
6
+ SUPPORTED_LOGICAL_TYPES = %w[time-micros timestamp-millis timestamp-micros]
7
+
8
+ def to_s
9
+ if @logical_type
10
+ "#{@name}_#{@logical_type.gsub("-", "_")}"
11
+ else
12
+ @name
13
+ end
14
+ end
15
+
16
+ def coerce(value)
17
+ case @logical_type
18
+ when "time-micros"
19
+ Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
20
+ when "timestamp-millis"
21
+ Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
22
+ when "timestamp-micros"
23
+ Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
24
+ else
25
+ value
26
+ end
27
+ end
28
+
29
+ def primitive?
30
+ true
31
+ end
32
+
33
+ def match?(value)
34
+ value.is_a?(Integer)
35
+ end
36
+ end
37
+ end
38
+ end
@@ -12,10 +12,6 @@ module TypedData
12
12
  "map_#{@type}"
13
13
  end
14
14
 
15
- def coerce(value, formatter:)
16
- @type.coerce(value, formatter: formatter)
17
- end
18
-
19
15
  def primitive?
20
16
  false
21
17
  end
@@ -4,7 +4,8 @@ module TypedData
4
4
  class Schema
5
5
  class RecordType < Type
6
6
  # @param fields [Array] an array of "fields" in an Avro schema
7
- def initialize(fields)
7
+ def initialize(name, fields)
8
+ @name = name
8
9
  @field_to_type = fields.each_with_object({}) do |field, h|
9
10
  h[field["name"] || field[:name]] = Schema.build_type(field["type"] || field[:type])
10
11
  end
@@ -21,8 +22,12 @@ module TypedData
21
22
  end
22
23
  end
23
24
 
25
+ def find_match(value)
26
+ raise InvalidValue, %Q{the value #{value.inspect} doesn't match the type #{self}}
27
+ end
28
+
24
29
  def match?(value)
25
- value.is_a?(Hash)
30
+ value.is_a?(Hash) && value.all? { |k, v| @field_to_type[k]&.match?(v) }
26
31
  end
27
32
  end
28
33
  end
@@ -3,6 +3,8 @@
3
3
  module TypedData
4
4
  class Schema
5
5
  class StringType < Type
6
+ SUPPORTED_LOGICAL_TYPES = %w[uuid]
7
+
6
8
  def primitive?
7
9
  true
8
10
  end
@@ -1,10 +1,16 @@
1
1
  # frozen_string_literal: true
2
+ require "typed_data/schema/errors"
2
3
 
3
4
  module TypedData
4
5
  class Schema
5
6
  class Type
7
+ SUPPORTED_LOGICAL_TYPES = []
8
+
6
9
  def initialize(name, logical_type = nil)
7
10
  @name = name
11
+ if logical_type && !self.class::SUPPORTED_LOGICAL_TYPES.include?(logical_type)
12
+ raise UnsupportedType, %Q{#{name} doesn't support the logical type "#{logical_type}"}
13
+ end
8
14
  @logical_type = logical_type
9
15
  end
10
16
 
@@ -12,7 +18,7 @@ module TypedData
12
18
  @name
13
19
  end
14
20
 
15
- def coerce(value, formatter:)
21
+ def coerce(value)
16
22
  value
17
23
  end
18
24
 
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ require "typed_data/schema/errors"
2
3
 
3
4
  module TypedData
4
5
  class Schema
@@ -6,41 +7,29 @@ module TypedData
6
7
  # @param types [Array<String>]
7
8
  def initialize(types)
8
9
  @types = types.map(&Schema.method(:build_type))
9
- @nullable_primitive = @types.size == 2 && @types.any?(&:primitive?) && @types.any? { |t| t.is_a?(NullType) }
10
+ @nullable_single = @types.size == 2 && @types.any? { |t| t.is_a?(NullType) }
11
+ @nullable_primitive = @nullable_single && @types.any?(&:primitive?)
10
12
  end
11
13
 
12
14
  def to_s
13
15
  @nullable_primitive ? @types.first.to_s : "union_#{@types.map(&:to_s).join("_")}"
14
16
  end
15
17
 
16
- def coerce(value, formatter:)
17
- return value if @nullable_primitive
18
-
19
- type = find_match(value)
20
- if type.is_a?(NullType)
21
- default_value(formatter)
22
- else
23
- default_value(formatter).merge!(formatter.call(type.to_s) => type.coerce(value, formatter: formatter).to_s)
24
- end
25
- end
26
-
27
18
  def primitive?
28
19
  false
29
20
  end
30
21
 
31
22
  def find_match(value)
32
- @types.find { |t| t.match?(value) }
23
+ @types.find { |t| t.match?(value) } or
24
+ raise InvalidValue, %Q{the value #{value.inspect} doesn't match the type #{@types.map(&:to_s)}}
33
25
  end
34
26
 
35
27
  def match?(value)
36
28
  @types.any? { |t| t.match?(value) }
37
29
  end
38
30
 
39
- def default_value(formatter)
40
- @types.each_with_object({}) do |t, v|
41
- next if t.is_a?(NullType)
42
- v[formatter.call(t.to_s)] = t.primitive? || t.is_a?(EnumType) ? nil : []
43
- end
31
+ def nullable_single?
32
+ @nullable_single
44
33
  end
45
34
  end
46
35
  end
@@ -1,3 +1,3 @@
1
1
  module TypedData
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: typed_data
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - abicky
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-04-21 00:00:00.000000000 Z
11
+ date: 2021-08-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: avro
@@ -63,8 +63,10 @@ files:
63
63
  - lib/typed_data/schema/boolean_type.rb
64
64
  - lib/typed_data/schema/bytes_type.rb
65
65
  - lib/typed_data/schema/enum_type.rb
66
+ - lib/typed_data/schema/errors.rb
66
67
  - lib/typed_data/schema/float_type.rb
67
68
  - lib/typed_data/schema/int_type.rb
69
+ - lib/typed_data/schema/long_type.rb
68
70
  - lib/typed_data/schema/map_type.rb
69
71
  - lib/typed_data/schema/null_type.rb
70
72
  - lib/typed_data/schema/record_type.rb
@@ -94,7 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
94
96
  - !ruby/object:Gem::Version
95
97
  version: '0'
96
98
  requirements: []
97
- rubygems_version: 3.1.2
99
+ rubygems_version: 3.1.4
98
100
  signing_key:
99
101
  specification_version: 4
100
102
  summary: A library that converts hash objects managed by an Avro schema