typed_data 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +25 -19
- data/lib/typed_data/converter.rb +12 -10
- data/lib/typed_data/schema.rb +6 -3
- data/lib/typed_data/schema/array_type.rb +0 -4
- data/lib/typed_data/schema/bytes_type.rb +1 -1
- data/lib/typed_data/schema/int_type.rb +4 -8
- data/lib/typed_data/schema/long_type.rb +36 -0
- data/lib/typed_data/schema/map_type.rb +0 -4
- data/lib/typed_data/schema/record_type.rb +3 -2
- data/lib/typed_data/schema/type.rb +1 -1
- data/lib/typed_data/schema/union_type.rb +4 -17
- data/lib/typed_data/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 483f78cdcfb329a0d14f4173fa586f64eae12c8f61dbd78492f1cd85cceadbea
|
4
|
+
data.tar.gz: 45173313046d5a6dc54d920ed5bee399e64fd8af2a2e710e2b5051b922fcc440
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4041f615dedb5782ce40d15ae80eca87f3e168b988c2d2de4d9d07d0ba05b00bc2a6e8b14b981d65575ab9f2e38b0db50653286c3093ffaa8e7a586eb7a5e5d9
|
7
|
+
data.tar.gz: 04f2f8e8a86d63e8f5e06066b96e983332107f116694e059e0b06cd14ebb66827cd8bddc2e85c631298518d8387d28452edd856161b412c105f9a9d05d4151fa
|
data/README.md
CHANGED
@@ -79,36 +79,42 @@ converter.convert({
|
|
79
79
|
},
|
80
80
|
})
|
81
81
|
#=> {"int_field"=>1,
|
82
|
-
# "int_or_string_field"=>{"
|
82
|
+
# "int_or_string_field"=>{"string_value"=>"string"},
|
83
83
|
# "array_field"=>[1, 2],
|
84
|
-
# "union_type_array_field"=>
|
85
|
-
# [{"int_value"=>"1", "string_value"=>nil},
|
86
|
-
# {"int_value"=>nil, "string_value"=>"2"}],
|
84
|
+
# "union_type_array_field"=>[{"int_value"=>"1"}, {"string_value"=>"2"}],
|
87
85
|
# "nested_map_field"=>
|
88
86
|
# [{"key"=>"nested_map",
|
89
87
|
# "value"=>
|
90
|
-
# [{"key"=>"key1", "value"=>{"int_value"=>"1"
|
91
|
-
# {"key"=>"key2", "value"=>{"
|
88
|
+
# [{"key"=>"key1", "value"=>{"int_value"=>"1"}},
|
89
|
+
# {"key"=>"key2", "value"=>{"string_value"=>"2"}}]}]}
|
92
90
|
```
|
93
91
|
|
94
92
|
You can specify a formatter for the union type keys. For example, a formatter for tables managed by the [Google BigQuery Sink Connector](https://docs.confluent.io/current/connect/kafka-connect-bigquery/index.html) looks like this:
|
95
93
|
|
96
94
|
```ruby
|
97
|
-
schema = {
|
98
|
-
"name" => "Record",
|
99
|
-
"type" => "record",
|
100
|
-
"fields" => [
|
101
|
-
{
|
102
|
-
"name" => "int_or_string_field",
|
103
|
-
"type" => ["int", "string"],
|
104
|
-
},
|
105
|
-
],
|
106
|
-
}
|
107
|
-
|
108
95
|
converter = TypedData::Converter.new(schema)
|
109
96
|
converter.union_type_key_formatter = ->(type) { type.split("_").first }
|
110
|
-
converter.convert({
|
111
|
-
|
97
|
+
converter.convert({
|
98
|
+
"int_field" => 1,
|
99
|
+
"int_or_string_field" => "string",
|
100
|
+
"array_field" => [1, 2],
|
101
|
+
"union_type_array_field" => [1, "2"],
|
102
|
+
"nested_map_field" => {
|
103
|
+
"nested_map" => {
|
104
|
+
"key1" => 1,
|
105
|
+
"key2" => "2",
|
106
|
+
},
|
107
|
+
},
|
108
|
+
})
|
109
|
+
#=> {"int_field"=>1,
|
110
|
+
# "int_or_string_field"=>{"string"=>"string"},
|
111
|
+
# "array_field"=>[1, 2],
|
112
|
+
# "union_type_array_field"=>[{"int"=>"1"}, {"string"=>"2"}],
|
113
|
+
# "nested_map_field"=>
|
114
|
+
# [{"key"=>"nested_map",
|
115
|
+
# "value"=>
|
116
|
+
# [{"key"=>"key1", "value"=>{"int"=>"1"}},
|
117
|
+
# {"key"=>"key2", "value"=>{"string"=>"2"}}]}]}
|
112
118
|
```
|
113
119
|
|
114
120
|
|
data/lib/typed_data/converter.rb
CHANGED
@@ -31,9 +31,9 @@ module TypedData
|
|
31
31
|
when Schema::RecordType
|
32
32
|
converted[key] = convert_record(subtype, value)
|
33
33
|
when Schema::UnionType
|
34
|
-
converted[key] = convert_union(subtype, value
|
34
|
+
converted[key] = convert_union(subtype, value)
|
35
35
|
else
|
36
|
-
converted[key] = subtype.coerce(value
|
36
|
+
converted[key] = subtype.coerce(value)
|
37
37
|
end
|
38
38
|
end
|
39
39
|
end
|
@@ -55,7 +55,7 @@ module TypedData
|
|
55
55
|
when Schema::UnionType
|
56
56
|
ret << convert_union(subtype, value)
|
57
57
|
else
|
58
|
-
ret <<
|
58
|
+
ret << subtype.coerce(value)
|
59
59
|
end
|
60
60
|
end
|
61
61
|
end
|
@@ -75,16 +75,15 @@ module TypedData
|
|
75
75
|
when Schema::UnionType
|
76
76
|
value = convert_union(subtype, value)
|
77
77
|
else
|
78
|
-
value =
|
78
|
+
value = subtype.coerce(value)
|
79
79
|
end
|
80
80
|
ret << { "key" => key, "value" => value }
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
84
|
# @param type [UnionType]
|
85
|
-
# @param as_record_field [Boolean]
|
86
85
|
# @param value [Object]
|
87
|
-
def convert_union(type, value
|
86
|
+
def convert_union(type, value)
|
88
87
|
subtype = type.find_match(value)
|
89
88
|
case subtype
|
90
89
|
when Schema::ArrayType
|
@@ -95,15 +94,18 @@ module TypedData
|
|
95
94
|
converted_value = convert_record(subtype, value)
|
96
95
|
when Schema::UnionType
|
97
96
|
converted_value = convert_union(subtype, value)
|
97
|
+
when Schema::NullType
|
98
|
+
converted_value = nil
|
98
99
|
else
|
99
|
-
|
100
|
+
converted_value = subtype.coerce(value).to_s
|
100
101
|
end
|
101
102
|
|
102
|
-
if
|
103
|
+
if type.nullable_single?
|
103
104
|
converted_value
|
105
|
+
elsif subtype.is_a?(Schema::NullType)
|
106
|
+
{}
|
104
107
|
else
|
105
|
-
|
106
|
-
.merge!(union_type_key_formatter.call(subtype.to_s) => converted_value)
|
108
|
+
{ union_type_key_formatter.call(subtype.to_s) => converted_value }
|
107
109
|
end
|
108
110
|
end
|
109
111
|
end
|
data/lib/typed_data/schema.rb
CHANGED
@@ -5,6 +5,7 @@ require "typed_data/schema/bytes_type"
|
|
5
5
|
require "typed_data/schema/enum_type"
|
6
6
|
require "typed_data/schema/float_type"
|
7
7
|
require "typed_data/schema/int_type"
|
8
|
+
require "typed_data/schema/long_type"
|
8
9
|
require "typed_data/schema/map_type"
|
9
10
|
require "typed_data/schema/null_type"
|
10
11
|
require "typed_data/schema/record_type"
|
@@ -42,14 +43,16 @@ module TypedData
|
|
42
43
|
values = type["values"] || type[:values]
|
43
44
|
MapType.new(values.is_a?(Array) ? values : [values])
|
44
45
|
when "record"
|
45
|
-
RecordType.new(type["fields"] || type[:fields])
|
46
|
+
RecordType.new(type["name"] || type[:name], type["fields"] || type[:fields])
|
46
47
|
else
|
47
48
|
raise UnsupportedType, "Unknown type: #{subtype}"
|
48
49
|
end
|
49
50
|
when "boolean"
|
50
51
|
BooleanType.new(type, logical_type)
|
51
|
-
when "int"
|
52
|
+
when "int"
|
52
53
|
IntType.new(type, logical_type)
|
54
|
+
when "long"
|
55
|
+
LongType.new(type, logical_type)
|
53
56
|
when "float", "double"
|
54
57
|
FloatType.new(type, logical_type)
|
55
58
|
when "bytes"
|
@@ -72,7 +75,7 @@ module TypedData
|
|
72
75
|
if (schema["type"] || schema[:type]) != "record"
|
73
76
|
raise UnsupportedType, 'The root type must be "record"'
|
74
77
|
end
|
75
|
-
@root_type = RecordType.new(schema["fields"] || schema[:fields])
|
78
|
+
@root_type = RecordType.new(schema["name"] || schema[:name], schema["fields"] || schema[:fields])
|
76
79
|
end
|
77
80
|
end
|
78
81
|
end
|
@@ -3,6 +3,8 @@
|
|
3
3
|
module TypedData
|
4
4
|
class Schema
|
5
5
|
class IntType < Type
|
6
|
+
VALUE_RANGE = -2**31 .. 2**31 - 1
|
7
|
+
|
6
8
|
def to_s
|
7
9
|
if @logical_type
|
8
10
|
"#{@name}_#{@logical_type.gsub("-", "_")}"
|
@@ -11,18 +13,12 @@ module TypedData
|
|
11
13
|
end
|
12
14
|
end
|
13
15
|
|
14
|
-
def coerce(value
|
16
|
+
def coerce(value)
|
15
17
|
case @logical_type
|
16
18
|
when "date"
|
17
19
|
(Date.new(1970, 1, 1) + value).to_s
|
18
20
|
when "time-millis"
|
19
21
|
Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%T.%3N")
|
20
|
-
when "time-micros"
|
21
|
-
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
|
22
|
-
when "timestamp-millis"
|
23
|
-
Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
|
24
|
-
when "timestamp-micros"
|
25
|
-
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
|
26
22
|
else
|
27
23
|
value
|
28
24
|
end
|
@@ -33,7 +29,7 @@ module TypedData
|
|
33
29
|
end
|
34
30
|
|
35
31
|
def match?(value)
|
36
|
-
value.is_a?(Integer)
|
32
|
+
value.is_a?(Integer) && VALUE_RANGE.cover?(value)
|
37
33
|
end
|
38
34
|
end
|
39
35
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module TypedData
|
4
|
+
class Schema
|
5
|
+
class LongType < Type
|
6
|
+
def to_s
|
7
|
+
if @logical_type
|
8
|
+
"#{@name}_#{@logical_type.gsub("-", "_")}"
|
9
|
+
else
|
10
|
+
@name
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def coerce(value)
|
15
|
+
case @logical_type
|
16
|
+
when "time-micros"
|
17
|
+
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
|
18
|
+
when "timestamp-millis"
|
19
|
+
Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
|
20
|
+
when "timestamp-micros"
|
21
|
+
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
|
22
|
+
else
|
23
|
+
value
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def primitive?
|
28
|
+
true
|
29
|
+
end
|
30
|
+
|
31
|
+
def match?(value)
|
32
|
+
value.is_a?(Integer)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -4,7 +4,8 @@ module TypedData
|
|
4
4
|
class Schema
|
5
5
|
class RecordType < Type
|
6
6
|
# @param fields [Array] an array of "fields" in an Avro schema
|
7
|
-
def initialize(fields)
|
7
|
+
def initialize(name, fields)
|
8
|
+
@name = name
|
8
9
|
@field_to_type = fields.each_with_object({}) do |field, h|
|
9
10
|
h[field["name"] || field[:name]] = Schema.build_type(field["type"] || field[:type])
|
10
11
|
end
|
@@ -22,7 +23,7 @@ module TypedData
|
|
22
23
|
end
|
23
24
|
|
24
25
|
def match?(value)
|
25
|
-
value.is_a?(Hash)
|
26
|
+
value.is_a?(Hash) && value.all? { |k, v| @field_to_type[k]&.match?(v) }
|
26
27
|
end
|
27
28
|
end
|
28
29
|
end
|
@@ -6,24 +6,14 @@ module TypedData
|
|
6
6
|
# @param types [Array<String>]
|
7
7
|
def initialize(types)
|
8
8
|
@types = types.map(&Schema.method(:build_type))
|
9
|
-
@
|
9
|
+
@nullable_single = @types.size == 2 && @types.any? { |t| t.is_a?(NullType) }
|
10
|
+
@nullable_primitive = @nullable_single && @types.any?(&:primitive?)
|
10
11
|
end
|
11
12
|
|
12
13
|
def to_s
|
13
14
|
@nullable_primitive ? @types.first.to_s : "union_#{@types.map(&:to_s).join("_")}"
|
14
15
|
end
|
15
16
|
|
16
|
-
def coerce(value, formatter:)
|
17
|
-
return value if @nullable_primitive
|
18
|
-
|
19
|
-
type = find_match(value)
|
20
|
-
if type.is_a?(NullType)
|
21
|
-
default_value(formatter)
|
22
|
-
else
|
23
|
-
default_value(formatter).merge!(formatter.call(type.to_s) => type.coerce(value, formatter: formatter).to_s)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
17
|
def primitive?
|
28
18
|
false
|
29
19
|
end
|
@@ -36,11 +26,8 @@ module TypedData
|
|
36
26
|
@types.any? { |t| t.match?(value) }
|
37
27
|
end
|
38
28
|
|
39
|
-
def
|
40
|
-
@
|
41
|
-
next if t.is_a?(NullType)
|
42
|
-
v[formatter.call(t.to_s)] = t.primitive? || t.is_a?(EnumType) ? nil : []
|
43
|
-
end
|
29
|
+
def nullable_single?
|
30
|
+
@nullable_single
|
44
31
|
end
|
45
32
|
end
|
46
33
|
end
|
data/lib/typed_data/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: typed_data
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- abicky
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-04-
|
11
|
+
date: 2020-04-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: avro
|
@@ -65,6 +65,7 @@ files:
|
|
65
65
|
- lib/typed_data/schema/enum_type.rb
|
66
66
|
- lib/typed_data/schema/float_type.rb
|
67
67
|
- lib/typed_data/schema/int_type.rb
|
68
|
+
- lib/typed_data/schema/long_type.rb
|
68
69
|
- lib/typed_data/schema/map_type.rb
|
69
70
|
- lib/typed_data/schema/null_type.rb
|
70
71
|
- lib/typed_data/schema/record_type.rb
|