typed_data 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +25 -19
- data/lib/typed_data/converter.rb +12 -10
- data/lib/typed_data/schema.rb +6 -3
- data/lib/typed_data/schema/array_type.rb +0 -4
- data/lib/typed_data/schema/bytes_type.rb +1 -1
- data/lib/typed_data/schema/int_type.rb +4 -8
- data/lib/typed_data/schema/long_type.rb +36 -0
- data/lib/typed_data/schema/map_type.rb +0 -4
- data/lib/typed_data/schema/record_type.rb +3 -2
- data/lib/typed_data/schema/type.rb +1 -1
- data/lib/typed_data/schema/union_type.rb +4 -17
- data/lib/typed_data/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 483f78cdcfb329a0d14f4173fa586f64eae12c8f61dbd78492f1cd85cceadbea
|
4
|
+
data.tar.gz: 45173313046d5a6dc54d920ed5bee399e64fd8af2a2e710e2b5051b922fcc440
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4041f615dedb5782ce40d15ae80eca87f3e168b988c2d2de4d9d07d0ba05b00bc2a6e8b14b981d65575ab9f2e38b0db50653286c3093ffaa8e7a586eb7a5e5d9
|
7
|
+
data.tar.gz: 04f2f8e8a86d63e8f5e06066b96e983332107f116694e059e0b06cd14ebb66827cd8bddc2e85c631298518d8387d28452edd856161b412c105f9a9d05d4151fa
|
data/README.md
CHANGED
@@ -79,36 +79,42 @@ converter.convert({
|
|
79
79
|
},
|
80
80
|
})
|
81
81
|
#=> {"int_field"=>1,
|
82
|
-
# "int_or_string_field"=>{"
|
82
|
+
# "int_or_string_field"=>{"string_value"=>"string"},
|
83
83
|
# "array_field"=>[1, 2],
|
84
|
-
# "union_type_array_field"=>
|
85
|
-
# [{"int_value"=>"1", "string_value"=>nil},
|
86
|
-
# {"int_value"=>nil, "string_value"=>"2"}],
|
84
|
+
# "union_type_array_field"=>[{"int_value"=>"1"}, {"string_value"=>"2"}],
|
87
85
|
# "nested_map_field"=>
|
88
86
|
# [{"key"=>"nested_map",
|
89
87
|
# "value"=>
|
90
|
-
# [{"key"=>"key1", "value"=>{"int_value"=>"1"
|
91
|
-
# {"key"=>"key2", "value"=>{"
|
88
|
+
# [{"key"=>"key1", "value"=>{"int_value"=>"1"}},
|
89
|
+
# {"key"=>"key2", "value"=>{"string_value"=>"2"}}]}]}
|
92
90
|
```
|
93
91
|
|
94
92
|
You can specify a formatter for the union type keys. For example, the formatter for tables managed by [Google BigQuery Sink Connector](https://docs.confluent.io/current/connect/kafka-connect-bigquery/index.html) is like below:
|
95
93
|
|
96
94
|
```ruby
|
97
|
-
schema = {
|
98
|
-
"name" => "Record",
|
99
|
-
"type" => "record",
|
100
|
-
"fields" => [
|
101
|
-
{
|
102
|
-
"name" => "int_or_string_field",
|
103
|
-
"type" => ["int", "string"],
|
104
|
-
},
|
105
|
-
],
|
106
|
-
}
|
107
|
-
|
108
95
|
converter = TypedData::Converter.new(schema)
|
109
96
|
converter.union_type_key_formatter = ->(type) { type.split("_").first }
|
110
|
-
converter.convert({
|
111
|
-
|
97
|
+
converter.convert({
|
98
|
+
"int_field" => 1,
|
99
|
+
"int_or_string_field" => "string",
|
100
|
+
"array_field" => [1, 2],
|
101
|
+
"union_type_array_field" => [1, "2"],
|
102
|
+
"nested_map_field" => {
|
103
|
+
"nested_map" => {
|
104
|
+
"key1" => 1,
|
105
|
+
"key2" => "2",
|
106
|
+
},
|
107
|
+
},
|
108
|
+
})
|
109
|
+
#=> {"int_field"=>1,
|
110
|
+
# "int_or_string_field"=>{"string"=>"string"},
|
111
|
+
# "array_field"=>[1, 2],
|
112
|
+
# "union_type_array_field"=>[{"int"=>"1"}, {"string"=>"2"}],
|
113
|
+
# "nested_map_field"=>
|
114
|
+
# [{"key"=>"nested_map",
|
115
|
+
# "value"=>
|
116
|
+
# [{"key"=>"key1", "value"=>{"int"=>"1"}},
|
117
|
+
# {"key"=>"key2", "value"=>{"string"=>"2"}}]}]}
|
112
118
|
```
|
113
119
|
|
114
120
|
|
data/lib/typed_data/converter.rb
CHANGED
@@ -31,9 +31,9 @@ module TypedData
|
|
31
31
|
when Schema::RecordType
|
32
32
|
converted[key] = convert_record(subtype, value)
|
33
33
|
when Schema::UnionType
|
34
|
-
converted[key] = convert_union(subtype, value
|
34
|
+
converted[key] = convert_union(subtype, value)
|
35
35
|
else
|
36
|
-
converted[key] = subtype.coerce(value
|
36
|
+
converted[key] = subtype.coerce(value)
|
37
37
|
end
|
38
38
|
end
|
39
39
|
end
|
@@ -55,7 +55,7 @@ module TypedData
|
|
55
55
|
when Schema::UnionType
|
56
56
|
ret << convert_union(subtype, value)
|
57
57
|
else
|
58
|
-
ret <<
|
58
|
+
ret << subtype.coerce(value)
|
59
59
|
end
|
60
60
|
end
|
61
61
|
end
|
@@ -75,16 +75,15 @@ module TypedData
|
|
75
75
|
when Schema::UnionType
|
76
76
|
value = convert_union(subtype, value)
|
77
77
|
else
|
78
|
-
value =
|
78
|
+
value = subtype.coerce(value)
|
79
79
|
end
|
80
80
|
ret << { "key" => key, "value" => value }
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
84
|
# @param type [UnionType]
|
85
|
-
# @param as_record_field [Boolean]
|
86
85
|
# @param map [Object]
|
87
|
-
def convert_union(type, value
|
86
|
+
def convert_union(type, value)
|
88
87
|
subtype = type.find_match(value)
|
89
88
|
case subtype
|
90
89
|
when Schema::ArrayType
|
@@ -95,15 +94,18 @@ module TypedData
|
|
95
94
|
converted_value = convert_record(subtype, value)
|
96
95
|
when Schema::UnionType
|
97
96
|
converted_value = convert_union(subtype, value)
|
97
|
+
when Schema::NullType
|
98
|
+
converted_value = nil
|
98
99
|
else
|
99
|
-
|
100
|
+
converted_value = subtype.coerce(value).to_s
|
100
101
|
end
|
101
102
|
|
102
|
-
if
|
103
|
+
if type.nullable_single?
|
103
104
|
converted_value
|
105
|
+
elsif subtype.is_a?(Schema::NullType)
|
106
|
+
{}
|
104
107
|
else
|
105
|
-
|
106
|
-
.merge!(union_type_key_formatter.call(subtype.to_s) => converted_value)
|
108
|
+
{ union_type_key_formatter.call(subtype.to_s) => converted_value }
|
107
109
|
end
|
108
110
|
end
|
109
111
|
end
|
data/lib/typed_data/schema.rb
CHANGED
@@ -5,6 +5,7 @@ require "typed_data/schema/bytes_type"
|
|
5
5
|
require "typed_data/schema/enum_type"
|
6
6
|
require "typed_data/schema/float_type"
|
7
7
|
require "typed_data/schema/int_type"
|
8
|
+
require "typed_data/schema/long_type"
|
8
9
|
require "typed_data/schema/map_type"
|
9
10
|
require "typed_data/schema/null_type"
|
10
11
|
require "typed_data/schema/record_type"
|
@@ -42,14 +43,16 @@ module TypedData
|
|
42
43
|
values = type["values"] || type[:values]
|
43
44
|
MapType.new(values.is_a?(Array) ? values : [values])
|
44
45
|
when "record"
|
45
|
-
RecordType.new(type["fields"] || type[:fields])
|
46
|
+
RecordType.new(type["name"] || type[:name], type["fields"] || type[:fields])
|
46
47
|
else
|
47
48
|
raise UnsupportedType, "Unknown type: #{subtype}"
|
48
49
|
end
|
49
50
|
when "boolean"
|
50
51
|
BooleanType.new(type, logical_type)
|
51
|
-
when "int"
|
52
|
+
when "int"
|
52
53
|
IntType.new(type, logical_type)
|
54
|
+
when "long"
|
55
|
+
LongType.new(type, logical_type)
|
53
56
|
when "float", "double"
|
54
57
|
FloatType.new(type, logical_type)
|
55
58
|
when "bytes"
|
@@ -72,7 +75,7 @@ module TypedData
|
|
72
75
|
if (schema["type"] || schema[:type]) != "record"
|
73
76
|
raise UnsupportedType, 'The root type must be "record"'
|
74
77
|
end
|
75
|
-
@root_type = RecordType.new(schema["fields"] || schema[:fields])
|
78
|
+
@root_type = RecordType.new(schema["name"] || schema[:name], schema["fields"] || schema[:fields])
|
76
79
|
end
|
77
80
|
end
|
78
81
|
end
|
@@ -3,6 +3,8 @@
|
|
3
3
|
module TypedData
|
4
4
|
class Schema
|
5
5
|
class IntType < Type
|
6
|
+
VALUE_RANGE = -2**31 .. 2**31 - 1
|
7
|
+
|
6
8
|
def to_s
|
7
9
|
if @logical_type
|
8
10
|
"#{@name}_#{@logical_type.gsub("-", "_")}"
|
@@ -11,18 +13,12 @@ module TypedData
|
|
11
13
|
end
|
12
14
|
end
|
13
15
|
|
14
|
-
def coerce(value
|
16
|
+
def coerce(value)
|
15
17
|
case @logical_type
|
16
18
|
when "date"
|
17
19
|
(Date.new(1970, 1, 1) + value).to_s
|
18
20
|
when "time-millis"
|
19
21
|
Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%T.%3N")
|
20
|
-
when "time-micros"
|
21
|
-
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
|
22
|
-
when "timestamp-millis"
|
23
|
-
Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
|
24
|
-
when "timestamp-micros"
|
25
|
-
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
|
26
22
|
else
|
27
23
|
value
|
28
24
|
end
|
@@ -33,7 +29,7 @@ module TypedData
|
|
33
29
|
end
|
34
30
|
|
35
31
|
def match?(value)
|
36
|
-
value.is_a?(Integer)
|
32
|
+
value.is_a?(Integer) && VALUE_RANGE.cover?(value)
|
37
33
|
end
|
38
34
|
end
|
39
35
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module TypedData
|
4
|
+
class Schema
|
5
|
+
class LongType < Type
|
6
|
+
def to_s
|
7
|
+
if @logical_type
|
8
|
+
"#{@name}_#{@logical_type.gsub("-", "_")}"
|
9
|
+
else
|
10
|
+
@name
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def coerce(value)
|
15
|
+
case @logical_type
|
16
|
+
when "time-micros"
|
17
|
+
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
|
18
|
+
when "timestamp-millis"
|
19
|
+
Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
|
20
|
+
when "timestamp-micros"
|
21
|
+
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
|
22
|
+
else
|
23
|
+
value
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def primitive?
|
28
|
+
true
|
29
|
+
end
|
30
|
+
|
31
|
+
def match?(value)
|
32
|
+
value.is_a?(Integer)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -4,7 +4,8 @@ module TypedData
|
|
4
4
|
class Schema
|
5
5
|
class RecordType < Type
|
6
6
|
# @param fields [Array] an array of "fields" in an Avro schema
|
7
|
-
def initialize(fields)
|
7
|
+
def initialize(name, fields)
|
8
|
+
@name = name
|
8
9
|
@field_to_type = fields.each_with_object({}) do |field, h|
|
9
10
|
h[field["name"] || field[:name]] = Schema.build_type(field["type"] || field[:type])
|
10
11
|
end
|
@@ -22,7 +23,7 @@ module TypedData
|
|
22
23
|
end
|
23
24
|
|
24
25
|
def match?(value)
|
25
|
-
value.is_a?(Hash)
|
26
|
+
value.is_a?(Hash) && value.all? { |k, v| @field_to_type[k]&.match?(v) }
|
26
27
|
end
|
27
28
|
end
|
28
29
|
end
|
@@ -6,24 +6,14 @@ module TypedData
|
|
6
6
|
# @param types [Array<String>]
|
7
7
|
def initialize(types)
|
8
8
|
@types = types.map(&Schema.method(:build_type))
|
9
|
-
@
|
9
|
+
@nullable_single = @types.size == 2 && @types.any? { |t| t.is_a?(NullType) }
|
10
|
+
@nullable_primitive = @nullable_single && @types.any?(&:primitive?)
|
10
11
|
end
|
11
12
|
|
12
13
|
def to_s
|
13
14
|
@nullable_primitive ? @types.first.to_s : "union_#{@types.map(&:to_s).join("_")}"
|
14
15
|
end
|
15
16
|
|
16
|
-
def coerce(value, formatter:)
|
17
|
-
return value if @nullable_primitive
|
18
|
-
|
19
|
-
type = find_match(value)
|
20
|
-
if type.is_a?(NullType)
|
21
|
-
default_value(formatter)
|
22
|
-
else
|
23
|
-
default_value(formatter).merge!(formatter.call(type.to_s) => type.coerce(value, formatter: formatter).to_s)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
17
|
def primitive?
|
28
18
|
false
|
29
19
|
end
|
@@ -36,11 +26,8 @@ module TypedData
|
|
36
26
|
@types.any? { |t| t.match?(value) }
|
37
27
|
end
|
38
28
|
|
39
|
-
def
|
40
|
-
@
|
41
|
-
next if t.is_a?(NullType)
|
42
|
-
v[formatter.call(t.to_s)] = t.primitive? || t.is_a?(EnumType) ? nil : []
|
43
|
-
end
|
29
|
+
def nullable_single?
|
30
|
+
@nullable_single
|
44
31
|
end
|
45
32
|
end
|
46
33
|
end
|
data/lib/typed_data/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: typed_data
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- abicky
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-04-
|
11
|
+
date: 2020-04-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: avro
|
@@ -65,6 +65,7 @@ files:
|
|
65
65
|
- lib/typed_data/schema/enum_type.rb
|
66
66
|
- lib/typed_data/schema/float_type.rb
|
67
67
|
- lib/typed_data/schema/int_type.rb
|
68
|
+
- lib/typed_data/schema/long_type.rb
|
68
69
|
- lib/typed_data/schema/map_type.rb
|
69
70
|
- lib/typed_data/schema/null_type.rb
|
70
71
|
- lib/typed_data/schema/record_type.rb
|