typed_data 0.1.1 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +27 -21
- data/lib/typed_data/converter.rb +12 -10
- data/lib/typed_data/schema.rb +7 -6
- data/lib/typed_data/schema/array_type.rb +0 -4
- data/lib/typed_data/schema/bytes_type.rb +1 -1
- data/lib/typed_data/schema/errors.rb +9 -0
- data/lib/typed_data/schema/int_type.rb +5 -8
- data/lib/typed_data/schema/long_type.rb +38 -0
- data/lib/typed_data/schema/map_type.rb +0 -4
- data/lib/typed_data/schema/record_type.rb +7 -2
- data/lib/typed_data/schema/string_type.rb +2 -0
- data/lib/typed_data/schema/type.rb +7 -1
- data/lib/typed_data/schema/union_type.rb +7 -18
- data/lib/typed_data/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8328e9cdeaee3bbab6b796988ff1436fc913aa8be6078b99848fe96a4d109156
|
4
|
+
data.tar.gz: f3b471b2a4cbc86c8a8a3b07d038c373ca7272f5417f7dd4301cd25a187d4383
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15cf75777af9064e4804709b3c4aef5b3f29d867d29eefa4c93e20a5216b75c1e988809ecf985bf285178e2f1e36ff513316aeb69f179c695f196a511ce439e6
|
7
|
+
data.tar.gz: a00c6feb6091ed05d0e28c1afb26d386cab86aa3c110792423bd17f03c8637fb8fab500ea0e220c2c6dc8942f3b6fdc8a86f3668eb23c817d53c847de9495afe
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# TypedData
|
2
2
|
|
3
|
-
![](https://github.com/abicky/
|
3
|
+
![](https://github.com/abicky/typed_data/workflows/CI/badge.svg?branch=master)
|
4
4
|
|
5
5
|
TypedData is a library that converts hash objects managed by an Avro schema so that the objects can be loaded into BigQuery.
|
6
6
|
|
@@ -79,36 +79,42 @@ converter.convert({
|
|
79
79
|
},
|
80
80
|
})
|
81
81
|
#=> {"int_field"=>1,
|
82
|
-
# "int_or_string_field"=>{"
|
82
|
+
# "int_or_string_field"=>{"string_value"=>"string"},
|
83
83
|
# "array_field"=>[1, 2],
|
84
|
-
# "union_type_array_field"=>
|
85
|
-
# [{"int_value"=>"1", "string_value"=>nil},
|
86
|
-
# {"int_value"=>nil, "string_value"=>"2"}],
|
84
|
+
# "union_type_array_field"=>[{"int_value"=>"1"}, {"string_value"=>"2"}],
|
87
85
|
# "nested_map_field"=>
|
88
86
|
# [{"key"=>"nested_map",
|
89
87
|
# "value"=>
|
90
|
-
# [{"key"=>"key1", "value"=>{"int_value"=>"1"
|
91
|
-
# {"key"=>"key2", "value"=>{"
|
88
|
+
# [{"key"=>"key1", "value"=>{"int_value"=>"1"}},
|
89
|
+
# {"key"=>"key2", "value"=>{"string_value"=>"2"}}]}]}
|
92
90
|
```
|
93
91
|
|
94
92
|
You can specify a formatter for the union type keys. For example, the formatter for tables managed by [Google BigQuery Sink Connector](https://docs.confluent.io/current/connect/kafka-connect-bigquery/index.html) looks like this:
|
95
93
|
|
96
94
|
```ruby
|
97
|
-
schema = {
|
98
|
-
"name" => "Record",
|
99
|
-
"type" => "record",
|
100
|
-
"fields" => [
|
101
|
-
{
|
102
|
-
"name" => "int_or_string_field",
|
103
|
-
"type" => ["int", "string"],
|
104
|
-
},
|
105
|
-
],
|
106
|
-
}
|
107
|
-
|
108
95
|
converter = TypedData::Converter.new(schema)
|
109
96
|
converter.union_type_key_formatter = ->(type) { type.split("_").first }
|
110
|
-
converter.convert({
|
111
|
-
|
97
|
+
converter.convert({
|
98
|
+
"int_field" => 1,
|
99
|
+
"int_or_string_field" => "string",
|
100
|
+
"array_field" => [1, 2],
|
101
|
+
"union_type_array_field" => [1, "2"],
|
102
|
+
"nested_map_field" => {
|
103
|
+
"nested_map" => {
|
104
|
+
"key1" => 1,
|
105
|
+
"key2" => "2",
|
106
|
+
},
|
107
|
+
},
|
108
|
+
})
|
109
|
+
#=> {"int_field"=>1,
|
110
|
+
# "int_or_string_field"=>{"string"=>"string"},
|
111
|
+
# "array_field"=>[1, 2],
|
112
|
+
# "union_type_array_field"=>[{"int"=>"1"}, {"string"=>"2"}],
|
113
|
+
# "nested_map_field"=>
|
114
|
+
# [{"key"=>"nested_map",
|
115
|
+
# "value"=>
|
116
|
+
# [{"key"=>"key1", "value"=>{"int"=>"1"}},
|
117
|
+
# {"key"=>"key2", "value"=>{"string"=>"2"}}]}]}
|
112
118
|
```
|
113
119
|
|
114
120
|
|
@@ -120,7 +126,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
120
126
|
|
121
127
|
## Contributing
|
122
128
|
|
123
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
129
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/abicky/typed_data.
|
124
130
|
|
125
131
|
|
126
132
|
## License
|
data/lib/typed_data/converter.rb
CHANGED
@@ -31,9 +31,9 @@ module TypedData
|
|
31
31
|
when Schema::RecordType
|
32
32
|
converted[key] = convert_record(subtype, value)
|
33
33
|
when Schema::UnionType
|
34
|
-
converted[key] = convert_union(subtype, value
|
34
|
+
converted[key] = convert_union(subtype, value)
|
35
35
|
else
|
36
|
-
converted[key] = subtype.coerce(value
|
36
|
+
converted[key] = subtype.coerce(value)
|
37
37
|
end
|
38
38
|
end
|
39
39
|
end
|
@@ -55,7 +55,7 @@ module TypedData
|
|
55
55
|
when Schema::UnionType
|
56
56
|
ret << convert_union(subtype, value)
|
57
57
|
else
|
58
|
-
ret <<
|
58
|
+
ret << subtype.coerce(value)
|
59
59
|
end
|
60
60
|
end
|
61
61
|
end
|
@@ -75,16 +75,15 @@ module TypedData
|
|
75
75
|
when Schema::UnionType
|
76
76
|
value = convert_union(subtype, value)
|
77
77
|
else
|
78
|
-
value =
|
78
|
+
value = subtype.coerce(value)
|
79
79
|
end
|
80
80
|
ret << { "key" => key, "value" => value }
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
84
|
# @param type [UnionType]
|
85
|
-
# @param as_record_field [Boolean]
|
86
85
|
# @param value [Object]
|
87
|
-
def convert_union(type, value
|
86
|
+
def convert_union(type, value)
|
88
87
|
subtype = type.find_match(value)
|
89
88
|
case subtype
|
90
89
|
when Schema::ArrayType
|
@@ -95,15 +94,18 @@ module TypedData
|
|
95
94
|
converted_value = convert_record(subtype, value)
|
96
95
|
when Schema::UnionType
|
97
96
|
converted_value = convert_union(subtype, value)
|
97
|
+
when Schema::NullType
|
98
|
+
converted_value = nil
|
98
99
|
else
|
99
|
-
|
100
|
+
converted_value = subtype.coerce(value)
|
100
101
|
end
|
101
102
|
|
102
|
-
if
|
103
|
+
if type.nullable_single?
|
103
104
|
converted_value
|
105
|
+
elsif subtype.is_a?(Schema::NullType)
|
106
|
+
{}
|
104
107
|
else
|
105
|
-
|
106
|
-
.merge!(union_type_key_formatter.call(subtype.to_s) => converted_value)
|
108
|
+
{ union_type_key_formatter.call(subtype.to_s) => converted_value }
|
107
109
|
end
|
108
110
|
end
|
109
111
|
end
|
data/lib/typed_data/schema.rb
CHANGED
@@ -5,17 +5,16 @@ require "typed_data/schema/bytes_type"
|
|
5
5
|
require "typed_data/schema/enum_type"
|
6
6
|
require "typed_data/schema/float_type"
|
7
7
|
require "typed_data/schema/int_type"
|
8
|
+
require "typed_data/schema/long_type"
|
8
9
|
require "typed_data/schema/map_type"
|
9
10
|
require "typed_data/schema/null_type"
|
10
11
|
require "typed_data/schema/record_type"
|
11
12
|
require "typed_data/schema/string_type"
|
12
13
|
require "typed_data/schema/union_type"
|
14
|
+
require "typed_data/schema/errors"
|
13
15
|
|
14
16
|
module TypedData
|
15
17
|
class Schema
|
16
|
-
class UnknownField < StandardError; end
|
17
|
-
class UnsupportedType < StandardError; end
|
18
|
-
|
19
18
|
class << self
|
20
19
|
def build_type(type, logical_type = nil)
|
21
20
|
type = type.first if type.is_a?(Array) && type.size == 1
|
@@ -42,14 +41,16 @@ module TypedData
|
|
42
41
|
values = type["values"] || type[:values]
|
43
42
|
MapType.new(values.is_a?(Array) ? values : [values])
|
44
43
|
when "record"
|
45
|
-
RecordType.new(type["fields"] || type[:fields])
|
44
|
+
RecordType.new(type["name"] || type[:name], type["fields"] || type[:fields])
|
46
45
|
else
|
47
46
|
raise UnsupportedType, "Unknown type: #{subtype}"
|
48
47
|
end
|
49
48
|
when "boolean"
|
50
49
|
BooleanType.new(type, logical_type)
|
51
|
-
when "int"
|
50
|
+
when "int"
|
52
51
|
IntType.new(type, logical_type)
|
52
|
+
when "long"
|
53
|
+
LongType.new(type, logical_type)
|
53
54
|
when "float", "double"
|
54
55
|
FloatType.new(type, logical_type)
|
55
56
|
when "bytes"
|
@@ -72,7 +73,7 @@ module TypedData
|
|
72
73
|
if (schema["type"] || schema[:type]) != "record"
|
73
74
|
raise UnsupportedType, 'The root type must be "record"'
|
74
75
|
end
|
75
|
-
@root_type = RecordType.new(schema["fields"] || schema[:fields])
|
76
|
+
@root_type = RecordType.new(schema["name"] || schema[:name], schema["fields"] || schema[:fields])
|
76
77
|
end
|
77
78
|
end
|
78
79
|
end
|
@@ -3,6 +3,9 @@
|
|
3
3
|
module TypedData
|
4
4
|
class Schema
|
5
5
|
class IntType < Type
|
6
|
+
VALUE_RANGE = -2**31 .. 2**31 - 1
|
7
|
+
SUPPORTED_LOGICAL_TYPES = %w[date time-millis]
|
8
|
+
|
6
9
|
def to_s
|
7
10
|
if @logical_type
|
8
11
|
"#{@name}_#{@logical_type.gsub("-", "_")}"
|
@@ -11,18 +14,12 @@ module TypedData
|
|
11
14
|
end
|
12
15
|
end
|
13
16
|
|
14
|
-
def coerce(value
|
17
|
+
def coerce(value)
|
15
18
|
case @logical_type
|
16
19
|
when "date"
|
17
20
|
(Date.new(1970, 1, 1) + value).to_s
|
18
21
|
when "time-millis"
|
19
22
|
Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%T.%3N")
|
20
|
-
when "time-micros"
|
21
|
-
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
|
22
|
-
when "timestamp-millis"
|
23
|
-
Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
|
24
|
-
when "timestamp-micros"
|
25
|
-
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
|
26
23
|
else
|
27
24
|
value
|
28
25
|
end
|
@@ -33,7 +30,7 @@ module TypedData
|
|
33
30
|
end
|
34
31
|
|
35
32
|
def match?(value)
|
36
|
-
value.is_a?(Integer)
|
33
|
+
value.is_a?(Integer) && VALUE_RANGE.cover?(value)
|
37
34
|
end
|
38
35
|
end
|
39
36
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module TypedData
|
4
|
+
class Schema
|
5
|
+
class LongType < Type
|
6
|
+
SUPPORTED_LOGICAL_TYPES = %w[time-micros timestamp-millis timestamp-micros]
|
7
|
+
|
8
|
+
def to_s
|
9
|
+
if @logical_type
|
10
|
+
"#{@name}_#{@logical_type.gsub("-", "_")}"
|
11
|
+
else
|
12
|
+
@name
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def coerce(value)
|
17
|
+
case @logical_type
|
18
|
+
when "time-micros"
|
19
|
+
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
|
20
|
+
when "timestamp-millis"
|
21
|
+
Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
|
22
|
+
when "timestamp-micros"
|
23
|
+
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
|
24
|
+
else
|
25
|
+
value
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def primitive?
|
30
|
+
true
|
31
|
+
end
|
32
|
+
|
33
|
+
def match?(value)
|
34
|
+
value.is_a?(Integer)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -4,7 +4,8 @@ module TypedData
|
|
4
4
|
class Schema
|
5
5
|
class RecordType < Type
|
6
6
|
# @param fields [Array] an array of "fields" in an Avro schema
|
7
|
-
def initialize(fields)
|
7
|
+
def initialize(name, fields)
|
8
|
+
@name = name
|
8
9
|
@field_to_type = fields.each_with_object({}) do |field, h|
|
9
10
|
h[field["name"] || field[:name]] = Schema.build_type(field["type"] || field[:type])
|
10
11
|
end
|
@@ -21,8 +22,12 @@ module TypedData
|
|
21
22
|
end
|
22
23
|
end
|
23
24
|
|
25
|
+
def find_match(value)
|
26
|
+
raise InvalidValue, %Q{the value #{value.inspect} doesn't match the type #{self}}
|
27
|
+
end
|
28
|
+
|
24
29
|
def match?(value)
|
25
|
-
value.is_a?(Hash)
|
30
|
+
value.is_a?(Hash) && value.all? { |k, v| @field_to_type[k]&.match?(v) }
|
26
31
|
end
|
27
32
|
end
|
28
33
|
end
|
@@ -1,10 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require "typed_data/schema/errors"
|
2
3
|
|
3
4
|
module TypedData
|
4
5
|
class Schema
|
5
6
|
class Type
|
7
|
+
SUPPORTED_LOGICAL_TYPES = []
|
8
|
+
|
6
9
|
def initialize(name, logical_type = nil)
|
7
10
|
@name = name
|
11
|
+
if logical_type && !self.class::SUPPORTED_LOGICAL_TYPES.include?(logical_type)
|
12
|
+
raise UnsupportedType, %Q{#{name} doesn't support the logical type "#{logical_type}"}
|
13
|
+
end
|
8
14
|
@logical_type = logical_type
|
9
15
|
end
|
10
16
|
|
@@ -12,7 +18,7 @@ module TypedData
|
|
12
18
|
@name
|
13
19
|
end
|
14
20
|
|
15
|
-
def coerce(value
|
21
|
+
def coerce(value)
|
16
22
|
value
|
17
23
|
end
|
18
24
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require "typed_data/schema/errors"
|
2
3
|
|
3
4
|
module TypedData
|
4
5
|
class Schema
|
@@ -6,41 +7,29 @@ module TypedData
|
|
6
7
|
# @param types [Array<String>]
|
7
8
|
def initialize(types)
|
8
9
|
@types = types.map(&Schema.method(:build_type))
|
9
|
-
@
|
10
|
+
@nullable_single = @types.size == 2 && @types.any? { |t| t.is_a?(NullType) }
|
11
|
+
@nullable_primitive = @nullable_single && @types.any?(&:primitive?)
|
10
12
|
end
|
11
13
|
|
12
14
|
def to_s
|
13
15
|
@nullable_primitive ? @types.first.to_s : "union_#{@types.map(&:to_s).join("_")}"
|
14
16
|
end
|
15
17
|
|
16
|
-
def coerce(value, formatter:)
|
17
|
-
return value if @nullable_primitive
|
18
|
-
|
19
|
-
type = find_match(value)
|
20
|
-
if type.is_a?(NullType)
|
21
|
-
default_value(formatter)
|
22
|
-
else
|
23
|
-
default_value(formatter).merge!(formatter.call(type.to_s) => type.coerce(value, formatter: formatter).to_s)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
18
|
def primitive?
|
28
19
|
false
|
29
20
|
end
|
30
21
|
|
31
22
|
def find_match(value)
|
32
|
-
@types.find { |t| t.match?(value) }
|
23
|
+
@types.find { |t| t.match?(value) } or
|
24
|
+
raise InvalidValue, %Q{the value #{value.inspect} doesn't match the type #{@types.map(&:to_s)}}
|
33
25
|
end
|
34
26
|
|
35
27
|
def match?(value)
|
36
28
|
@types.any? { |t| t.match?(value) }
|
37
29
|
end
|
38
30
|
|
39
|
-
def
|
40
|
-
@
|
41
|
-
next if t.is_a?(NullType)
|
42
|
-
v[formatter.call(t.to_s)] = t.primitive? || t.is_a?(EnumType) ? nil : []
|
43
|
-
end
|
31
|
+
def nullable_single?
|
32
|
+
@nullable_single
|
44
33
|
end
|
45
34
|
end
|
46
35
|
end
|
data/lib/typed_data/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: typed_data
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- abicky
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: avro
|
@@ -63,8 +63,10 @@ files:
|
|
63
63
|
- lib/typed_data/schema/boolean_type.rb
|
64
64
|
- lib/typed_data/schema/bytes_type.rb
|
65
65
|
- lib/typed_data/schema/enum_type.rb
|
66
|
+
- lib/typed_data/schema/errors.rb
|
66
67
|
- lib/typed_data/schema/float_type.rb
|
67
68
|
- lib/typed_data/schema/int_type.rb
|
69
|
+
- lib/typed_data/schema/long_type.rb
|
68
70
|
- lib/typed_data/schema/map_type.rb
|
69
71
|
- lib/typed_data/schema/null_type.rb
|
70
72
|
- lib/typed_data/schema/record_type.rb
|
@@ -94,7 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
94
96
|
- !ruby/object:Gem::Version
|
95
97
|
version: '0'
|
96
98
|
requirements: []
|
97
|
-
rubygems_version: 3.1.
|
99
|
+
rubygems_version: 3.1.4
|
98
100
|
signing_key:
|
99
101
|
specification_version: 4
|
100
102
|
summary: A library that converts hash objects managed by an Avro schema
|