typed_data 0.1.1 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +27 -21
- data/lib/typed_data/converter.rb +12 -10
- data/lib/typed_data/schema.rb +7 -6
- data/lib/typed_data/schema/array_type.rb +0 -4
- data/lib/typed_data/schema/bytes_type.rb +1 -1
- data/lib/typed_data/schema/errors.rb +9 -0
- data/lib/typed_data/schema/int_type.rb +5 -8
- data/lib/typed_data/schema/long_type.rb +38 -0
- data/lib/typed_data/schema/map_type.rb +0 -4
- data/lib/typed_data/schema/record_type.rb +7 -2
- data/lib/typed_data/schema/string_type.rb +2 -0
- data/lib/typed_data/schema/type.rb +7 -1
- data/lib/typed_data/schema/union_type.rb +7 -18
- data/lib/typed_data/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8328e9cdeaee3bbab6b796988ff1436fc913aa8be6078b99848fe96a4d109156
|
4
|
+
data.tar.gz: f3b471b2a4cbc86c8a8a3b07d038c373ca7272f5417f7dd4301cd25a187d4383
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15cf75777af9064e4804709b3c4aef5b3f29d867d29eefa4c93e20a5216b75c1e988809ecf985bf285178e2f1e36ff513316aeb69f179c695f196a511ce439e6
|
7
|
+
data.tar.gz: a00c6feb6091ed05d0e28c1afb26d386cab86aa3c110792423bd17f03c8637fb8fab500ea0e220c2c6dc8942f3b6fdc8a86f3668eb23c817d53c847de9495afe
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# TypedData
|
2
2
|
|
3
|
-

|
4
4
|
|
5
5
|
TypedData is a library that converts hash objects managed by an Avro schema so that the objects can be loaded into BigQuery.
|
6
6
|
|
@@ -79,36 +79,42 @@ converter.convert({
|
|
79
79
|
},
|
80
80
|
})
|
81
81
|
#=> {"int_field"=>1,
|
82
|
-
# "int_or_string_field"=>{"
|
82
|
+
# "int_or_string_field"=>{"string_value"=>"string"},
|
83
83
|
# "array_field"=>[1, 2],
|
84
|
-
# "union_type_array_field"=>
|
85
|
-
# [{"int_value"=>"1", "string_value"=>nil},
|
86
|
-
# {"int_value"=>nil, "string_value"=>"2"}],
|
84
|
+
# "union_type_array_field"=>[{"int_value"=>"1"}, {"string_value"=>"2"}],
|
87
85
|
# "nested_map_field"=>
|
88
86
|
# [{"key"=>"nested_map",
|
89
87
|
# "value"=>
|
90
|
-
# [{"key"=>"key1", "value"=>{"int_value"=>"1"
|
91
|
-
# {"key"=>"key2", "value"=>{"
|
88
|
+
# [{"key"=>"key1", "value"=>{"int_value"=>"1"}},
|
89
|
+
# {"key"=>"key2", "value"=>{"string_value"=>"2"}}]}]}
|
92
90
|
```
|
93
91
|
|
94
92
|
You can specify a formatter for the union type keys. For example, the formatter for tables managed by [Google BigQuery Sink Connector](https://docs.confluent.io/current/connect/kafka-connect-bigquery/index.html) is like below:
|
95
93
|
|
96
94
|
```ruby
|
97
|
-
schema = {
|
98
|
-
"name" => "Record",
|
99
|
-
"type" => "record",
|
100
|
-
"fields" => [
|
101
|
-
{
|
102
|
-
"name" => "int_or_string_field",
|
103
|
-
"type" => ["int", "string"],
|
104
|
-
},
|
105
|
-
],
|
106
|
-
}
|
107
|
-
|
108
95
|
converter = TypedData::Converter.new(schema)
|
109
96
|
converter.union_type_key_formatter = ->(type) { type.split("_").first }
|
110
|
-
converter.convert({
|
111
|
-
|
97
|
+
converter.convert({
|
98
|
+
"int_field" => 1,
|
99
|
+
"int_or_string_field" => "string",
|
100
|
+
"array_field" => [1, 2],
|
101
|
+
"union_type_array_field" => [1, "2"],
|
102
|
+
"nested_map_field" => {
|
103
|
+
"nested_map" => {
|
104
|
+
"key1" => 1,
|
105
|
+
"key2" => "2",
|
106
|
+
},
|
107
|
+
},
|
108
|
+
})
|
109
|
+
#=> {"int_field"=>1,
|
110
|
+
# "int_or_string_field"=>{"string"=>"string"},
|
111
|
+
# "array_field"=>[1, 2],
|
112
|
+
# "union_type_array_field"=>[{"int"=>"1"}, {"string"=>"2"}],
|
113
|
+
# "nested_map_field"=>
|
114
|
+
# [{"key"=>"nested_map",
|
115
|
+
# "value"=>
|
116
|
+
# [{"key"=>"key1", "value"=>{"int"=>"1"}},
|
117
|
+
# {"key"=>"key2", "value"=>{"string"=>"2"}}]}]}
|
112
118
|
```
|
113
119
|
|
114
120
|
|
@@ -120,7 +126,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
120
126
|
|
121
127
|
## Contributing
|
122
128
|
|
123
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
129
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/abicky/typed_data.
|
124
130
|
|
125
131
|
|
126
132
|
## License
|
data/lib/typed_data/converter.rb
CHANGED
@@ -31,9 +31,9 @@ module TypedData
|
|
31
31
|
when Schema::RecordType
|
32
32
|
converted[key] = convert_record(subtype, value)
|
33
33
|
when Schema::UnionType
|
34
|
-
converted[key] = convert_union(subtype, value
|
34
|
+
converted[key] = convert_union(subtype, value)
|
35
35
|
else
|
36
|
-
converted[key] = subtype.coerce(value
|
36
|
+
converted[key] = subtype.coerce(value)
|
37
37
|
end
|
38
38
|
end
|
39
39
|
end
|
@@ -55,7 +55,7 @@ module TypedData
|
|
55
55
|
when Schema::UnionType
|
56
56
|
ret << convert_union(subtype, value)
|
57
57
|
else
|
58
|
-
ret <<
|
58
|
+
ret << subtype.coerce(value)
|
59
59
|
end
|
60
60
|
end
|
61
61
|
end
|
@@ -75,16 +75,15 @@ module TypedData
|
|
75
75
|
when Schema::UnionType
|
76
76
|
value = convert_union(subtype, value)
|
77
77
|
else
|
78
|
-
value =
|
78
|
+
value = subtype.coerce(value)
|
79
79
|
end
|
80
80
|
ret << { "key" => key, "value" => value }
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
84
|
# @param type [UnionType]
|
85
|
-
# @param as_record_field [Boolean]
|
86
85
|
# @param map [Object]
|
87
|
-
def convert_union(type, value
|
86
|
+
def convert_union(type, value)
|
88
87
|
subtype = type.find_match(value)
|
89
88
|
case subtype
|
90
89
|
when Schema::ArrayType
|
@@ -95,15 +94,18 @@ module TypedData
|
|
95
94
|
converted_value = convert_record(subtype, value)
|
96
95
|
when Schema::UnionType
|
97
96
|
converted_value = convert_union(subtype, value)
|
97
|
+
when Schema::NullType
|
98
|
+
converted_value = nil
|
98
99
|
else
|
99
|
-
|
100
|
+
converted_value = subtype.coerce(value)
|
100
101
|
end
|
101
102
|
|
102
|
-
if
|
103
|
+
if type.nullable_single?
|
103
104
|
converted_value
|
105
|
+
elsif subtype.is_a?(Schema::NullType)
|
106
|
+
{}
|
104
107
|
else
|
105
|
-
|
106
|
-
.merge!(union_type_key_formatter.call(subtype.to_s) => converted_value)
|
108
|
+
{ union_type_key_formatter.call(subtype.to_s) => converted_value }
|
107
109
|
end
|
108
110
|
end
|
109
111
|
end
|
data/lib/typed_data/schema.rb
CHANGED
@@ -5,17 +5,16 @@ require "typed_data/schema/bytes_type"
|
|
5
5
|
require "typed_data/schema/enum_type"
|
6
6
|
require "typed_data/schema/float_type"
|
7
7
|
require "typed_data/schema/int_type"
|
8
|
+
require "typed_data/schema/long_type"
|
8
9
|
require "typed_data/schema/map_type"
|
9
10
|
require "typed_data/schema/null_type"
|
10
11
|
require "typed_data/schema/record_type"
|
11
12
|
require "typed_data/schema/string_type"
|
12
13
|
require "typed_data/schema/union_type"
|
14
|
+
require "typed_data/schema/errors"
|
13
15
|
|
14
16
|
module TypedData
|
15
17
|
class Schema
|
16
|
-
class UnknownField < StandardError; end
|
17
|
-
class UnsupportedType < StandardError; end
|
18
|
-
|
19
18
|
class << self
|
20
19
|
def build_type(type, logical_type = nil)
|
21
20
|
type = type.first if type.is_a?(Array) && type.size == 1
|
@@ -42,14 +41,16 @@ module TypedData
|
|
42
41
|
values = type["values"] || type[:values]
|
43
42
|
MapType.new(values.is_a?(Array) ? values : [values])
|
44
43
|
when "record"
|
45
|
-
RecordType.new(type["fields"] || type[:fields])
|
44
|
+
RecordType.new(type["name"] || type[:name], type["fields"] || type[:fields])
|
46
45
|
else
|
47
46
|
raise UnsupportedType, "Unknown type: #{subtype}"
|
48
47
|
end
|
49
48
|
when "boolean"
|
50
49
|
BooleanType.new(type, logical_type)
|
51
|
-
when "int"
|
50
|
+
when "int"
|
52
51
|
IntType.new(type, logical_type)
|
52
|
+
when "long"
|
53
|
+
LongType.new(type, logical_type)
|
53
54
|
when "float", "double"
|
54
55
|
FloatType.new(type, logical_type)
|
55
56
|
when "bytes"
|
@@ -72,7 +73,7 @@ module TypedData
|
|
72
73
|
if (schema["type"] || schema[:type]) != "record"
|
73
74
|
raise UnsupportedType, 'The root type must be "record"'
|
74
75
|
end
|
75
|
-
@root_type = RecordType.new(schema["fields"] || schema[:fields])
|
76
|
+
@root_type = RecordType.new(schema["name"] || schema[:name], schema["fields"] || schema[:fields])
|
76
77
|
end
|
77
78
|
end
|
78
79
|
end
|
@@ -3,6 +3,9 @@
|
|
3
3
|
module TypedData
|
4
4
|
class Schema
|
5
5
|
class IntType < Type
|
6
|
+
VALUE_RANGE = -2**31 .. 2**31 - 1
|
7
|
+
SUPPORTED_LOGICAL_TYPES = %w[date time-millis]
|
8
|
+
|
6
9
|
def to_s
|
7
10
|
if @logical_type
|
8
11
|
"#{@name}_#{@logical_type.gsub("-", "_")}"
|
@@ -11,18 +14,12 @@ module TypedData
|
|
11
14
|
end
|
12
15
|
end
|
13
16
|
|
14
|
-
def coerce(value
|
17
|
+
def coerce(value)
|
15
18
|
case @logical_type
|
16
19
|
when "date"
|
17
20
|
(Date.new(1970, 1, 1) + value).to_s
|
18
21
|
when "time-millis"
|
19
22
|
Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%T.%3N")
|
20
|
-
when "time-micros"
|
21
|
-
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
|
22
|
-
when "timestamp-millis"
|
23
|
-
Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
|
24
|
-
when "timestamp-micros"
|
25
|
-
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
|
26
23
|
else
|
27
24
|
value
|
28
25
|
end
|
@@ -33,7 +30,7 @@ module TypedData
|
|
33
30
|
end
|
34
31
|
|
35
32
|
def match?(value)
|
36
|
-
value.is_a?(Integer)
|
33
|
+
value.is_a?(Integer) && VALUE_RANGE.cover?(value)
|
37
34
|
end
|
38
35
|
end
|
39
36
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module TypedData
|
4
|
+
class Schema
|
5
|
+
class LongType < Type
|
6
|
+
SUPPORTED_LOGICAL_TYPES = %w[time-micros timestamp-millis timestamp-micros]
|
7
|
+
|
8
|
+
def to_s
|
9
|
+
if @logical_type
|
10
|
+
"#{@name}_#{@logical_type.gsub("-", "_")}"
|
11
|
+
else
|
12
|
+
@name
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def coerce(value)
|
17
|
+
case @logical_type
|
18
|
+
when "time-micros"
|
19
|
+
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
|
20
|
+
when "timestamp-millis"
|
21
|
+
Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
|
22
|
+
when "timestamp-micros"
|
23
|
+
Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
|
24
|
+
else
|
25
|
+
value
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def primitive?
|
30
|
+
true
|
31
|
+
end
|
32
|
+
|
33
|
+
def match?(value)
|
34
|
+
value.is_a?(Integer)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -4,7 +4,8 @@ module TypedData
|
|
4
4
|
class Schema
|
5
5
|
class RecordType < Type
|
6
6
|
# @param fields [Array] an array of "fields" in an Avro schema
|
7
|
-
def initialize(fields)
|
7
|
+
def initialize(name, fields)
|
8
|
+
@name = name
|
8
9
|
@field_to_type = fields.each_with_object({}) do |field, h|
|
9
10
|
h[field["name"] || field[:name]] = Schema.build_type(field["type"] || field[:type])
|
10
11
|
end
|
@@ -21,8 +22,12 @@ module TypedData
|
|
21
22
|
end
|
22
23
|
end
|
23
24
|
|
25
|
+
def find_match(value)
|
26
|
+
raise InvalidValue, %Q{the value #{value.inspect} doesn't match the type #{self}}
|
27
|
+
end
|
28
|
+
|
24
29
|
def match?(value)
|
25
|
-
value.is_a?(Hash)
|
30
|
+
value.is_a?(Hash) && value.all? { |k, v| @field_to_type[k]&.match?(v) }
|
26
31
|
end
|
27
32
|
end
|
28
33
|
end
|
@@ -1,10 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require "typed_data/schema/errors"
|
2
3
|
|
3
4
|
module TypedData
|
4
5
|
class Schema
|
5
6
|
class Type
|
7
|
+
SUPPORTED_LOGICAL_TYPES = []
|
8
|
+
|
6
9
|
def initialize(name, logical_type = nil)
|
7
10
|
@name = name
|
11
|
+
if logical_type && !self.class::SUPPORTED_LOGICAL_TYPES.include?(logical_type)
|
12
|
+
raise UnsupportedType, %Q{#{name} doesn't support the logical type "#{logical_type}"}
|
13
|
+
end
|
8
14
|
@logical_type = logical_type
|
9
15
|
end
|
10
16
|
|
@@ -12,7 +18,7 @@ module TypedData
|
|
12
18
|
@name
|
13
19
|
end
|
14
20
|
|
15
|
-
def coerce(value
|
21
|
+
def coerce(value)
|
16
22
|
value
|
17
23
|
end
|
18
24
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
require "typed_data/schema/errors"
|
2
3
|
|
3
4
|
module TypedData
|
4
5
|
class Schema
|
@@ -6,41 +7,29 @@ module TypedData
|
|
6
7
|
# @param types [Array<String>]
|
7
8
|
def initialize(types)
|
8
9
|
@types = types.map(&Schema.method(:build_type))
|
9
|
-
@
|
10
|
+
@nullable_single = @types.size == 2 && @types.any? { |t| t.is_a?(NullType) }
|
11
|
+
@nullable_primitive = @nullable_single && @types.any?(&:primitive?)
|
10
12
|
end
|
11
13
|
|
12
14
|
def to_s
|
13
15
|
@nullable_primitive ? @types.first.to_s : "union_#{@types.map(&:to_s).join("_")}"
|
14
16
|
end
|
15
17
|
|
16
|
-
def coerce(value, formatter:)
|
17
|
-
return value if @nullable_primitive
|
18
|
-
|
19
|
-
type = find_match(value)
|
20
|
-
if type.is_a?(NullType)
|
21
|
-
default_value(formatter)
|
22
|
-
else
|
23
|
-
default_value(formatter).merge!(formatter.call(type.to_s) => type.coerce(value, formatter: formatter).to_s)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
18
|
def primitive?
|
28
19
|
false
|
29
20
|
end
|
30
21
|
|
31
22
|
def find_match(value)
|
32
|
-
@types.find { |t| t.match?(value) }
|
23
|
+
@types.find { |t| t.match?(value) } or
|
24
|
+
raise InvalidValue, %Q{the value #{value.inspect} doesn't match the type #{@types.map(&:to_s)}}
|
33
25
|
end
|
34
26
|
|
35
27
|
def match?(value)
|
36
28
|
@types.any? { |t| t.match?(value) }
|
37
29
|
end
|
38
30
|
|
39
|
-
def
|
40
|
-
@
|
41
|
-
next if t.is_a?(NullType)
|
42
|
-
v[formatter.call(t.to_s)] = t.primitive? || t.is_a?(EnumType) ? nil : []
|
43
|
-
end
|
31
|
+
def nullable_single?
|
32
|
+
@nullable_single
|
44
33
|
end
|
45
34
|
end
|
46
35
|
end
|
data/lib/typed_data/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: typed_data
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- abicky
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: avro
|
@@ -63,8 +63,10 @@ files:
|
|
63
63
|
- lib/typed_data/schema/boolean_type.rb
|
64
64
|
- lib/typed_data/schema/bytes_type.rb
|
65
65
|
- lib/typed_data/schema/enum_type.rb
|
66
|
+
- lib/typed_data/schema/errors.rb
|
66
67
|
- lib/typed_data/schema/float_type.rb
|
67
68
|
- lib/typed_data/schema/int_type.rb
|
69
|
+
- lib/typed_data/schema/long_type.rb
|
68
70
|
- lib/typed_data/schema/map_type.rb
|
69
71
|
- lib/typed_data/schema/null_type.rb
|
70
72
|
- lib/typed_data/schema/record_type.rb
|
@@ -94,7 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
94
96
|
- !ruby/object:Gem::Version
|
95
97
|
version: '0'
|
96
98
|
requirements: []
|
97
|
-
rubygems_version: 3.1.
|
99
|
+
rubygems_version: 3.1.4
|
98
100
|
signing_key:
|
99
101
|
specification_version: 4
|
100
102
|
summary: A library that converts hash objects managed by an Avro schema
|