typed_data 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/test.yml +22 -0
- data/.gitignore +12 -0
- data/.rspec +3 -0
- data/.travis.yml +6 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +128 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/typed_data/converter.rb +106 -0
- data/lib/typed_data/schema/array_type.rb +35 -0
- data/lib/typed_data/schema/boolean_type.rb +15 -0
- data/lib/typed_data/schema/bytes_type.rb +19 -0
- data/lib/typed_data/schema/enum_type.rb +20 -0
- data/lib/typed_data/schema/float_type.rb +15 -0
- data/lib/typed_data/schema/int_type.rb +40 -0
- data/lib/typed_data/schema/map_type.rb +32 -0
- data/lib/typed_data/schema/null_type.rb +15 -0
- data/lib/typed_data/schema/record_type.rb +29 -0
- data/lib/typed_data/schema/string_type.rb +15 -0
- data/lib/typed_data/schema/type.rb +28 -0
- data/lib/typed_data/schema/union_type.rb +44 -0
- data/lib/typed_data/schema.rb +78 -0
- data/lib/typed_data/version.rb +3 -0
- data/lib/typed_data.rb +7 -0
- data/typed_data.gemspec +27 -0
- metadata +101 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a692d748f21fdbfccf38620f4bb2a0845544e4d2b8dd27ac45ae18dd11c7565c
|
4
|
+
data.tar.gz: 2ed2ade4631169260f48100a5658a4ed4c8f79c25f664ea7ba75700de37cee4c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cef89670d16f7d370ca4e5dfe58a4d5e42eb72453e1649488dfccbfd0451602bf55468ab33e3343a274f4a63baddb589cdf82b92e8e7820354cd0489a7670e52
|
7
|
+
data.tar.gz: c3c1f4a9cfdf5b6833ea92129a981ceaa5e5576b18a62e0044242ffa5f50afd4931f23c1b9f16b1ebebdfd2983bab94d288f2bd3cf2eed75ae0f3f53066f5e75
|
@@ -0,0 +1,22 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
pull_request:
|
6
|
+
|
7
|
+
jobs:
|
8
|
+
build:
|
9
|
+
|
10
|
+
runs-on: ubuntu-latest
|
11
|
+
|
12
|
+
steps:
|
13
|
+
- uses: actions/checkout@v2
|
14
|
+
- name: Set up Ruby 2.6
|
15
|
+
uses: actions/setup-ruby@v1
|
16
|
+
with:
|
17
|
+
ruby-version: 2.6.x
|
18
|
+
- name: Build and test with Rake
|
19
|
+
run: |
|
20
|
+
gem install bundler
|
21
|
+
bundle install --jobs 4 --retry 3
|
22
|
+
bundle exec rake
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2020 Takeshi Arabiki (abicky)
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# TypedData
|
2
|
+
|
3
|
+
![](https://github.com/abicky/typed_data/workflows/CI/badge.svg?branch=master)
|
4
|
+
|
5
|
+
TypedData is a library that converts hash objects managed by an Avro schema so that the objects can be loaded into BigQuery.
|
6
|
+
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
Add this line to your application's Gemfile:
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
gem 'typed_data'
|
14
|
+
```
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle install
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install typed_data
|
23
|
+
|
24
|
+
## Usage
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
require "typed_data"
|
28
|
+
|
29
|
+
schema = {
|
30
|
+
"name" => "Record",
|
31
|
+
"type" => "record",
|
32
|
+
"fields" => [
|
33
|
+
{
|
34
|
+
"name" => "int_field",
|
35
|
+
"type" => "int",
|
36
|
+
},
|
37
|
+
{
|
38
|
+
"name" => "int_or_string_field",
|
39
|
+
"type" => ["int", "string"],
|
40
|
+
},
|
41
|
+
{
|
42
|
+
"name" => "array_field",
|
43
|
+
"type" => {
|
44
|
+
"type" => "array",
|
45
|
+
"items" => "int",
|
46
|
+
},
|
47
|
+
},
|
48
|
+
{
|
49
|
+
"name" => "union_type_array_field",
|
50
|
+
"type" => {
|
51
|
+
"type" => "array",
|
52
|
+
"items" => ["int", "string"],
|
53
|
+
},
|
54
|
+
},
|
55
|
+
{
|
56
|
+
"name" => "nested_map_field",
|
57
|
+
"type" => {
|
58
|
+
"type" => "map",
|
59
|
+
"values" => {
|
60
|
+
"type" => "map",
|
61
|
+
"values" => ["int", "string"],
|
62
|
+
},
|
63
|
+
},
|
64
|
+
},
|
65
|
+
],
|
66
|
+
}
|
67
|
+
|
68
|
+
converter = TypedData::Converter.new(schema)
|
69
|
+
converter.convert({
|
70
|
+
"int_field" => 1,
|
71
|
+
"int_or_string_field" => "string",
|
72
|
+
"array_field" => [1, 2],
|
73
|
+
"union_type_array_field" => [1, "2"],
|
74
|
+
"nested_map_field" => {
|
75
|
+
"nested_map" => {
|
76
|
+
"key1" => 1,
|
77
|
+
"key2" => "2",
|
78
|
+
},
|
79
|
+
},
|
80
|
+
})
|
81
|
+
#=> {"int_field"=>1,
|
82
|
+
# "int_or_string_field"=>{"int_value"=>nil, "string_value"=>"string"},
|
83
|
+
# "array_field"=>[1, 2],
|
84
|
+
# "union_type_array_field"=>
|
85
|
+
# [{"int_value"=>"1", "string_value"=>nil},
|
86
|
+
# {"int_value"=>nil, "string_value"=>"2"}],
|
87
|
+
# "nested_map_field"=>
|
88
|
+
# [{"key"=>"nested_map",
|
89
|
+
# "value"=>
|
90
|
+
# [{"key"=>"key1", "value"=>{"int_value"=>"1", "string_value"=>nil}},
|
91
|
+
# {"key"=>"key2", "value"=>{"int_value"=>nil, "string_value"=>"2"}}]}]}
|
92
|
+
```
|
93
|
+
|
94
|
+
You can specify a formatter for the union type keys. For example, a formatter for tables managed by the [Google BigQuery Sink Connector](https://docs.confluent.io/current/connect/kafka-connect-bigquery/index.html) looks like this:
|
95
|
+
|
96
|
+
```ruby
|
97
|
+
schema = {
|
98
|
+
"name" => "Record",
|
99
|
+
"type" => "record",
|
100
|
+
"fields" => [
|
101
|
+
{
|
102
|
+
"name" => "int_or_string_field",
|
103
|
+
"type" => ["int", "string"],
|
104
|
+
},
|
105
|
+
],
|
106
|
+
}
|
107
|
+
|
108
|
+
converter = TypedData::Converter.new(schema)
|
109
|
+
converter.union_type_key_formatter = ->(type) { type.split("_").first }
|
110
|
+
converter.convert({ "int_or_string_field" => "string" })
|
111
|
+
#=> {"int_or_string_field"=>{"int"=>nil, "string"=>"string"}}
|
112
|
+
```
|
113
|
+
|
114
|
+
|
115
|
+
## Development
|
116
|
+
|
117
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
118
|
+
|
119
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
120
|
+
|
121
|
+
## Contributing
|
122
|
+
|
123
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/abicky/typed_data.
|
124
|
+
|
125
|
+
|
126
|
+
## License
|
127
|
+
|
128
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "typed_data"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "typed_data/schema"
|
3
|
+
|
4
|
+
module TypedData
  # Converts a hash described by an Avro record schema into a hash in which
  # maps become arrays of key/value pairs and union values become hashes keyed
  # by the member type, so that the result can be loaded into BigQuery.
  class Converter
    # Callable that maps a type name to the key used for that type inside a
    # converted union value. Defaults to "<type>_value".
    attr_accessor :union_type_key_formatter

    # @param schema [Hash] an Avro schema
    def initialize(schema)
      @schema = Schema.new(schema)
      @union_type_key_formatter = ->(type) { "#{type}_value" }
    end

    # Converts one record using the root type of the schema.
    #
    # @param data [Hash]
    # @return [Hash]
    def convert(data)
      convert_record(@schema.root_type, data)
    end

    private

    # Converts every field of a record according to the type of that field.
    #
    # @param type [Schema::RecordType]
    # @param record [Hash{String => Object}]
    # @return [Hash]
    def convert_record(type, record)
      converted = {}
      record.each do |key, value|
        subtype = type.find_type(key)
        converted[key] =
          case subtype
          when Schema::ArrayType  then convert_array(subtype, value)
          when Schema::MapType    then convert_map(subtype, value)
          when Schema::RecordType then convert_record(subtype, value)
          when Schema::UnionType  then convert_union(subtype, value)
          else subtype.coerce(value, formatter: union_type_key_formatter)
          end
      end
      converted
    end

    # Converts the elements of an array. nil elements are dropped, and the
    # elements of a nested array are appended directly to the result.
    #
    # @param type [Schema::ArrayType]
    # @param array [Array<Object>]
    # @return [Array]
    def convert_array(type, array)
      result = []
      array.each do |element|
        next if element.nil?

        subtype = type.find_match(element)
        case subtype
        when Schema::ArrayType
          result.concat(convert_array(subtype, element))
        when Schema::MapType
          result.push(convert_map(subtype, element))
        when Schema::RecordType
          result.push(convert_record(subtype, element))
        when Schema::UnionType
          result.push(convert_union(subtype, element))
        else
          result.push(type.coerce(element, formatter: union_type_key_formatter))
        end
      end
      result
    end

    # Converts a map into an array of {"key" => ..., "value" => ...} hashes.
    #
    # @param type [Schema::MapType]
    # @param map [Hash{String => Object}]
    # @return [Array<Hash>]
    def convert_map(type, map)
      map.map do |key, value|
        subtype = type.find_match(value)
        converted_value =
          case subtype
          when Schema::ArrayType  then convert_array(subtype, value)
          when Schema::MapType    then convert_map(subtype, value)
          when Schema::RecordType then convert_record(subtype, value)
          when Schema::UnionType  then convert_union(subtype, value)
          else type.coerce(value, formatter: union_type_key_formatter)
          end
        { "key" => key, "value" => converted_value }
      end
    end

    # Converts a union value into a hash that has one entry per member type;
    # only the entry of the matching member type carries the converted value.
    #
    # @param type [Schema::UnionType]
    # @param value [Object]
    # @return [Hash]
    def convert_union(type, value)
      subtype = type.find_match(value)
      nested_converter =
        case subtype
        when Schema::ArrayType  then :convert_array
        when Schema::MapType    then :convert_map
        when Schema::RecordType then :convert_record
        when Schema::UnionType  then :convert_union
        end
      # Anything that is not a nested complex type is handled by the union itself.
      return type.coerce(value, formatter: union_type_key_formatter) if nested_converter.nil?

      type.default_value(union_type_key_formatter)
          .merge!(union_type_key_formatter.call(subtype.to_s) => send(nested_converter, subtype, value))
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "typed_data/schema/type"
|
3
|
+
|
4
|
+
module TypedData
  class Schema
    # Avro "array" type. Wraps the type built from the item types of the array.
    class ArrayType < Type
      # NOTE(review): @fields is not assigned anywhere in this class.
      attr_reader :fields

      # @param types [Array<String>] item types; "null" entries are ignored
      def initialize(types)
        non_null_types = types.reject { |t| t == "null" }
        @type = Schema.build_type(non_null_types)
      end

      # @return [String] the item type name prefixed with "array_"
      def to_s
        "array_#{@type}"
      end

      # Coerces a single element by delegating to the item type.
      def coerce(value, formatter:)
        @type.coerce(value, formatter: formatter)
      end

      def primitive?
        false
      end

      # Returns the item type when it matches the element directly; otherwise
      # asks the item type for its own matching member type.
      def find_match(value)
        return @type if @type.match?(value)

        @type.find_match(value)
      end

      # @return [Boolean] true when value is an Array whose elements all match the item type
      def match?(value)
        return false unless value.is_a?(Array)

        value.all? { |element| @type.match?(element) }
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module TypedData
  class Schema
    # Type for binary values; values are emitted as Base64 text.
    class BytesType < Type
      # Encodes the value with the "m0" pack directive (Base64 without line feeds).
      #
      # @param value [String]
      # @return [String]
      def coerce(value, formatter:)
        [value].pack("m0")
      end

      def primitive?
        true
      end

      # @return [Boolean] true when value is a String
      def match?(value)
        value.is_a?(String)
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module TypedData
  class Schema
    # Avro "enum" type: a named set of allowed symbols.
    class EnumType < Type
      # @param name [String] name of the enum
      # @param symbols [Array<String>] allowed values
      def initialize(name, symbols)
        @name = name
        @symbols = symbols
      end

      def primitive?
        false
      end

      # @return [Boolean] true when value is one of the enum's symbols
      def match?(value)
        @symbols.include?(value)
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "date" # Date is used by #coerce for the "date" logical type

module TypedData
  class Schema
    # Integer type, optionally carrying a logical type that describes a date,
    # a time of day or a timestamp.
    class IntType < Type
      # @return [String] the type name, suffixed with the logical type
      #   (dashes replaced by underscores) when one is set
      def to_s
        if @logical_type
          "#{@name}_#{@logical_type.gsub("-", "_")}"
        else
          @name
        end
      end

      # Formats the integer according to the logical type; without a
      # recognized logical type the value is returned unchanged.
      #
      # @param value [Integer]
      # @return [String, Integer]
      def coerce(value, formatter:)
        case @logical_type
        when "date"
          # value is an offset in days from 1970-01-01
          (Date.new(1970, 1, 1) + value).to_s
        when "time-millis"
          # Time.at takes (seconds, microseconds), hence the * 1_000 on the remainder
          Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%T.%3N")
        when "time-micros"
          Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
        when "timestamp-millis"
          Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
        when "timestamp-micros"
          Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
        else
          value
        end
      end

      def primitive?
        true
      end

      # @return [Boolean] true when value is an Integer
      def match?(value)
        value.is_a?(Integer)
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module TypedData
  class Schema
    # Avro "map" type. Wraps the type built from the value types of the map.
    class MapType < Type
      # @param types [Array<String>] value types
      def initialize(types)
        @type = Schema.build_type(types)
      end

      # @return [String] the value type name prefixed with "map_"
      def to_s
        "map_#{@type}"
      end

      # Coerces a single map value by delegating to the value type.
      def coerce(value, formatter:)
        @type.coerce(value, formatter: formatter)
      end

      def primitive?
        false
      end

      # Returns the value type when it matches directly; otherwise asks the
      # value type for its own matching member type.
      def find_match(value)
        return @type if @type.match?(value)

        @type.find_match(value)
      end

      # @return [Boolean] true when value is a Hash whose values all match the value type
      def match?(value)
        return false unless value.is_a?(Hash)

        value.all? { |_key, element| @type.match?(element) }
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module TypedData
  class Schema
    # Avro "record" type: maps each field name to the type of that field.
    class RecordType < Type
      # @param fields [Array] an array of "fields" in an Avro schema; each
      #   entry may use string or symbol keys for "name" and "type"
      def initialize(fields)
        @field_to_type = {}
        fields.each do |field|
          field_name = field["name"] || field[:name]
          @field_to_type[field_name] = Schema.build_type(field["type"] || field[:type])
        end
      end

      def primitive?
        false
      end

      # Looks up the type of a field.
      #
      # @param field_name [String, Symbol]
      # @raise [UnknownField] when the record has no field with that name
      def find_type(field_name)
        key = field_name.to_s
        return @field_to_type[key] if @field_to_type.key?(key)

        raise UnknownField, "Unknown field \"#{field_name}\""
      end

      # @return [Boolean] true when value is a Hash
      def match?(value)
        value.is_a?(Hash)
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module TypedData
  class Schema
    # Base class of all schema types. Subclasses must implement #primitive?
    # and #match?.
    class Type
      # @param name [String] type name
      # @param logical_type [String, nil] logical type, if any
      def initialize(name, logical_type = nil)
        @name = name
        @logical_type = logical_type
      end

      # @return [String] the type name
      def to_s
        @name
      end

      # Returns the value unchanged; subclasses override this to convert values.
      def coerce(value, formatter:)
        value
      end

      # @raise [NotImplementedError] unless overridden by a subclass
      def primitive?
        raise NotImplementedError, "#{self.class}##{__method__} is not implemented"
      end

      # @raise [NotImplementedError] unless overridden by a subclass
      def match?(value)
        raise NotImplementedError, "#{self.class}##{__method__} is not implemented"
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module TypedData
  class Schema
    # Avro union type: a value matches when any of the member types matches.
    class UnionType < Type
      # @param types [Array<String>] member types of the union
      def initialize(types)
        @types = types.map(&Schema.method(:build_type))
      end

      # @return [String] "union_" followed by the member type names joined with "_"
      def to_s
        "union_#{@types.map(&:to_s).join("_")}"
      end

      # Builds a hash with one key per non-null member type and stores the
      # coerced value, converted to a String, under the key of the matching
      # member type.
      #
      # @param value [Object]
      # @param formatter [#call] maps a type name to the key used in the result
      # @return [Hash]
      # @raise [ArgumentError] when no member type matches the value
      def coerce(value, formatter:)
        type = find_match(value)
        # Fail with a clear message instead of a NoMethodError on nil below.
        raise ArgumentError, "No type in #{self} matches #{value.inspect}" if type.nil?

        if type.is_a?(NullType)
          default_value(formatter)
        else
          default_value(formatter).merge!(formatter.call(type.to_s) => type.coerce(value, formatter: formatter).to_s)
        end
      end

      def primitive?
        false
      end

      # @return [Type, nil] the first member type matching the value
      def find_match(value)
        @types.find { |t| t.match?(value) }
      end

      # @return [Boolean] true when any member type matches the value
      def match?(value)
        @types.any? { |t| t.match?(value) }
      end

      # Builds the hash of empty entries for this union: nil for primitive and
      # enum member types, [] for the other member types. Null members get no entry.
      #
      # @param formatter [#call]
      # @return [Hash]
      def default_value(formatter)
        @types.each_with_object({}) do |t, v|
          next if t.is_a?(NullType)
          v[formatter.call(t.to_s)] = (t.primitive? || t.is_a?(EnumType)) ? nil : []
        end
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "typed_data/schema/array_type"
|
3
|
+
require "typed_data/schema/boolean_type"
|
4
|
+
require "typed_data/schema/bytes_type"
|
5
|
+
require "typed_data/schema/enum_type"
|
6
|
+
require "typed_data/schema/float_type"
|
7
|
+
require "typed_data/schema/int_type"
|
8
|
+
require "typed_data/schema/map_type"
|
9
|
+
require "typed_data/schema/null_type"
|
10
|
+
require "typed_data/schema/record_type"
|
11
|
+
require "typed_data/schema/string_type"
|
12
|
+
require "typed_data/schema/union_type"
|
13
|
+
|
14
|
+
module TypedData
  # Wraps an Avro schema whose root type is "record" and builds the type
  # objects used for conversion.
  class Schema
    class UnknownField < StandardError; end
    class UnsupportedType < StandardError; end

    class << self
      # Builds a type object from an Avro type declaration.
      #
      # @param type [String, Hash, Array] a type name, a type definition hash
      #   (string or symbol keys) or an array of union member types
      # @param logical_type [String, nil] logical type applied to a primitive type
      # @return [Type]
      # @raise [UnsupportedType] when the type is not recognized
      def build_type(type, logical_type = nil)
        # An array with a single entry is treated as that entry, not as a union.
        type = type.first if type.is_a?(Array) && type.size == 1

        case type
        when Array
          UnionType.new(type)
        when Hash
          subtype = type["type"] || type[:type]
          logical_type = type["logicalType"] || type[:logicalType]
          if logical_type
            return build_type(subtype, logical_type)
          end

          case subtype
          when "enum"
            EnumType.new(type["name"] || type[:name], type["symbols"] || type[:symbols])
          when "fixed"
            BytesType.new(type["name"] || type[:name] || "bytes")
          when "array"
            items = type["items"] || type[:items]
            ArrayType.new(items.is_a?(Array) ? items : [items])
          when "map"
            values = type["values"] || type[:values]
            MapType.new(values.is_a?(Array) ? values : [values])
          when "record"
            RecordType.new(type["fields"] || type[:fields])
          when "boolean", "int", "long", "float", "double", "bytes", "string", "null"
            # In Avro, {"type" => "string"} is equivalent to the plain name "string".
            build_type(subtype)
          else
            raise UnsupportedType, "Unknown type: #{subtype}"
          end
        when "boolean"
          BooleanType.new(type, logical_type)
        when "int", "long"
          IntType.new(type, logical_type)
        when "float", "double"
          FloatType.new(type, logical_type)
        when "bytes"
          BytesType.new(type, logical_type)
        when "string"
          StringType.new(type, logical_type)
        when "null"
          NullType.new(type, logical_type)
        else
          raise UnsupportedType, "Unknown type: #{type}"
        end
      end
    end

    # @return [RecordType] the type built from the top-level fields
    attr_reader :root_type

    # @param schema [Hash] an Avro schema
    # @raise [UnsupportedType] when the root type is not "record"
    def initialize(schema)
      @schema = schema
      if (schema["type"] || schema[:type]) != "record"
        raise UnsupportedType, 'The root type must be "record"'
      end
      @root_type = RecordType.new(schema["fields"] || schema[:fields])
    end
  end
end
|
data/lib/typed_data.rb
ADDED
data/typed_data.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require_relative 'lib/typed_data/version'
|
2
|
+
|
3
|
+
Gem::Specification.new do |spec|
|
4
|
+
spec.name = "typed_data"
|
5
|
+
spec.version = TypedData::VERSION
|
6
|
+
spec.authors = ["abicky"]
|
7
|
+
spec.email = ["takeshi.arabiki@gmail.com"]
|
8
|
+
|
9
|
+
spec.summary = %q{A library that converts hash objects managed by an Avro schema}
|
10
|
+
spec.description = %q{TypedData is a library that converts hash objects managed by an Avro schema so that the objects can be loaded into BigQuery.}
|
11
|
+
spec.homepage = "https://github.com/abicky/typed_data"
|
12
|
+
spec.license = "MIT"
|
13
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
|
14
|
+
|
15
|
+
spec.metadata["homepage_uri"] = spec.homepage
|
16
|
+
spec.metadata["source_code_uri"] = "https://github.com/abicky/typed_data"
|
17
|
+
|
18
|
+
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
19
|
+
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
20
|
+
end
|
21
|
+
spec.bindir = "exe"
|
22
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
23
|
+
spec.require_paths = ["lib"]
|
24
|
+
|
25
|
+
spec.add_development_dependency "avro"
|
26
|
+
spec.add_development_dependency "google-cloud-bigquery"
|
27
|
+
end
|
metadata
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: typed_data
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- abicky
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-04-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: avro
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: google-cloud-bigquery
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: TypedData is a library that converts hash objects managed by an Avro
|
42
|
+
schema so that the objects can be loaded into BigQuery.
|
43
|
+
email:
|
44
|
+
- takeshi.arabiki@gmail.com
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- ".github/workflows/test.yml"
|
50
|
+
- ".gitignore"
|
51
|
+
- ".rspec"
|
52
|
+
- ".travis.yml"
|
53
|
+
- Gemfile
|
54
|
+
- LICENSE.txt
|
55
|
+
- README.md
|
56
|
+
- Rakefile
|
57
|
+
- bin/console
|
58
|
+
- bin/setup
|
59
|
+
- lib/typed_data.rb
|
60
|
+
- lib/typed_data/converter.rb
|
61
|
+
- lib/typed_data/schema.rb
|
62
|
+
- lib/typed_data/schema/array_type.rb
|
63
|
+
- lib/typed_data/schema/boolean_type.rb
|
64
|
+
- lib/typed_data/schema/bytes_type.rb
|
65
|
+
- lib/typed_data/schema/enum_type.rb
|
66
|
+
- lib/typed_data/schema/float_type.rb
|
67
|
+
- lib/typed_data/schema/int_type.rb
|
68
|
+
- lib/typed_data/schema/map_type.rb
|
69
|
+
- lib/typed_data/schema/null_type.rb
|
70
|
+
- lib/typed_data/schema/record_type.rb
|
71
|
+
- lib/typed_data/schema/string_type.rb
|
72
|
+
- lib/typed_data/schema/type.rb
|
73
|
+
- lib/typed_data/schema/union_type.rb
|
74
|
+
- lib/typed_data/version.rb
|
75
|
+
- typed_data.gemspec
|
76
|
+
homepage: https://github.com/abicky/typed_data
|
77
|
+
licenses:
|
78
|
+
- MIT
|
79
|
+
metadata:
|
80
|
+
homepage_uri: https://github.com/abicky/typed_data
|
81
|
+
source_code_uri: https://github.com/abicky/typed_data
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options: []
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: 2.3.0
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
requirements: []
|
97
|
+
rubygems_version: 3.1.2
|
98
|
+
signing_key:
|
99
|
+
specification_version: 4
|
100
|
+
summary: A library that converts hash objects managed by an Avro schema
|
101
|
+
test_files: []
|