typed_data 0.1.1 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa8bfbb2363bcf13407a8bb1fdeed0099f16a5cda9296dd77489fb2b2d7bb024
4
- data.tar.gz: 7c446db80a65cc04d22584f38d53c313092432e71ba64321c22d43967a976f8a
3
+ metadata.gz: 8328e9cdeaee3bbab6b796988ff1436fc913aa8be6078b99848fe96a4d109156
4
+ data.tar.gz: f3b471b2a4cbc86c8a8a3b07d038c373ca7272f5417f7dd4301cd25a187d4383
5
5
  SHA512:
6
- metadata.gz: 8e783163ba498a893cacff761e9d502acf0e524e8f57790cf3500ee8e29b2310133dfacd76f017d47cf81bde4f9f0550d4bc78523e7213e7dbd80adae8864215
7
- data.tar.gz: 31376d90ff56ab09b0a004b0d0bc8efaed10a6ec4d89cf8eee7c4fd2532c5f533507b96f29af365d6d413475f75d2be1a206d157ce3b19bc847ab24d2c52a17f
6
+ metadata.gz: 15cf75777af9064e4804709b3c4aef5b3f29d867d29eefa4c93e20a5216b75c1e988809ecf985bf285178e2f1e36ff513316aeb69f179c695f196a511ce439e6
7
+ data.tar.gz: a00c6feb6091ed05d0e28c1afb26d386cab86aa3c110792423bd17f03c8637fb8fab500ea0e220c2c6dc8942f3b6fdc8a86f3668eb23c817d53c847de9495afe
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # TypedData
2
2
 
3
- ![](https://github.com/abicky/ecsmec/workflows/test/badge.svg?branch=master)
3
+ ![](https://github.com/abicky/typed_data/workflows/CI/badge.svg?branch=master)
4
4
 
5
5
  TypedData is a library that converts hash objects managed by an Avro schema so that the objects can be loaded into BigQuery.
6
6
 
@@ -79,36 +79,42 @@ converter.convert({
79
79
  },
80
80
  })
81
81
  #=> {"int_field"=>1,
82
- # "int_or_string_field"=>{"int_value"=>nil, "string_value"=>"string"},
82
+ # "int_or_string_field"=>{"string_value"=>"string"},
83
83
  # "array_field"=>[1, 2],
84
- # "union_type_array_field"=>
85
- # [{"int_value"=>"1", "string_value"=>nil},
86
- # {"int_value"=>nil, "string_value"=>"2"}],
84
+ # "union_type_array_field"=>[{"int_value"=>"1"}, {"string_value"=>"2"}],
87
85
  # "nested_map_field"=>
88
86
  # [{"key"=>"nested_map",
89
87
  # "value"=>
90
- # [{"key"=>"key1", "value"=>{"int_value"=>"1", "string_value"=>nil}},
91
- # {"key"=>"key2", "value"=>{"int_value"=>nil, "string_value"=>"2"}}]}]}
88
+ # [{"key"=>"key1", "value"=>{"int_value"=>"1"}},
89
+ # {"key"=>"key2", "value"=>{"string_value"=>"2"}}]}]}
92
90
  ```
93
91
 
94
92
  You can specify a formatter for the union type keys. For example, the formatter for tables managed by [Google BigQuery Sink Connector](https://docs.confluent.io/current/connect/kafka-connect-bigquery/index.html) is like below:
95
93
 
96
94
  ```ruby
97
- schema = {
98
- "name" => "Record",
99
- "type" => "record",
100
- "fields" => [
101
- {
102
- "name" => "int_or_string_field",
103
- "type" => ["int", "string"],
104
- },
105
- ],
106
- }
107
-
108
95
  converter = TypedData::Converter.new(schema)
109
96
  converter.union_type_key_formatter = ->(type) { type.split("_").first }
110
- converter.convert({ "int_or_string_field" => "string" })
111
- #=> {"int_or_string_field"=>{"int"=>nil, "string"=>"string"}}
97
+ converter.convert({
98
+ "int_field" => 1,
99
+ "int_or_string_field" => "string",
100
+ "array_field" => [1, 2],
101
+ "union_type_array_field" => [1, "2"],
102
+ "nested_map_field" => {
103
+ "nested_map" => {
104
+ "key1" => 1,
105
+ "key2" => "2",
106
+ },
107
+ },
108
+ })
109
+ #=> {"int_field"=>1,
110
+ # "int_or_string_field"=>{"string"=>"string"},
111
+ # "array_field"=>[1, 2],
112
+ # "union_type_array_field"=>[{"int"=>"1"}, {"string"=>"2"}],
113
+ # "nested_map_field"=>
114
+ # [{"key"=>"nested_map",
115
+ # "value"=>
116
+ # [{"key"=>"key1", "value"=>{"int"=>"1"}},
117
+ # {"key"=>"key2", "value"=>{"string"=>"2"}}]}]}
112
118
  ```
113
119
 
114
120
 
@@ -120,7 +126,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
120
126
 
121
127
  ## Contributing
122
128
 
123
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/typed_data.
129
+ Bug reports and pull requests are welcome on GitHub at https://github.com/abicky/typed_data.
124
130
 
125
131
 
126
132
  ## License
@@ -31,9 +31,9 @@ module TypedData
31
31
  when Schema::RecordType
32
32
  converted[key] = convert_record(subtype, value)
33
33
  when Schema::UnionType
34
- converted[key] = convert_union(subtype, value, as_record_field: true)
34
+ converted[key] = convert_union(subtype, value)
35
35
  else
36
- converted[key] = subtype.coerce(value, formatter: union_type_key_formatter)
36
+ converted[key] = subtype.coerce(value)
37
37
  end
38
38
  end
39
39
  end
@@ -55,7 +55,7 @@ module TypedData
55
55
  when Schema::UnionType
56
56
  ret << convert_union(subtype, value)
57
57
  else
58
- ret << type.coerce(value, formatter: union_type_key_formatter)
58
+ ret << subtype.coerce(value)
59
59
  end
60
60
  end
61
61
  end
@@ -75,16 +75,15 @@ module TypedData
75
75
  when Schema::UnionType
76
76
  value = convert_union(subtype, value)
77
77
  else
78
- value = type.coerce(value, formatter: union_type_key_formatter)
78
+ value = subtype.coerce(value)
79
79
  end
80
80
  ret << { "key" => key, "value" => value }
81
81
  end
82
82
  end
83
83
 
84
84
  # @param type [UnionType]
85
- # @param as_record_field [Boolean]
86
85
  # @param map [Object]
87
- def convert_union(type, value, as_record_field: false)
86
+ def convert_union(type, value)
88
87
  subtype = type.find_match(value)
89
88
  case subtype
90
89
  when Schema::ArrayType
@@ -95,15 +94,18 @@ module TypedData
95
94
  converted_value = convert_record(subtype, value)
96
95
  when Schema::UnionType
97
96
  converted_value = convert_union(subtype, value)
97
+ when Schema::NullType
98
+ converted_value = nil
98
99
  else
99
- return type.coerce(value, formatter: union_type_key_formatter)
100
+ converted_value = subtype.coerce(value)
100
101
  end
101
102
 
102
- if as_record_field
103
+ if type.nullable_single?
103
104
  converted_value
105
+ elsif subtype.is_a?(Schema::NullType)
106
+ {}
104
107
  else
105
- type.default_value(union_type_key_formatter)
106
- .merge!(union_type_key_formatter.call(subtype.to_s) => converted_value)
108
+ { union_type_key_formatter.call(subtype.to_s) => converted_value }
107
109
  end
108
110
  end
109
111
  end
@@ -5,17 +5,16 @@ require "typed_data/schema/bytes_type"
5
5
  require "typed_data/schema/enum_type"
6
6
  require "typed_data/schema/float_type"
7
7
  require "typed_data/schema/int_type"
8
+ require "typed_data/schema/long_type"
8
9
  require "typed_data/schema/map_type"
9
10
  require "typed_data/schema/null_type"
10
11
  require "typed_data/schema/record_type"
11
12
  require "typed_data/schema/string_type"
12
13
  require "typed_data/schema/union_type"
14
+ require "typed_data/schema/errors"
13
15
 
14
16
  module TypedData
15
17
  class Schema
16
- class UnknownField < StandardError; end
17
- class UnsupportedType < StandardError; end
18
-
19
18
  class << self
20
19
  def build_type(type, logical_type = nil)
21
20
  type = type.first if type.is_a?(Array) && type.size == 1
@@ -42,14 +41,16 @@ module TypedData
42
41
  values = type["values"] || type[:values]
43
42
  MapType.new(values.is_a?(Array) ? values : [values])
44
43
  when "record"
45
- RecordType.new(type["fields"] || type[:fields])
44
+ RecordType.new(type["name"] || type[:name], type["fields"] || type[:fields])
46
45
  else
47
46
  raise UnsupportedType, "Unknown type: #{subtype}"
48
47
  end
49
48
  when "boolean"
50
49
  BooleanType.new(type, logical_type)
51
- when "int", "long"
50
+ when "int"
52
51
  IntType.new(type, logical_type)
52
+ when "long"
53
+ LongType.new(type, logical_type)
53
54
  when "float", "double"
54
55
  FloatType.new(type, logical_type)
55
56
  when "bytes"
@@ -72,7 +73,7 @@ module TypedData
72
73
  if (schema["type"] || schema[:type]) != "record"
73
74
  raise UnsupportedType, 'The root type must be "record"'
74
75
  end
75
- @root_type = RecordType.new(schema["fields"] || schema[:fields])
76
+ @root_type = RecordType.new(schema["name"] || schema[:name], schema["fields"] || schema[:fields])
76
77
  end
77
78
  end
78
79
  end
@@ -15,10 +15,6 @@ module TypedData
15
15
  "array_#{@type}"
16
16
  end
17
17
 
18
- def coerce(value, formatter:)
19
- @type.coerce(value, formatter: formatter)
20
- end
21
-
22
18
  def primitive?
23
19
  false
24
20
  end
@@ -3,7 +3,7 @@
3
3
  module TypedData
4
4
  class Schema
5
5
  class BytesType < Type
6
- def coerce(value, formatter:)
6
+ def coerce(value)
7
7
  [value].pack("m0")
8
8
  end
9
9
 
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TypedData
4
+ class Schema
5
+ class UnknownField < StandardError; end
6
+ class UnsupportedType < StandardError; end
7
+ class InvalidValue < StandardError; end
8
+ end
9
+ end
@@ -3,6 +3,9 @@
3
3
  module TypedData
4
4
  class Schema
5
5
  class IntType < Type
6
+ VALUE_RANGE = -2**31 .. 2**31 - 1
7
+ SUPPORTED_LOGICAL_TYPES = %w[date time-millis]
8
+
6
9
  def to_s
7
10
  if @logical_type
8
11
  "#{@name}_#{@logical_type.gsub("-", "_")}"
@@ -11,18 +14,12 @@ module TypedData
11
14
  end
12
15
  end
13
16
 
14
- def coerce(value, formatter:)
17
+ def coerce(value)
15
18
  case @logical_type
16
19
  when "date"
17
20
  (Date.new(1970, 1, 1) + value).to_s
18
21
  when "time-millis"
19
22
  Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%T.%3N")
20
- when "time-micros"
21
- Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
22
- when "timestamp-millis"
23
- Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
24
- when "timestamp-micros"
25
- Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
26
23
  else
27
24
  value
28
25
  end
@@ -33,7 +30,7 @@ module TypedData
33
30
  end
34
31
 
35
32
  def match?(value)
36
- value.is_a?(Integer)
33
+ value.is_a?(Integer) && VALUE_RANGE.cover?(value)
37
34
  end
38
35
  end
39
36
  end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TypedData
4
+ class Schema
5
+ class LongType < Type
6
+ SUPPORTED_LOGICAL_TYPES = %w[time-micros timestamp-millis timestamp-micros]
7
+
8
+ def to_s
9
+ if @logical_type
10
+ "#{@name}_#{@logical_type.gsub("-", "_")}"
11
+ else
12
+ @name
13
+ end
14
+ end
15
+
16
+ def coerce(value)
17
+ case @logical_type
18
+ when "time-micros"
19
+ Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
20
+ when "timestamp-millis"
21
+ Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
22
+ when "timestamp-micros"
23
+ Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
24
+ else
25
+ value
26
+ end
27
+ end
28
+
29
+ def primitive?
30
+ true
31
+ end
32
+
33
+ def match?(value)
34
+ value.is_a?(Integer)
35
+ end
36
+ end
37
+ end
38
+ end
@@ -12,10 +12,6 @@ module TypedData
12
12
  "map_#{@type}"
13
13
  end
14
14
 
15
- def coerce(value, formatter:)
16
- @type.coerce(value, formatter: formatter)
17
- end
18
-
19
15
  def primitive?
20
16
  false
21
17
  end
@@ -4,7 +4,8 @@ module TypedData
4
4
  class Schema
5
5
  class RecordType < Type
6
6
  # @param fields [Array] an array of "fields" in an Avro schema
7
- def initialize(fields)
7
+ def initialize(name, fields)
8
+ @name = name
8
9
  @field_to_type = fields.each_with_object({}) do |field, h|
9
10
  h[field["name"] || field[:name]] = Schema.build_type(field["type"] || field[:type])
10
11
  end
@@ -21,8 +22,12 @@ module TypedData
21
22
  end
22
23
  end
23
24
 
25
+ def find_match(value)
26
+ raise InvalidValue, %Q{the value #{value.inspect} doesn't match the type #{self}}
27
+ end
28
+
24
29
  def match?(value)
25
- value.is_a?(Hash)
30
+ value.is_a?(Hash) && value.all? { |k, v| @field_to_type[k]&.match?(v) }
26
31
  end
27
32
  end
28
33
  end
@@ -3,6 +3,8 @@
3
3
  module TypedData
4
4
  class Schema
5
5
  class StringType < Type
6
+ SUPPORTED_LOGICAL_TYPES = %w[uuid]
7
+
6
8
  def primitive?
7
9
  true
8
10
  end
@@ -1,10 +1,16 @@
1
1
  # frozen_string_literal: true
2
+ require "typed_data/schema/errors"
2
3
 
3
4
  module TypedData
4
5
  class Schema
5
6
  class Type
7
+ SUPPORTED_LOGICAL_TYPES = []
8
+
6
9
  def initialize(name, logical_type = nil)
7
10
  @name = name
11
+ if logical_type && !self.class::SUPPORTED_LOGICAL_TYPES.include?(logical_type)
12
+ raise UnsupportedType, %Q{#{name} doesn't support the logical type "#{logical_type}"}
13
+ end
8
14
  @logical_type = logical_type
9
15
  end
10
16
 
@@ -12,7 +18,7 @@ module TypedData
12
18
  @name
13
19
  end
14
20
 
15
- def coerce(value, formatter:)
21
+ def coerce(value)
16
22
  value
17
23
  end
18
24
 
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ require "typed_data/schema/errors"
2
3
 
3
4
  module TypedData
4
5
  class Schema
@@ -6,41 +7,29 @@ module TypedData
6
7
  # @param types [Array<String>]
7
8
  def initialize(types)
8
9
  @types = types.map(&Schema.method(:build_type))
9
- @nullable_primitive = @types.size == 2 && @types.any?(&:primitive?) && @types.any? { |t| t.is_a?(NullType) }
10
+ @nullable_single = @types.size == 2 && @types.any? { |t| t.is_a?(NullType) }
11
+ @nullable_primitive = @nullable_single && @types.any?(&:primitive?)
10
12
  end
11
13
 
12
14
  def to_s
13
15
  @nullable_primitive ? @types.first.to_s : "union_#{@types.map(&:to_s).join("_")}"
14
16
  end
15
17
 
16
- def coerce(value, formatter:)
17
- return value if @nullable_primitive
18
-
19
- type = find_match(value)
20
- if type.is_a?(NullType)
21
- default_value(formatter)
22
- else
23
- default_value(formatter).merge!(formatter.call(type.to_s) => type.coerce(value, formatter: formatter).to_s)
24
- end
25
- end
26
-
27
18
  def primitive?
28
19
  false
29
20
  end
30
21
 
31
22
  def find_match(value)
32
- @types.find { |t| t.match?(value) }
23
+ @types.find { |t| t.match?(value) } or
24
+ raise InvalidValue, %Q{the value #{value.inspect} doesn't match the type #{@types.map(&:to_s)}}
33
25
  end
34
26
 
35
27
  def match?(value)
36
28
  @types.any? { |t| t.match?(value) }
37
29
  end
38
30
 
39
- def default_value(formatter)
40
- @types.each_with_object({}) do |t, v|
41
- next if t.is_a?(NullType)
42
- v[formatter.call(t.to_s)] = t.primitive? || t.is_a?(EnumType) ? nil : []
43
- end
31
+ def nullable_single?
32
+ @nullable_single
44
33
  end
45
34
  end
46
35
  end
@@ -1,3 +1,3 @@
1
1
  module TypedData
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: typed_data
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - abicky
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-04-21 00:00:00.000000000 Z
11
+ date: 2021-08-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: avro
@@ -63,8 +63,10 @@ files:
63
63
  - lib/typed_data/schema/boolean_type.rb
64
64
  - lib/typed_data/schema/bytes_type.rb
65
65
  - lib/typed_data/schema/enum_type.rb
66
+ - lib/typed_data/schema/errors.rb
66
67
  - lib/typed_data/schema/float_type.rb
67
68
  - lib/typed_data/schema/int_type.rb
69
+ - lib/typed_data/schema/long_type.rb
68
70
  - lib/typed_data/schema/map_type.rb
69
71
  - lib/typed_data/schema/null_type.rb
70
72
  - lib/typed_data/schema/record_type.rb
@@ -94,7 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
94
96
  - !ruby/object:Gem::Version
95
97
  version: '0'
96
98
  requirements: []
97
- rubygems_version: 3.1.2
99
+ rubygems_version: 3.1.4
98
100
  signing_key:
99
101
  specification_version: 4
100
102
  summary: A library that converts hash objects managed by an Avro schema