typed_data 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa8bfbb2363bcf13407a8bb1fdeed0099f16a5cda9296dd77489fb2b2d7bb024
4
- data.tar.gz: 7c446db80a65cc04d22584f38d53c313092432e71ba64321c22d43967a976f8a
3
+ metadata.gz: 483f78cdcfb329a0d14f4173fa586f64eae12c8f61dbd78492f1cd85cceadbea
4
+ data.tar.gz: 45173313046d5a6dc54d920ed5bee399e64fd8af2a2e710e2b5051b922fcc440
5
5
  SHA512:
6
- metadata.gz: 8e783163ba498a893cacff761e9d502acf0e524e8f57790cf3500ee8e29b2310133dfacd76f017d47cf81bde4f9f0550d4bc78523e7213e7dbd80adae8864215
7
- data.tar.gz: 31376d90ff56ab09b0a004b0d0bc8efaed10a6ec4d89cf8eee7c4fd2532c5f533507b96f29af365d6d413475f75d2be1a206d157ce3b19bc847ab24d2c52a17f
6
+ metadata.gz: 4041f615dedb5782ce40d15ae80eca87f3e168b988c2d2de4d9d07d0ba05b00bc2a6e8b14b981d65575ab9f2e38b0db50653286c3093ffaa8e7a586eb7a5e5d9
7
+ data.tar.gz: 04f2f8e8a86d63e8f5e06066b96e983332107f116694e059e0b06cd14ebb66827cd8bddc2e85c631298518d8387d28452edd856161b412c105f9a9d05d4151fa
data/README.md CHANGED
@@ -79,36 +79,42 @@ converter.convert({
79
79
  },
80
80
  })
81
81
  #=> {"int_field"=>1,
82
- # "int_or_string_field"=>{"int_value"=>nil, "string_value"=>"string"},
82
+ # "int_or_string_field"=>{"string_value"=>"string"},
83
83
  # "array_field"=>[1, 2],
84
- # "union_type_array_field"=>
85
- # [{"int_value"=>"1", "string_value"=>nil},
86
- # {"int_value"=>nil, "string_value"=>"2"}],
84
+ # "union_type_array_field"=>[{"int_value"=>"1"}, {"string_value"=>"2"}],
87
85
  # "nested_map_field"=>
88
86
  # [{"key"=>"nested_map",
89
87
  # "value"=>
90
- # [{"key"=>"key1", "value"=>{"int_value"=>"1", "string_value"=>nil}},
91
- # {"key"=>"key2", "value"=>{"int_value"=>nil, "string_value"=>"2"}}]}]}
88
+ # [{"key"=>"key1", "value"=>{"int_value"=>"1"}},
89
+ # {"key"=>"key2", "value"=>{"string_value"=>"2"}}]}]}
92
90
  ```
93
91
 
94
92
  You can specify a formatter for the union type keys. For example, the formatter for tables managed by [Google BigQuery Sink Connector](https://docs.confluent.io/current/connect/kafka-connect-bigquery/index.html) is like below:
95
93
 
96
94
  ```ruby
97
- schema = {
98
- "name" => "Record",
99
- "type" => "record",
100
- "fields" => [
101
- {
102
- "name" => "int_or_string_field",
103
- "type" => ["int", "string"],
104
- },
105
- ],
106
- }
107
-
108
95
  converter = TypedData::Converter.new(schema)
109
96
  converter.union_type_key_formatter = ->(type) { type.split("_").first }
110
- converter.convert({ "int_or_string_field" => "string" })
111
- #=> {"int_or_string_field"=>{"int"=>nil, "string"=>"string"}}
97
+ converter.convert({
98
+ "int_field" => 1,
99
+ "int_or_string_field" => "string",
100
+ "array_field" => [1, 2],
101
+ "union_type_array_field" => [1, "2"],
102
+ "nested_map_field" => {
103
+ "nested_map" => {
104
+ "key1" => 1,
105
+ "key2" => "2",
106
+ },
107
+ },
108
+ })
109
+ #=> {"int_field"=>1,
110
+ # "int_or_string_field"=>{"string"=>"string"},
111
+ # "array_field"=>[1, 2],
112
+ # "union_type_array_field"=>[{"int"=>"1"}, {"string"=>"2"}],
113
+ # "nested_map_field"=>
114
+ # [{"key"=>"nested_map",
115
+ # "value"=>
116
+ # [{"key"=>"key1", "value"=>{"int"=>"1"}},
117
+ # {"key"=>"key2", "value"=>{"string"=>"2"}}]}]}
112
118
  ```
113
119
 
114
120
 
@@ -31,9 +31,9 @@ module TypedData
31
31
  when Schema::RecordType
32
32
  converted[key] = convert_record(subtype, value)
33
33
  when Schema::UnionType
34
- converted[key] = convert_union(subtype, value, as_record_field: true)
34
+ converted[key] = convert_union(subtype, value)
35
35
  else
36
- converted[key] = subtype.coerce(value, formatter: union_type_key_formatter)
36
+ converted[key] = subtype.coerce(value)
37
37
  end
38
38
  end
39
39
  end
@@ -55,7 +55,7 @@ module TypedData
55
55
  when Schema::UnionType
56
56
  ret << convert_union(subtype, value)
57
57
  else
58
- ret << type.coerce(value, formatter: union_type_key_formatter)
58
+ ret << subtype.coerce(value)
59
59
  end
60
60
  end
61
61
  end
@@ -75,16 +75,15 @@ module TypedData
75
75
  when Schema::UnionType
76
76
  value = convert_union(subtype, value)
77
77
  else
78
- value = type.coerce(value, formatter: union_type_key_formatter)
78
+ value = subtype.coerce(value)
79
79
  end
80
80
  ret << { "key" => key, "value" => value }
81
81
  end
82
82
  end
83
83
 
84
84
  # @param type [UnionType]
85
- # @param as_record_field [Boolean]
86
85
  # @param map [Object]
87
- def convert_union(type, value, as_record_field: false)
86
+ def convert_union(type, value)
88
87
  subtype = type.find_match(value)
89
88
  case subtype
90
89
  when Schema::ArrayType
@@ -95,15 +94,18 @@ module TypedData
95
94
  converted_value = convert_record(subtype, value)
96
95
  when Schema::UnionType
97
96
  converted_value = convert_union(subtype, value)
97
+ when Schema::NullType
98
+ converted_value = nil
98
99
  else
99
- return type.coerce(value, formatter: union_type_key_formatter)
100
+ converted_value = subtype.coerce(value).to_s
100
101
  end
101
102
 
102
- if as_record_field
103
+ if type.nullable_single?
103
104
  converted_value
105
+ elsif subtype.is_a?(Schema::NullType)
106
+ {}
104
107
  else
105
- type.default_value(union_type_key_formatter)
106
- .merge!(union_type_key_formatter.call(subtype.to_s) => converted_value)
108
+ { union_type_key_formatter.call(subtype.to_s) => converted_value }
107
109
  end
108
110
  end
109
111
  end
@@ -5,6 +5,7 @@ require "typed_data/schema/bytes_type"
5
5
  require "typed_data/schema/enum_type"
6
6
  require "typed_data/schema/float_type"
7
7
  require "typed_data/schema/int_type"
8
+ require "typed_data/schema/long_type"
8
9
  require "typed_data/schema/map_type"
9
10
  require "typed_data/schema/null_type"
10
11
  require "typed_data/schema/record_type"
@@ -42,14 +43,16 @@ module TypedData
42
43
  values = type["values"] || type[:values]
43
44
  MapType.new(values.is_a?(Array) ? values : [values])
44
45
  when "record"
45
- RecordType.new(type["fields"] || type[:fields])
46
+ RecordType.new(type["name"] || type[:name], type["fields"] || type[:fields])
46
47
  else
47
48
  raise UnsupportedType, "Unknown type: #{subtype}"
48
49
  end
49
50
  when "boolean"
50
51
  BooleanType.new(type, logical_type)
51
- when "int", "long"
52
+ when "int"
52
53
  IntType.new(type, logical_type)
54
+ when "long"
55
+ LongType.new(type, logical_type)
53
56
  when "float", "double"
54
57
  FloatType.new(type, logical_type)
55
58
  when "bytes"
@@ -72,7 +75,7 @@ module TypedData
72
75
  if (schema["type"] || schema[:type]) != "record"
73
76
  raise UnsupportedType, 'The root type must be "record"'
74
77
  end
75
- @root_type = RecordType.new(schema["fields"] || schema[:fields])
78
+ @root_type = RecordType.new(schema["name"] || schema[:name], schema["fields"] || schema[:fields])
76
79
  end
77
80
  end
78
81
  end
@@ -15,10 +15,6 @@ module TypedData
15
15
  "array_#{@type}"
16
16
  end
17
17
 
18
- def coerce(value, formatter:)
19
- @type.coerce(value, formatter: formatter)
20
- end
21
-
22
18
  def primitive?
23
19
  false
24
20
  end
@@ -3,7 +3,7 @@
3
3
  module TypedData
4
4
  class Schema
5
5
  class BytesType < Type
6
- def coerce(value, formatter:)
6
+ def coerce(value)
7
7
  [value].pack("m0")
8
8
  end
9
9
 
@@ -3,6 +3,8 @@
3
3
  module TypedData
4
4
  class Schema
5
5
  class IntType < Type
6
+ VALUE_RANGE = -2**31 .. 2**31 - 1
7
+
6
8
  def to_s
7
9
  if @logical_type
8
10
  "#{@name}_#{@logical_type.gsub("-", "_")}"
@@ -11,18 +13,12 @@ module TypedData
11
13
  end
12
14
  end
13
15
 
14
- def coerce(value, formatter:)
16
+ def coerce(value)
15
17
  case @logical_type
16
18
  when "date"
17
19
  (Date.new(1970, 1, 1) + value).to_s
18
20
  when "time-millis"
19
21
  Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%T.%3N")
20
- when "time-micros"
21
- Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
22
- when "timestamp-millis"
23
- Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
24
- when "timestamp-micros"
25
- Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
26
22
  else
27
23
  value
28
24
  end
@@ -33,7 +29,7 @@ module TypedData
33
29
  end
34
30
 
35
31
  def match?(value)
36
- value.is_a?(Integer)
32
+ value.is_a?(Integer) && VALUE_RANGE.cover?(value)
37
33
  end
38
34
  end
39
35
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TypedData
4
+ class Schema
5
+ class LongType < Type
6
+ def to_s
7
+ if @logical_type
8
+ "#{@name}_#{@logical_type.gsub("-", "_")}"
9
+ else
10
+ @name
11
+ end
12
+ end
13
+
14
+ def coerce(value)
15
+ case @logical_type
16
+ when "time-micros"
17
+ Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%T.%6N")
18
+ when "timestamp-millis"
19
+ Time.at(value / 1_000, value % 1_000 * 1_000).utc.strftime("%F %T.%3N")
20
+ when "timestamp-micros"
21
+ Time.at(value / 1_000_000, value % 1_000_000).utc.strftime("%F %T.%6N")
22
+ else
23
+ value
24
+ end
25
+ end
26
+
27
+ def primitive?
28
+ true
29
+ end
30
+
31
+ def match?(value)
32
+ value.is_a?(Integer)
33
+ end
34
+ end
35
+ end
36
+ end
@@ -12,10 +12,6 @@ module TypedData
12
12
  "map_#{@type}"
13
13
  end
14
14
 
15
- def coerce(value, formatter:)
16
- @type.coerce(value, formatter: formatter)
17
- end
18
-
19
15
  def primitive?
20
16
  false
21
17
  end
@@ -4,7 +4,8 @@ module TypedData
4
4
  class Schema
5
5
  class RecordType < Type
6
6
  # @param fields [Array] an array of "fields" in an Avro schema
7
- def initialize(fields)
7
+ def initialize(name, fields)
8
+ @name = name
8
9
  @field_to_type = fields.each_with_object({}) do |field, h|
9
10
  h[field["name"] || field[:name]] = Schema.build_type(field["type"] || field[:type])
10
11
  end
@@ -22,7 +23,7 @@ module TypedData
22
23
  end
23
24
 
24
25
  def match?(value)
25
- value.is_a?(Hash)
26
+ value.is_a?(Hash) && value.all? { |k, v| @field_to_type[k]&.match?(v) }
26
27
  end
27
28
  end
28
29
  end
@@ -12,7 +12,7 @@ module TypedData
12
12
  @name
13
13
  end
14
14
 
15
- def coerce(value, formatter:)
15
+ def coerce(value)
16
16
  value
17
17
  end
18
18
 
@@ -6,24 +6,14 @@ module TypedData
6
6
  # @param types [Array<String>]
7
7
  def initialize(types)
8
8
  @types = types.map(&Schema.method(:build_type))
9
- @nullable_primitive = @types.size == 2 && @types.any?(&:primitive?) && @types.any? { |t| t.is_a?(NullType) }
9
+ @nullable_single = @types.size == 2 && @types.any? { |t| t.is_a?(NullType) }
10
+ @nullable_primitive = @nullable_single && @types.any?(&:primitive?)
10
11
  end
11
12
 
12
13
  def to_s
13
14
  @nullable_primitive ? @types.first.to_s : "union_#{@types.map(&:to_s).join("_")}"
14
15
  end
15
16
 
16
- def coerce(value, formatter:)
17
- return value if @nullable_primitive
18
-
19
- type = find_match(value)
20
- if type.is_a?(NullType)
21
- default_value(formatter)
22
- else
23
- default_value(formatter).merge!(formatter.call(type.to_s) => type.coerce(value, formatter: formatter).to_s)
24
- end
25
- end
26
-
27
17
  def primitive?
28
18
  false
29
19
  end
@@ -36,11 +26,8 @@ module TypedData
36
26
  @types.any? { |t| t.match?(value) }
37
27
  end
38
28
 
39
- def default_value(formatter)
40
- @types.each_with_object({}) do |t, v|
41
- next if t.is_a?(NullType)
42
- v[formatter.call(t.to_s)] = t.primitive? || t.is_a?(EnumType) ? nil : []
43
- end
29
+ def nullable_single?
30
+ @nullable_single
44
31
  end
45
32
  end
46
33
  end
@@ -1,3 +1,3 @@
1
1
  module TypedData
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: typed_data
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - abicky
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-04-21 00:00:00.000000000 Z
11
+ date: 2020-04-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: avro
@@ -65,6 +65,7 @@ files:
65
65
  - lib/typed_data/schema/enum_type.rb
66
66
  - lib/typed_data/schema/float_type.rb
67
67
  - lib/typed_data/schema/int_type.rb
68
+ - lib/typed_data/schema/long_type.rb
68
69
  - lib/typed_data/schema/map_type.rb
69
70
  - lib/typed_data/schema/null_type.rb
70
71
  - lib/typed_data/schema/record_type.rb