parqueteur 1.0.3 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Gemfile.lock +1 -1
- data/example.rb +24 -5
- data/lib/parqueteur/column.rb +3 -20
- data/lib/parqueteur/column_collection.rb +8 -0
- data/lib/parqueteur/converter.rb +22 -12
- data/lib/parqueteur/input.rb +2 -2
- data/lib/parqueteur/struct.rb +25 -0
- data/lib/parqueteur/type.rb +21 -0
- data/lib/parqueteur/type_resolver.rb +34 -48
- data/lib/parqueteur/types/array_type.rb +21 -0
- data/lib/parqueteur/types/boolean_type.rb +15 -0
- data/lib/parqueteur/types/int32_type.rb +25 -0
- data/lib/parqueteur/types/int64_type.rb +25 -0
- data/lib/parqueteur/types/map_type.rb +36 -0
- data/lib/parqueteur/types/string_type.rb +20 -0
- data/lib/parqueteur/types/struct_type.rb +35 -0
- data/lib/parqueteur/types/timestamp_type.rb +22 -0
- data/lib/parqueteur/version.rb +1 -1
- data/lib/parqueteur.rb +16 -4
- metadata +11 -2
- data/lib/parqueteur/value_array_builder.rb +0 -59
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90ffcadf5b78e4ffc3329eac9be1be34af9209a77a39a6395eefc1c56afa7ce6
|
4
|
+
data.tar.gz: 1d7f5257d3f86443e0d13b789d7449565198f6cda1563d111a36ad3728264044
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 262039094dd3aa5890f9d1a87d836eb64004c51fc57456d7880a36332295ac8a447fcd045c6bd2e053986ad4e9981021448b39e20010d0e41fef6b7e233d91ca
|
7
|
+
data.tar.gz: 73988e1f836acbe22e26c20b8559d8f79e9eeda55f9612fbefae7aa5bef131187b7a0716a3bd7804db5321b29b19f54b383e8594830814ae765d2c7b30986f59
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
data/example.rb
CHANGED
@@ -2,19 +2,38 @@ require 'bundler/setup'
|
|
2
2
|
require 'parqueteur'
|
3
3
|
|
4
4
|
class Foo < Parqueteur::Converter
|
5
|
-
column :id, :
|
5
|
+
column :id, :bigint
|
6
6
|
column :reference, :string
|
7
7
|
column :hash, :map, key: :string, value: :string
|
8
8
|
column :valid, :boolean
|
9
9
|
column :total, :integer
|
10
|
+
column :numbers, :array, elements: :integer
|
11
|
+
column :my_struct, :struct do
|
12
|
+
field :test, :string
|
13
|
+
field :mon_nombre, :integer
|
14
|
+
end
|
10
15
|
end
|
11
16
|
|
12
17
|
LETTERS = ('a'..'z').to_a
|
13
18
|
|
14
19
|
data = 1000.times.collect do |i|
|
15
|
-
{
|
20
|
+
{
|
21
|
+
'id' => i + 1,
|
22
|
+
'reference' => "coucou:#{i}",
|
23
|
+
'hash' => { 'a' => LETTERS.sample },
|
24
|
+
'valid' => rand < 0.5,
|
25
|
+
'total' => rand(100..500),
|
26
|
+
'numbers' => [1, 2, 3],
|
27
|
+
'my_struct' => {
|
28
|
+
'test' => 'super'
|
29
|
+
}
|
30
|
+
}
|
16
31
|
end
|
17
32
|
|
18
|
-
chunked_converter = Parqueteur::ChunkedConverter.new(data, Foo)
|
19
|
-
pp chunked_converter.write_files('test')
|
20
|
-
|
33
|
+
# chunked_converter = Parqueteur::ChunkedConverter.new(data, Foo)
|
34
|
+
# pp chunked_converter.write_files('test')
|
35
|
+
puts Foo.convert(data, output: 'tmp/test.parquet')
|
36
|
+
table = Arrow::Table.load('tmp/test.parquet')
|
37
|
+
table.each_record do |record|
|
38
|
+
puts record.to_h
|
39
|
+
end
|
data/lib/parqueteur/column.rb
CHANGED
@@ -4,31 +4,14 @@ module Parqueteur
|
|
4
4
|
class Column
|
5
5
|
attr_reader :name, :type, :options
|
6
6
|
|
7
|
-
def initialize(name, type, options = {})
|
7
|
+
def initialize(name, type, options = {}, &block)
|
8
8
|
@name = name.to_s
|
9
|
-
@type = type
|
9
|
+
@type = Parqueteur::TypeResolver.resolve(type, options, &block)
|
10
10
|
@options = options
|
11
11
|
end
|
12
12
|
|
13
13
|
def arrow_type
|
14
|
-
@arrow_type
|
15
|
-
end
|
16
|
-
|
17
|
-
def cast(value)
|
18
|
-
case @type
|
19
|
-
when :string then value.to_s
|
20
|
-
when :boolean then value == true
|
21
|
-
when :integer then value.to_i
|
22
|
-
when :long then value.to_i
|
23
|
-
when :timestamp
|
24
|
-
case value
|
25
|
-
when String then Time.parse(value).to_i
|
26
|
-
when Integer then value
|
27
|
-
else
|
28
|
-
raise ArgumentError, "Unable to cast '#{value}' to timestamp"
|
29
|
-
end
|
30
|
-
when :map then value
|
31
|
-
end
|
14
|
+
@type.arrow_type
|
32
15
|
end
|
33
16
|
|
34
17
|
def to_arrow_field
|
@@ -4,11 +4,18 @@ module Parqueteur
|
|
4
4
|
class ColumnCollection
|
5
5
|
include Enumerable
|
6
6
|
|
7
|
+
attr_reader :column_names
|
8
|
+
|
7
9
|
def initialize
|
8
10
|
@columns = []
|
11
|
+
@column_names = []
|
9
12
|
@columns_idx = {}
|
10
13
|
end
|
11
14
|
|
15
|
+
def key?(key)
|
16
|
+
@columns_idx.key?(key)
|
17
|
+
end
|
18
|
+
|
12
19
|
def each(&block)
|
13
20
|
@columns.each(&block)
|
14
21
|
end
|
@@ -17,6 +24,7 @@ module Parqueteur
|
|
17
24
|
unless @columns_idx.key?(column.name)
|
18
25
|
@columns_idx[column.name] = column
|
19
26
|
@columns << column
|
27
|
+
@column_names << column.name
|
20
28
|
end
|
21
29
|
|
22
30
|
true
|
data/lib/parqueteur/converter.rb
CHANGED
@@ -12,8 +12,8 @@ module Parqueteur
|
|
12
12
|
@columns ||= Parqueteur::ColumnCollection.new
|
13
13
|
end
|
14
14
|
|
15
|
-
def self.column(name, type, options = {})
|
16
|
-
columns.add(Parqueteur::Column.new(name, type, options))
|
15
|
+
def self.column(name, type, options = {}, &block)
|
16
|
+
columns.add(Parqueteur::Column.new(name, type, options, &block))
|
17
17
|
end
|
18
18
|
|
19
19
|
def self.transforms
|
@@ -69,7 +69,10 @@ module Parqueteur
|
|
69
69
|
def to_arrow_table
|
70
70
|
transforms = self.class.transforms
|
71
71
|
|
72
|
-
chunks = {}
|
72
|
+
chunks = self.class.columns.each_with_object({}) do |column, hash|
|
73
|
+
hash[column.name] = []
|
74
|
+
end
|
75
|
+
items_count = 0
|
73
76
|
@input.each_slice(100) do |items|
|
74
77
|
values = self.class.columns.each_with_object({}) do |column, hash|
|
75
78
|
hash[column.name] = []
|
@@ -98,20 +101,27 @@ module Parqueteur
|
|
98
101
|
|
99
102
|
values.each_with_object(chunks) do |item, hash|
|
100
103
|
column = self.class.columns.find(item[0])
|
101
|
-
hash[item[0]] ||= []
|
102
104
|
hash[item[0]].push(
|
103
|
-
|
104
|
-
item[1], column.type, column.options
|
105
|
-
)
|
105
|
+
column.type.build_value_array(item[1])
|
106
106
|
)
|
107
107
|
end
|
108
|
+
|
109
|
+
items_count += items.length
|
108
110
|
end
|
109
111
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
112
|
+
if items_count > 0
|
113
|
+
Arrow::Table.new(
|
114
|
+
chunks.transform_values! do |value|
|
115
|
+
Arrow::ChunkedArray.new(value)
|
116
|
+
end
|
117
|
+
)
|
118
|
+
else
|
119
|
+
Arrow::Table.new(
|
120
|
+
self.class.columns.each_with_object({}) do |column, hash|
|
121
|
+
hash[column.name] = column.type.build_value_array([])
|
122
|
+
end
|
123
|
+
)
|
124
|
+
end
|
115
125
|
end
|
116
126
|
end
|
117
127
|
end
|
data/lib/parqueteur/input.rb
CHANGED
@@ -13,7 +13,7 @@ module Parqueteur
|
|
13
13
|
else
|
14
14
|
arg.split("\n")
|
15
15
|
end
|
16
|
-
when
|
16
|
+
when Enumerable
|
17
17
|
arg
|
18
18
|
end,
|
19
19
|
options
|
@@ -36,7 +36,7 @@ module Parqueteur
|
|
36
36
|
JSON.parse(@source.read).each(&block)
|
37
37
|
end
|
38
38
|
@source.rewind
|
39
|
-
when
|
39
|
+
when Enumerable
|
40
40
|
@source.each(&block)
|
41
41
|
end
|
42
42
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Parqueteur
|
4
|
+
class Struct
|
5
|
+
def initialize(&block)
|
6
|
+
instance_exec(&block)
|
7
|
+
end
|
8
|
+
|
9
|
+
def fields
|
10
|
+
@fields ||= Parqueteur::ColumnCollection.new
|
11
|
+
end
|
12
|
+
|
13
|
+
def field(name, type, options = {}, &block)
|
14
|
+
fields.add(Parqueteur::Column.new(name, type, options, &block))
|
15
|
+
end
|
16
|
+
|
17
|
+
def key?(key)
|
18
|
+
fields.key?(key)
|
19
|
+
end
|
20
|
+
|
21
|
+
def to_arrow_type
|
22
|
+
fields.collect(&:to_arrow_field)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Parqueteur
|
4
|
+
class Type
|
5
|
+
attr_reader :options, :arrow_type
|
6
|
+
|
7
|
+
def initialize(options = {}, &block)
|
8
|
+
@options = options
|
9
|
+
@block = block
|
10
|
+
@arrow_type = arrow_type_builder
|
11
|
+
end
|
12
|
+
|
13
|
+
def build_value_array(values)
|
14
|
+
raise "#to_arrow_field must be implemented in #{self.class}"
|
15
|
+
end
|
16
|
+
|
17
|
+
def resolve(type, options = {})
|
18
|
+
Parqueteur::TypeResolver.resolve(type, options)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -2,60 +2,46 @@
|
|
2
2
|
|
3
3
|
module Parqueteur
|
4
4
|
class TypeResolver
|
5
|
-
|
6
|
-
|
5
|
+
include Singleton
|
6
|
+
|
7
|
+
def self.registered_types
|
8
|
+
@registered_types ||= {
|
9
|
+
array: Parqueteur::Types::ArrayType,
|
10
|
+
bigint: Parqueteur::Types::Int64Type,
|
11
|
+
boolean: Parqueteur::Types::BooleanType,
|
12
|
+
int32: Parqueteur::Types::Int32Type,
|
13
|
+
int64: Parqueteur::Types::Int64Type,
|
14
|
+
integer: Parqueteur::Types::Int32Type,
|
15
|
+
map: Parqueteur::Types::MapType,
|
16
|
+
string: Parqueteur::Types::StringType,
|
17
|
+
struct: Parqueteur::Types::StructType,
|
18
|
+
timestamp: Parqueteur::Types::TimestampType
|
19
|
+
}
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.register_type(type, klass)
|
23
|
+
registered_types[type] = klass
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.resolve(*args, &block)
|
27
|
+
instance.resolve(*args, &block)
|
7
28
|
end
|
8
29
|
|
9
|
-
def resolve(type, options = {})
|
10
|
-
|
11
|
-
|
12
|
-
elements_opt = options.fetch(:elements)
|
13
|
-
Arrow::ListDataType.new(
|
14
|
-
if elements_opt.is_a?(Hash)
|
15
|
-
resolve(elements_opt.fetch(:type), elements_opt)
|
16
|
-
else
|
17
|
-
resolve(elements_opt)
|
18
|
-
end
|
19
|
-
)
|
20
|
-
when :boolean
|
21
|
-
Arrow::BooleanDataType.new
|
22
|
-
when :integer
|
23
|
-
if options.fetch(:unsigned, false) == true
|
24
|
-
Arrow::UInt32DataType.new
|
25
|
-
else
|
26
|
-
Arrow::Int32DataType.new
|
27
|
-
end
|
28
|
-
when :long
|
29
|
-
if options.fetch(:unsigned, false) == true
|
30
|
-
Arrow::UInt64DataType.new
|
31
|
-
else
|
32
|
-
Arrow::Int64DataType.new
|
33
|
-
end
|
34
|
-
when :timestamp
|
35
|
-
Arrow::TimestampDataType.new(
|
36
|
-
options.fetch(:unit, :second)
|
37
|
-
)
|
38
|
-
when :string
|
39
|
-
Arrow::StringDataType.new
|
40
|
-
when :map
|
41
|
-
map_value = options.fetch(:value)
|
42
|
-
Arrow::MapDataType.new(
|
43
|
-
resolve(options.fetch(:key)),
|
44
|
-
if map_value.is_a?(Hash)
|
45
|
-
resolve(map_value.fetch(:type), map_value)
|
46
|
-
else
|
47
|
-
resolve(map_value)
|
48
|
-
end
|
49
|
-
)
|
30
|
+
def resolve(type, options = {}, &block)
|
31
|
+
if type.is_a?(Symbol)
|
32
|
+
resolve_from_symbol(type, options, &block)
|
50
33
|
else
|
51
|
-
|
34
|
+
type.new(options, &block)
|
52
35
|
end
|
53
36
|
end
|
54
|
-
end
|
55
|
-
end
|
56
37
|
|
57
|
-
private
|
38
|
+
private
|
58
39
|
|
59
|
-
def
|
40
|
+
def resolve_from_symbol(type, options, &block)
|
41
|
+
type_klass = self.class.registered_types.fetch(type.to_sym, nil)
|
42
|
+
raise Parqueteur::TypeNotFound, type if type_klass.nil?
|
60
43
|
|
44
|
+
type_klass.new(options, &block)
|
45
|
+
end
|
46
|
+
end
|
61
47
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Parqueteur
|
4
|
+
module Types
|
5
|
+
class ArrayType < Parqueteur::Type
|
6
|
+
def build_value_array(values)
|
7
|
+
Arrow::ListArrayBuilder.build(arrow_type, values)
|
8
|
+
end
|
9
|
+
|
10
|
+
def arrow_type_builder
|
11
|
+
Arrow::ListDataType.new(
|
12
|
+
if options[:elements].is_a?(Hash)
|
13
|
+
resolve(options[:elements].fetch(:type), options[:elements]).arrow_type
|
14
|
+
else
|
15
|
+
resolve(options[:elements]).arrow_type
|
16
|
+
end
|
17
|
+
)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Parqueteur
|
4
|
+
module Types
|
5
|
+
class BooleanType < Parqueteur::Type
|
6
|
+
def build_value_array(values)
|
7
|
+
Arrow::BooleanArray.new(values)
|
8
|
+
end
|
9
|
+
|
10
|
+
def arrow_type_builder
|
11
|
+
Arrow::BooleanDataType.new
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Parqueteur
|
4
|
+
module Types
|
5
|
+
class Int32Type < Parqueteur::Type
|
6
|
+
def build_value_array(values)
|
7
|
+
if options.fetch(:unsigned, false) == true
|
8
|
+
Arrow::UInt32Array.new(values)
|
9
|
+
else
|
10
|
+
Arrow::Int32Array.new(values)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def arrow_type_builder
|
15
|
+
if options.fetch(:unsigned, false) == true
|
16
|
+
Arrow::UInt32DataType.new
|
17
|
+
else
|
18
|
+
Arrow::Int32DataType.new
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# when :integer
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Parqueteur
|
4
|
+
module Types
|
5
|
+
class Int64Type < Parqueteur::Type
|
6
|
+
def build_value_array(values)
|
7
|
+
if options.fetch(:unsigned, false) == true
|
8
|
+
Arrow::UInt64Array.new(values)
|
9
|
+
else
|
10
|
+
Arrow::Int64Array.new(values)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def arrow_type_builder
|
15
|
+
if options.fetch(:unsigned, false) == true
|
16
|
+
Arrow::UInt64DataType.new
|
17
|
+
else
|
18
|
+
Arrow::Int64DataType.new
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# when :integer
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Parqueteur
|
4
|
+
module Types
|
5
|
+
class MapType < Parqueteur::Type
|
6
|
+
def build_value_array(values)
|
7
|
+
builder = Arrow::MapArrayBuilder.new(arrow_type)
|
8
|
+
values.each do |entry|
|
9
|
+
builder.append_value
|
10
|
+
next if entry.nil?
|
11
|
+
|
12
|
+
entry.each do |k, v|
|
13
|
+
builder.key_builder.append(k)
|
14
|
+
builder.item_builder.append(v)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
builder.finish
|
19
|
+
end
|
20
|
+
|
21
|
+
def arrow_type_builder
|
22
|
+
map_value = options.fetch(:value)
|
23
|
+
|
24
|
+
Arrow::MapDataType.new(
|
25
|
+
resolve(options.fetch(:key)).arrow_type,
|
26
|
+
if map_value.is_a?(Hash)
|
27
|
+
resolve(map_value.fetch(:type), map_value).arrow_type
|
28
|
+
else
|
29
|
+
resolve(map_value).arrow_type
|
30
|
+
end
|
31
|
+
)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# when :timestamp
|
4
|
+
# Arrow::TimestampDataType.new(
|
5
|
+
# options.fetch(:unit, :second)
|
6
|
+
# )
|
7
|
+
|
8
|
+
module Parqueteur
|
9
|
+
module Types
|
10
|
+
class StringType < Parqueteur::Type
|
11
|
+
def build_value_array(values)
|
12
|
+
Arrow::StringArray.new(values)
|
13
|
+
end
|
14
|
+
|
15
|
+
def arrow_type_builder
|
16
|
+
Arrow::StringDataType.new
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# when :timestamp
|
4
|
+
# Arrow::TimestampDataType.new(
|
5
|
+
# options.fetch(:unit, :second)
|
6
|
+
# )
|
7
|
+
|
8
|
+
module Parqueteur
|
9
|
+
module Types
|
10
|
+
class StructType < Parqueteur::Type
|
11
|
+
def build_value_array(values)
|
12
|
+
values.each do |value|
|
13
|
+
next if value.nil?
|
14
|
+
|
15
|
+
value.each_key do |key|
|
16
|
+
next if struct_object.key?(key)
|
17
|
+
|
18
|
+
raise Parqueteur::Error, "Struct field '#{key}' not found"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
Arrow::StructArrayBuilder.build(arrow_type, values)
|
22
|
+
end
|
23
|
+
|
24
|
+
def arrow_type_builder
|
25
|
+
Arrow::StructDataType.new(struct_object.to_arrow_type)
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def struct_object
|
31
|
+
@struct_object ||= Parqueteur::Struct.new(&@block)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# when :timestamp
|
4
|
+
# Arrow::TimestampDataType.new(
|
5
|
+
# options.fetch(:unit, :second)
|
6
|
+
# )
|
7
|
+
|
8
|
+
module Parqueteur
|
9
|
+
module Types
|
10
|
+
class TimestampType < Parqueteur::Type
|
11
|
+
def build_value_array(values)
|
12
|
+
Arrow::TimestampArray.new(values)
|
13
|
+
end
|
14
|
+
|
15
|
+
def arrow_type_builder
|
16
|
+
Arrow::TimestampDataType.new(
|
17
|
+
options.fetch(:unit, :second)
|
18
|
+
)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/parqueteur/version.rb
CHANGED
data/lib/parqueteur.rb
CHANGED
@@ -1,17 +1,29 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'json'
|
4
|
+
require 'singleton'
|
5
|
+
|
3
6
|
require_relative "parqueteur/version"
|
4
|
-
require 'parqueteur/
|
7
|
+
require 'parqueteur/chunked_converter'
|
5
8
|
require 'parqueteur/column'
|
6
9
|
require 'parqueteur/column_collection'
|
7
10
|
require 'parqueteur/converter'
|
8
|
-
require 'parqueteur/chunked_converter'
|
9
11
|
require 'parqueteur/input'
|
10
|
-
require 'parqueteur/
|
11
|
-
require '
|
12
|
+
require 'parqueteur/struct'
|
13
|
+
require 'parqueteur/type'
|
14
|
+
require 'parqueteur/type_resolver'
|
15
|
+
require 'parqueteur/types/array_type'
|
16
|
+
require 'parqueteur/types/boolean_type'
|
17
|
+
require 'parqueteur/types/int32_type'
|
18
|
+
require 'parqueteur/types/int64_type'
|
19
|
+
require 'parqueteur/types/map_type'
|
20
|
+
require 'parqueteur/types/string_type'
|
21
|
+
require 'parqueteur/types/struct_type'
|
22
|
+
require 'parqueteur/types/timestamp_type'
|
12
23
|
require 'parquet'
|
13
24
|
|
14
25
|
module Parqueteur
|
15
26
|
class Error < StandardError; end
|
27
|
+
class TypeNotFound < Error; end
|
16
28
|
# Your code goes here...
|
17
29
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parqueteur
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Julien D.
|
@@ -45,8 +45,17 @@ files:
|
|
45
45
|
- lib/parqueteur/column_collection.rb
|
46
46
|
- lib/parqueteur/converter.rb
|
47
47
|
- lib/parqueteur/input.rb
|
48
|
+
- lib/parqueteur/struct.rb
|
49
|
+
- lib/parqueteur/type.rb
|
48
50
|
- lib/parqueteur/type_resolver.rb
|
49
|
-
- lib/parqueteur/value_array_builder.rb
|
51
|
+
- lib/parqueteur/types/array_type.rb
|
52
|
+
- lib/parqueteur/types/boolean_type.rb
|
53
|
+
- lib/parqueteur/types/int32_type.rb
|
54
|
+
- lib/parqueteur/types/int64_type.rb
|
55
|
+
- lib/parqueteur/types/map_type.rb
|
56
|
+
- lib/parqueteur/types/string_type.rb
|
57
|
+
- lib/parqueteur/types/struct_type.rb
|
58
|
+
- lib/parqueteur/types/timestamp_type.rb
|
50
59
|
- lib/parqueteur/version.rb
|
51
60
|
- parqueteur.gemspec
|
52
61
|
- test.json
|
@@ -1,59 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Parqueteur
|
4
|
-
class ValueArrayBuilder
|
5
|
-
attr_reader :type, :options, :arrow_type
|
6
|
-
|
7
|
-
def self.build(input, type, options)
|
8
|
-
new(type, options).build(input)
|
9
|
-
end
|
10
|
-
|
11
|
-
def initialize(type, options)
|
12
|
-
@type = type
|
13
|
-
@options = options
|
14
|
-
@arrow_type = Parqueteur::TypeResolver.resolve(type, options)
|
15
|
-
end
|
16
|
-
|
17
|
-
def build(input)
|
18
|
-
return if input.nil?
|
19
|
-
|
20
|
-
case type
|
21
|
-
when :array
|
22
|
-
Arrow::ListArrayBuilder.build(arrow_type, input)
|
23
|
-
when :map
|
24
|
-
builder = Arrow::MapArrayBuilder.new(arrow_type)
|
25
|
-
input.each do |entry|
|
26
|
-
builder.append_value
|
27
|
-
next if entry.nil?
|
28
|
-
|
29
|
-
entry.each do |k, v|
|
30
|
-
builder.key_builder.append(k)
|
31
|
-
builder.item_builder.append(v)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
builder.finish
|
36
|
-
when :boolean
|
37
|
-
Arrow::BooleanArray.new(input)
|
38
|
-
when :integer
|
39
|
-
if options.fetch(:unsigned, false) == true
|
40
|
-
Arrow::UInt32Array.new(input)
|
41
|
-
else
|
42
|
-
Arrow::Int32Array.new(input)
|
43
|
-
end
|
44
|
-
when :long
|
45
|
-
if options.fetch(:unsigned, false) == true
|
46
|
-
Arrow::UInt64Array.new(input)
|
47
|
-
else
|
48
|
-
Arrow::Int64Array.new(input)
|
49
|
-
end
|
50
|
-
when :string
|
51
|
-
Arrow::StringArray.new(input)
|
52
|
-
when :timestamp
|
53
|
-
Arrow::TimestampArray.new(input)
|
54
|
-
else
|
55
|
-
raise Error, "unknown type: #{type}"
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|