parqueteur 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Gemfile.lock +1 -1
- data/example.rb +24 -5
- data/lib/parqueteur/column.rb +3 -20
- data/lib/parqueteur/column_collection.rb +8 -0
- data/lib/parqueteur/converter.rb +22 -12
- data/lib/parqueteur/input.rb +2 -2
- data/lib/parqueteur/struct.rb +25 -0
- data/lib/parqueteur/type.rb +21 -0
- data/lib/parqueteur/type_resolver.rb +34 -48
- data/lib/parqueteur/types/array_type.rb +21 -0
- data/lib/parqueteur/types/boolean_type.rb +15 -0
- data/lib/parqueteur/types/int32_type.rb +25 -0
- data/lib/parqueteur/types/int64_type.rb +25 -0
- data/lib/parqueteur/types/map_type.rb +36 -0
- data/lib/parqueteur/types/string_type.rb +20 -0
- data/lib/parqueteur/types/struct_type.rb +35 -0
- data/lib/parqueteur/types/timestamp_type.rb +22 -0
- data/lib/parqueteur/version.rb +1 -1
- data/lib/parqueteur.rb +16 -4
- metadata +11 -2
- data/lib/parqueteur/value_array_builder.rb +0 -59
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 90ffcadf5b78e4ffc3329eac9be1be34af9209a77a39a6395eefc1c56afa7ce6
|
|
4
|
+
data.tar.gz: 1d7f5257d3f86443e0d13b789d7449565198f6cda1563d111a36ad3728264044
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 262039094dd3aa5890f9d1a87d836eb64004c51fc57456d7880a36332295ac8a447fcd045c6bd2e053986ad4e9981021448b39e20010d0e41fef6b7e233d91ca
|
|
7
|
+
data.tar.gz: 73988e1f836acbe22e26c20b8559d8f79e9eeda55f9612fbefae7aa5bef131187b7a0716a3bd7804db5321b29b19f54b383e8594830814ae765d2c7b30986f59
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
data/example.rb
CHANGED
|
@@ -2,19 +2,38 @@ require 'bundler/setup'
|
|
|
2
2
|
require 'parqueteur'
|
|
3
3
|
|
|
4
4
|
class Foo < Parqueteur::Converter
|
|
5
|
-
column :id, :
|
|
5
|
+
column :id, :bigint
|
|
6
6
|
column :reference, :string
|
|
7
7
|
column :hash, :map, key: :string, value: :string
|
|
8
8
|
column :valid, :boolean
|
|
9
9
|
column :total, :integer
|
|
10
|
+
column :numbers, :array, elements: :integer
|
|
11
|
+
column :my_struct, :struct do
|
|
12
|
+
field :test, :string
|
|
13
|
+
field :mon_nombre, :integer
|
|
14
|
+
end
|
|
10
15
|
end
|
|
11
16
|
|
|
12
17
|
LETTERS = ('a'..'z').to_a
|
|
13
18
|
|
|
14
19
|
data = 1000.times.collect do |i|
|
|
15
|
-
{
|
|
20
|
+
{
|
|
21
|
+
'id' => i + 1,
|
|
22
|
+
'reference' => "coucou:#{i}",
|
|
23
|
+
'hash' => { 'a' => LETTERS.sample },
|
|
24
|
+
'valid' => rand < 0.5,
|
|
25
|
+
'total' => rand(100..500),
|
|
26
|
+
'numbers' => [1, 2, 3],
|
|
27
|
+
'my_struct' => {
|
|
28
|
+
'test' => 'super'
|
|
29
|
+
}
|
|
30
|
+
}
|
|
16
31
|
end
|
|
17
32
|
|
|
18
|
-
chunked_converter = Parqueteur::ChunkedConverter.new(data, Foo)
|
|
19
|
-
pp chunked_converter.write_files('test')
|
|
20
|
-
|
|
33
|
+
# chunked_converter = Parqueteur::ChunkedConverter.new(data, Foo)
|
|
34
|
+
# pp chunked_converter.write_files('test')
|
|
35
|
+
puts Foo.convert(data, output: 'tmp/test.parquet')
|
|
36
|
+
table = Arrow::Table.load('tmp/test.parquet')
|
|
37
|
+
table.each_record do |record|
|
|
38
|
+
puts record.to_h
|
|
39
|
+
end
|
data/lib/parqueteur/column.rb
CHANGED
|
@@ -4,31 +4,14 @@ module Parqueteur
|
|
|
4
4
|
class Column
|
|
5
5
|
attr_reader :name, :type, :options
|
|
6
6
|
|
|
7
|
-
def initialize(name, type, options = {})
|
|
7
|
+
def initialize(name, type, options = {}, &block)
|
|
8
8
|
@name = name.to_s
|
|
9
|
-
@type = type
|
|
9
|
+
@type = Parqueteur::TypeResolver.resolve(type, options, &block)
|
|
10
10
|
@options = options
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
def arrow_type
|
|
14
|
-
@arrow_type
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
def cast(value)
|
|
18
|
-
case @type
|
|
19
|
-
when :string then value.to_s
|
|
20
|
-
when :boolean then value == true
|
|
21
|
-
when :integer then value.to_i
|
|
22
|
-
when :long then value.to_i
|
|
23
|
-
when :timestamp
|
|
24
|
-
case value
|
|
25
|
-
when String then Time.parse(value).to_i
|
|
26
|
-
when Integer then value
|
|
27
|
-
else
|
|
28
|
-
raise ArgumentError, "Unable to cast '#{value}' to timestamp"
|
|
29
|
-
end
|
|
30
|
-
when :map then value
|
|
31
|
-
end
|
|
14
|
+
@type.arrow_type
|
|
32
15
|
end
|
|
33
16
|
|
|
34
17
|
def to_arrow_field
|
|
@@ -4,11 +4,18 @@ module Parqueteur
|
|
|
4
4
|
class ColumnCollection
|
|
5
5
|
include Enumerable
|
|
6
6
|
|
|
7
|
+
attr_reader :column_names
|
|
8
|
+
|
|
7
9
|
def initialize
|
|
8
10
|
@columns = []
|
|
11
|
+
@column_names = []
|
|
9
12
|
@columns_idx = {}
|
|
10
13
|
end
|
|
11
14
|
|
|
15
|
+
def key?(key)
|
|
16
|
+
@columns_idx.key?(key)
|
|
17
|
+
end
|
|
18
|
+
|
|
12
19
|
def each(&block)
|
|
13
20
|
@columns.each(&block)
|
|
14
21
|
end
|
|
@@ -17,6 +24,7 @@ module Parqueteur
|
|
|
17
24
|
unless @columns_idx.key?(column.name)
|
|
18
25
|
@columns_idx[column.name] = column
|
|
19
26
|
@columns << column
|
|
27
|
+
@column_names << column.name
|
|
20
28
|
end
|
|
21
29
|
|
|
22
30
|
true
|
data/lib/parqueteur/converter.rb
CHANGED
|
@@ -12,8 +12,8 @@ module Parqueteur
|
|
|
12
12
|
@columns ||= Parqueteur::ColumnCollection.new
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
def self.column(name, type, options = {})
|
|
16
|
-
columns.add(Parqueteur::Column.new(name, type, options))
|
|
15
|
+
def self.column(name, type, options = {}, &block)
|
|
16
|
+
columns.add(Parqueteur::Column.new(name, type, options, &block))
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
def self.transforms
|
|
@@ -69,7 +69,10 @@ module Parqueteur
|
|
|
69
69
|
def to_arrow_table
|
|
70
70
|
transforms = self.class.transforms
|
|
71
71
|
|
|
72
|
-
chunks = {}
|
|
72
|
+
chunks = self.class.columns.each_with_object({}) do |column, hash|
|
|
73
|
+
hash[column.name] = []
|
|
74
|
+
end
|
|
75
|
+
items_count = 0
|
|
73
76
|
@input.each_slice(100) do |items|
|
|
74
77
|
values = self.class.columns.each_with_object({}) do |column, hash|
|
|
75
78
|
hash[column.name] = []
|
|
@@ -98,20 +101,27 @@ module Parqueteur
|
|
|
98
101
|
|
|
99
102
|
values.each_with_object(chunks) do |item, hash|
|
|
100
103
|
column = self.class.columns.find(item[0])
|
|
101
|
-
hash[item[0]] ||= []
|
|
102
104
|
hash[item[0]].push(
|
|
103
|
-
|
|
104
|
-
item[1], column.type, column.options
|
|
105
|
-
)
|
|
105
|
+
column.type.build_value_array(item[1])
|
|
106
106
|
)
|
|
107
107
|
end
|
|
108
|
+
|
|
109
|
+
items_count += items.length
|
|
108
110
|
end
|
|
109
111
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
112
|
+
if items_count > 0
|
|
113
|
+
Arrow::Table.new(
|
|
114
|
+
chunks.transform_values! do |value|
|
|
115
|
+
Arrow::ChunkedArray.new(value)
|
|
116
|
+
end
|
|
117
|
+
)
|
|
118
|
+
else
|
|
119
|
+
Arrow::Table.new(
|
|
120
|
+
self.class.columns.each_with_object({}) do |column, hash|
|
|
121
|
+
hash[column.name] = column.type.build_value_array([])
|
|
122
|
+
end
|
|
123
|
+
)
|
|
124
|
+
end
|
|
115
125
|
end
|
|
116
126
|
end
|
|
117
127
|
end
|
data/lib/parqueteur/input.rb
CHANGED
|
@@ -13,7 +13,7 @@ module Parqueteur
|
|
|
13
13
|
else
|
|
14
14
|
arg.split("\n")
|
|
15
15
|
end
|
|
16
|
-
when
|
|
16
|
+
when Enumerable
|
|
17
17
|
arg
|
|
18
18
|
end,
|
|
19
19
|
options
|
|
@@ -36,7 +36,7 @@ module Parqueteur
|
|
|
36
36
|
JSON.parse(@source.read).each(&block)
|
|
37
37
|
end
|
|
38
38
|
@source.rewind
|
|
39
|
-
when
|
|
39
|
+
when Enumerable
|
|
40
40
|
@source.each(&block)
|
|
41
41
|
end
|
|
42
42
|
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parqueteur
|
|
4
|
+
class Struct
|
|
5
|
+
def initialize(&block)
|
|
6
|
+
instance_exec(&block)
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
def fields
|
|
10
|
+
@fields ||= Parqueteur::ColumnCollection.new
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def field(name, type, options = {}, &block)
|
|
14
|
+
fields.add(Parqueteur::Column.new(name, type, options, &block))
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def key?(key)
|
|
18
|
+
fields.key?(key)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def to_arrow_type
|
|
22
|
+
fields.collect(&:to_arrow_field)
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parqueteur
|
|
4
|
+
class Type
|
|
5
|
+
attr_reader :options, :arrow_type
|
|
6
|
+
|
|
7
|
+
def initialize(options = {}, &block)
|
|
8
|
+
@options = options
|
|
9
|
+
@block = block
|
|
10
|
+
@arrow_type = arrow_type_builder
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def build_value_array(values)
|
|
14
|
+
raise "#to_arrow_field must be implemented in #{self.class}"
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def resolve(type, options = {})
|
|
18
|
+
Parqueteur::TypeResolver.resolve(type, options)
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -2,60 +2,46 @@
|
|
|
2
2
|
|
|
3
3
|
module Parqueteur
|
|
4
4
|
class TypeResolver
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
include Singleton
|
|
6
|
+
|
|
7
|
+
def self.registered_types
|
|
8
|
+
@registered_types ||= {
|
|
9
|
+
array: Parqueteur::Types::ArrayType,
|
|
10
|
+
bigint: Parqueteur::Types::Int64Type,
|
|
11
|
+
boolean: Parqueteur::Types::BooleanType,
|
|
12
|
+
int32: Parqueteur::Types::Int32Type,
|
|
13
|
+
int64: Parqueteur::Types::Int64Type,
|
|
14
|
+
integer: Parqueteur::Types::Int32Type,
|
|
15
|
+
map: Parqueteur::Types::MapType,
|
|
16
|
+
string: Parqueteur::Types::StringType,
|
|
17
|
+
struct: Parqueteur::Types::StructType,
|
|
18
|
+
timestamp: Parqueteur::Types::TimestampType
|
|
19
|
+
}
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def self.register_type(type, klass)
|
|
23
|
+
registered_types[type] = klass
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def self.resolve(*args, &block)
|
|
27
|
+
instance.resolve(*args, &block)
|
|
7
28
|
end
|
|
8
29
|
|
|
9
|
-
def resolve(type, options = {})
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
elements_opt = options.fetch(:elements)
|
|
13
|
-
Arrow::ListDataType.new(
|
|
14
|
-
if elements_opt.is_a?(Hash)
|
|
15
|
-
resolve(elements_opt.fetch(:type), elements_opt)
|
|
16
|
-
else
|
|
17
|
-
resolve(elements_opt)
|
|
18
|
-
end
|
|
19
|
-
)
|
|
20
|
-
when :boolean
|
|
21
|
-
Arrow::BooleanDataType.new
|
|
22
|
-
when :integer
|
|
23
|
-
if options.fetch(:unsigned, false) == true
|
|
24
|
-
Arrow::UInt32DataType.new
|
|
25
|
-
else
|
|
26
|
-
Arrow::Int32DataType.new
|
|
27
|
-
end
|
|
28
|
-
when :long
|
|
29
|
-
if options.fetch(:unsigned, false) == true
|
|
30
|
-
Arrow::UInt64DataType.new
|
|
31
|
-
else
|
|
32
|
-
Arrow::Int64DataType.new
|
|
33
|
-
end
|
|
34
|
-
when :timestamp
|
|
35
|
-
Arrow::TimestampDataType.new(
|
|
36
|
-
options.fetch(:unit, :second)
|
|
37
|
-
)
|
|
38
|
-
when :string
|
|
39
|
-
Arrow::StringDataType.new
|
|
40
|
-
when :map
|
|
41
|
-
map_value = options.fetch(:value)
|
|
42
|
-
Arrow::MapDataType.new(
|
|
43
|
-
resolve(options.fetch(:key)),
|
|
44
|
-
if map_value.is_a?(Hash)
|
|
45
|
-
resolve(map_value.fetch(:type), map_value)
|
|
46
|
-
else
|
|
47
|
-
resolve(map_value)
|
|
48
|
-
end
|
|
49
|
-
)
|
|
30
|
+
def resolve(type, options = {}, &block)
|
|
31
|
+
if type.is_a?(Symbol)
|
|
32
|
+
resolve_from_symbol(type, options, &block)
|
|
50
33
|
else
|
|
51
|
-
|
|
34
|
+
type.new(options, &block)
|
|
52
35
|
end
|
|
53
36
|
end
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
37
|
|
|
57
|
-
private
|
|
38
|
+
private
|
|
58
39
|
|
|
59
|
-
def
|
|
40
|
+
def resolve_from_symbol(type, options, &block)
|
|
41
|
+
type_klass = self.class.registered_types.fetch(type.to_sym, nil)
|
|
42
|
+
raise Parqueteur::TypeNotFound, type if type_klass.nil?
|
|
60
43
|
|
|
44
|
+
type_klass.new(options, &block)
|
|
45
|
+
end
|
|
46
|
+
end
|
|
61
47
|
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parqueteur
|
|
4
|
+
module Types
|
|
5
|
+
class ArrayType < Parqueteur::Type
|
|
6
|
+
def build_value_array(values)
|
|
7
|
+
Arrow::ListArrayBuilder.build(arrow_type, values)
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def arrow_type_builder
|
|
11
|
+
Arrow::ListDataType.new(
|
|
12
|
+
if options[:elements].is_a?(Hash)
|
|
13
|
+
resolve(options[:elements].fetch(:type), options[:elements]).arrow_type
|
|
14
|
+
else
|
|
15
|
+
resolve(options[:elements]).arrow_type
|
|
16
|
+
end
|
|
17
|
+
)
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parqueteur
|
|
4
|
+
module Types
|
|
5
|
+
class BooleanType < Parqueteur::Type
|
|
6
|
+
def build_value_array(values)
|
|
7
|
+
Arrow::BooleanArray.new(values)
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def arrow_type_builder
|
|
11
|
+
Arrow::BooleanDataType.new
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parqueteur
|
|
4
|
+
module Types
|
|
5
|
+
class Int32Type < Parqueteur::Type
|
|
6
|
+
def build_value_array(values)
|
|
7
|
+
if options.fetch(:unsigned, false) == true
|
|
8
|
+
Arrow::UInt32Array.new(values)
|
|
9
|
+
else
|
|
10
|
+
Arrow::Int32Array.new(values)
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def arrow_type_builder
|
|
15
|
+
if options.fetch(:unsigned, false) == true
|
|
16
|
+
Arrow::UInt32DataType.new
|
|
17
|
+
else
|
|
18
|
+
Arrow::Int32DataType.new
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# when :integer
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parqueteur
|
|
4
|
+
module Types
|
|
5
|
+
class Int64Type < Parqueteur::Type
|
|
6
|
+
def build_value_array(values)
|
|
7
|
+
if options.fetch(:unsigned, false) == true
|
|
8
|
+
Arrow::UInt64Array.new(values)
|
|
9
|
+
else
|
|
10
|
+
Arrow::Int64Array.new(values)
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def arrow_type_builder
|
|
15
|
+
if options.fetch(:unsigned, false) == true
|
|
16
|
+
Arrow::UInt64DataType.new
|
|
17
|
+
else
|
|
18
|
+
Arrow::Int64DataType.new
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# when :integer
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Parqueteur
|
|
4
|
+
module Types
|
|
5
|
+
class MapType < Parqueteur::Type
|
|
6
|
+
def build_value_array(values)
|
|
7
|
+
builder = Arrow::MapArrayBuilder.new(arrow_type)
|
|
8
|
+
values.each do |entry|
|
|
9
|
+
builder.append_value
|
|
10
|
+
next if entry.nil?
|
|
11
|
+
|
|
12
|
+
entry.each do |k, v|
|
|
13
|
+
builder.key_builder.append(k)
|
|
14
|
+
builder.item_builder.append(v)
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
builder.finish
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def arrow_type_builder
|
|
22
|
+
map_value = options.fetch(:value)
|
|
23
|
+
|
|
24
|
+
Arrow::MapDataType.new(
|
|
25
|
+
resolve(options.fetch(:key)).arrow_type,
|
|
26
|
+
if map_value.is_a?(Hash)
|
|
27
|
+
resolve(map_value.fetch(:type), map_value).arrow_type
|
|
28
|
+
else
|
|
29
|
+
resolve(map_value).arrow_type
|
|
30
|
+
end
|
|
31
|
+
)
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# when :timestamp
|
|
4
|
+
# Arrow::TimestampDataType.new(
|
|
5
|
+
# options.fetch(:unit, :second)
|
|
6
|
+
# )
|
|
7
|
+
|
|
8
|
+
module Parqueteur
|
|
9
|
+
module Types
|
|
10
|
+
class StringType < Parqueteur::Type
|
|
11
|
+
def build_value_array(values)
|
|
12
|
+
Arrow::StringArray.new(values)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def arrow_type_builder
|
|
16
|
+
Arrow::StringDataType.new
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# when :timestamp
|
|
4
|
+
# Arrow::TimestampDataType.new(
|
|
5
|
+
# options.fetch(:unit, :second)
|
|
6
|
+
# )
|
|
7
|
+
|
|
8
|
+
module Parqueteur
|
|
9
|
+
module Types
|
|
10
|
+
class StructType < Parqueteur::Type
|
|
11
|
+
def build_value_array(values)
|
|
12
|
+
values.each do |value|
|
|
13
|
+
next if value.nil?
|
|
14
|
+
|
|
15
|
+
value.each_key do |key|
|
|
16
|
+
next if struct_object.key?(key)
|
|
17
|
+
|
|
18
|
+
raise Parqueteur::Error, "Struct field '#{key}' not found"
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
Arrow::StructArrayBuilder.build(arrow_type, values)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def arrow_type_builder
|
|
25
|
+
Arrow::StructDataType.new(struct_object.to_arrow_type)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
private
|
|
29
|
+
|
|
30
|
+
def struct_object
|
|
31
|
+
@struct_object ||= Parqueteur::Struct.new(&@block)
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# when :timestamp
|
|
4
|
+
# Arrow::TimestampDataType.new(
|
|
5
|
+
# options.fetch(:unit, :second)
|
|
6
|
+
# )
|
|
7
|
+
|
|
8
|
+
module Parqueteur
|
|
9
|
+
module Types
|
|
10
|
+
class TimestampType < Parqueteur::Type
|
|
11
|
+
def build_value_array(values)
|
|
12
|
+
Arrow::TimestampArray.new(values)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def arrow_type_builder
|
|
16
|
+
Arrow::TimestampDataType.new(
|
|
17
|
+
options.fetch(:unit, :second)
|
|
18
|
+
)
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
data/lib/parqueteur/version.rb
CHANGED
data/lib/parqueteur.rb
CHANGED
|
@@ -1,17 +1,29 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'singleton'
|
|
5
|
+
|
|
3
6
|
require_relative "parqueteur/version"
|
|
4
|
-
require 'parqueteur/
|
|
7
|
+
require 'parqueteur/chunked_converter'
|
|
5
8
|
require 'parqueteur/column'
|
|
6
9
|
require 'parqueteur/column_collection'
|
|
7
10
|
require 'parqueteur/converter'
|
|
8
|
-
require 'parqueteur/chunked_converter'
|
|
9
11
|
require 'parqueteur/input'
|
|
10
|
-
require 'parqueteur/
|
|
11
|
-
require '
|
|
12
|
+
require 'parqueteur/struct'
|
|
13
|
+
require 'parqueteur/type'
|
|
14
|
+
require 'parqueteur/type_resolver'
|
|
15
|
+
require 'parqueteur/types/array_type'
|
|
16
|
+
require 'parqueteur/types/boolean_type'
|
|
17
|
+
require 'parqueteur/types/int32_type'
|
|
18
|
+
require 'parqueteur/types/int64_type'
|
|
19
|
+
require 'parqueteur/types/map_type'
|
|
20
|
+
require 'parqueteur/types/string_type'
|
|
21
|
+
require 'parqueteur/types/struct_type'
|
|
22
|
+
require 'parqueteur/types/timestamp_type'
|
|
12
23
|
require 'parquet'
|
|
13
24
|
|
|
14
25
|
module Parqueteur
|
|
15
26
|
class Error < StandardError; end
|
|
27
|
+
class TypeNotFound < Error; end
|
|
16
28
|
# Your code goes here...
|
|
17
29
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: parqueteur
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.3
|
|
4
|
+
version: 1.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Julien D.
|
|
@@ -45,8 +45,17 @@ files:
|
|
|
45
45
|
- lib/parqueteur/column_collection.rb
|
|
46
46
|
- lib/parqueteur/converter.rb
|
|
47
47
|
- lib/parqueteur/input.rb
|
|
48
|
+
- lib/parqueteur/struct.rb
|
|
49
|
+
- lib/parqueteur/type.rb
|
|
48
50
|
- lib/parqueteur/type_resolver.rb
|
|
49
|
-
- lib/parqueteur/value_array_builder.rb
|
|
51
|
+
- lib/parqueteur/types/array_type.rb
|
|
52
|
+
- lib/parqueteur/types/boolean_type.rb
|
|
53
|
+
- lib/parqueteur/types/int32_type.rb
|
|
54
|
+
- lib/parqueteur/types/int64_type.rb
|
|
55
|
+
- lib/parqueteur/types/map_type.rb
|
|
56
|
+
- lib/parqueteur/types/string_type.rb
|
|
57
|
+
- lib/parqueteur/types/struct_type.rb
|
|
58
|
+
- lib/parqueteur/types/timestamp_type.rb
|
|
50
59
|
- lib/parqueteur/version.rb
|
|
51
60
|
- parqueteur.gemspec
|
|
52
61
|
- test.json
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Parqueteur
|
|
4
|
-
class ValueArrayBuilder
|
|
5
|
-
attr_reader :type, :options, :arrow_type
|
|
6
|
-
|
|
7
|
-
def self.build(input, type, options)
|
|
8
|
-
new(type, options).build(input)
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
def initialize(type, options)
|
|
12
|
-
@type = type
|
|
13
|
-
@options = options
|
|
14
|
-
@arrow_type = Parqueteur::TypeResolver.resolve(type, options)
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
def build(input)
|
|
18
|
-
return if input.nil?
|
|
19
|
-
|
|
20
|
-
case type
|
|
21
|
-
when :array
|
|
22
|
-
Arrow::ListArrayBuilder.build(arrow_type, input)
|
|
23
|
-
when :map
|
|
24
|
-
builder = Arrow::MapArrayBuilder.new(arrow_type)
|
|
25
|
-
input.each do |entry|
|
|
26
|
-
builder.append_value
|
|
27
|
-
next if entry.nil?
|
|
28
|
-
|
|
29
|
-
entry.each do |k, v|
|
|
30
|
-
builder.key_builder.append(k)
|
|
31
|
-
builder.item_builder.append(v)
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
builder.finish
|
|
36
|
-
when :boolean
|
|
37
|
-
Arrow::BooleanArray.new(input)
|
|
38
|
-
when :integer
|
|
39
|
-
if options.fetch(:unsigned, false) == true
|
|
40
|
-
Arrow::UInt32Array.new(input)
|
|
41
|
-
else
|
|
42
|
-
Arrow::Int32Array.new(input)
|
|
43
|
-
end
|
|
44
|
-
when :long
|
|
45
|
-
if options.fetch(:unsigned, false) == true
|
|
46
|
-
Arrow::UInt64Array.new(input)
|
|
47
|
-
else
|
|
48
|
-
Arrow::Int64Array.new(input)
|
|
49
|
-
end
|
|
50
|
-
when :string
|
|
51
|
-
Arrow::StringArray.new(input)
|
|
52
|
-
when :timestamp
|
|
53
|
-
Arrow::TimestampArray.new(input)
|
|
54
|
-
else
|
|
55
|
-
raise Error, "unknown type: #{type}"
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
end
|