red-arrow 8.0.0 → 10.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/arrow/converters.hpp +3 -0
- data/ext/arrow/extconf.rb +8 -4
- data/lib/arrow/array-builder.rb +40 -6
- data/lib/arrow/array-computable.rb +37 -0
- data/lib/arrow/array.rb +28 -0
- data/lib/arrow/chunked-array.rb +21 -0
- data/lib/arrow/column.rb +28 -0
- data/lib/arrow/data-type.rb +8 -3
- data/lib/arrow/decimal128-array-builder.rb +16 -6
- data/lib/arrow/decimal128.rb +14 -0
- data/lib/arrow/decimal256-array-builder.rb +16 -6
- data/lib/arrow/decimal256.rb +14 -0
- data/lib/arrow/field.rb +44 -3
- data/lib/arrow/list-data-type.rb +1 -6
- data/lib/arrow/loader.rb +3 -0
- data/lib/arrow/raw-table-converter.rb +6 -1
- data/lib/arrow/raw-tensor-converter.rb +89 -0
- data/lib/arrow/string-array-builder.rb +30 -0
- data/lib/arrow/tensor.rb +140 -0
- data/lib/arrow/time-unit.rb +31 -0
- data/lib/arrow/time32-array-builder.rb +2 -14
- data/lib/arrow/time32-data-type.rb +9 -38
- data/lib/arrow/time64-array-builder.rb +2 -14
- data/lib/arrow/time64-data-type.rb +9 -38
- data/lib/arrow/timestamp-array-builder.rb +2 -14
- data/lib/arrow/timestamp-data-type.rb +9 -34
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -1
- data/test/raw-records/test-dictionary-array.rb +341 -0
- data/test/test-array-builder.rb +62 -0
- data/test/test-chunked-array.rb +6 -0
- data/test/test-column.rb +31 -0
- data/test/test-decimal128-array-builder.rb +14 -0
- data/test/test-decimal128-array.rb +5 -2
- data/test/test-decimal128.rb +26 -2
- data/test/test-decimal256-array-builder.rb +14 -0
- data/test/test-decimal256-array.rb +5 -2
- data/test/test-decimal256.rb +26 -2
- data/test/test-field.rb +26 -0
- data/test/test-orc.rb +2 -2
- data/test/test-table.rb +16 -0
- data/test/test-tensor.rb +243 -2
- data/test/values/test-dictionary-array.rb +30 -0
- metadata +15 -9
@@ -0,0 +1,30 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class StringArrayBuilder
|
20
|
+
private
|
21
|
+
def convert_to_arrow_value(value)
|
22
|
+
case value
|
23
|
+
when GLib::Bytes, String
|
24
|
+
value
|
25
|
+
else
|
26
|
+
value.to_s
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/arrow/tensor.rb
CHANGED
@@ -15,10 +15,150 @@
|
|
15
15
|
# specific language governing permissions and limitations
|
16
16
|
# under the License.
|
17
17
|
|
18
|
+
require_relative "raw-tensor-converter"
|
19
|
+
|
18
20
|
module Arrow
|
19
21
|
class Tensor
|
22
|
+
alias_method :initialize_raw, :initialize
|
23
|
+
# Creates a new {Arrow::Tensor}.
|
24
|
+
#
|
25
|
+
# @overload initialize(raw_tensor, data_type: nil, shape: nil, dimension_names: nil)
|
26
|
+
#
|
27
|
+
# @param raw_tensor [::Array<Numeric>] The tensor represented as a
|
28
|
+
# raw `Array` (not `Arrow::Array`) of `Numeric`s. You can
|
29
|
+
# pass a nested `Array` for a multi-dimensional tensor.
|
30
|
+
#
|
31
|
+
# @param data_type [Arrow::DataType, String, Symbol, ::Array<String>,
|
32
|
+
# ::Array<Symbol>, Hash, nil] The element data type of the tensor.
|
33
|
+
#
|
34
|
+
# If you specify `nil`, data type is guessed from `raw_tensor`.
|
35
|
+
#
|
36
|
+
# See {Arrow::DataType.resolve} for how to specify data type.
|
37
|
+
#
|
38
|
+
# @param shape [::Array<Integer>, nil] The array of dimension sizes.
|
39
|
+
#
|
40
|
+
# If you specify `nil`, shape is guessed from `raw_tensor`.
|
41
|
+
#
|
42
|
+
# @param dimension_names [::Array<String>, ::Array<Symbol>, nil]
|
43
|
+
# The array of the dimension names.
|
44
|
+
#
|
45
|
+
# If you specify `nil`, all dimensions have empty names.
|
46
|
+
#
|
47
|
+
# @example Create a tensor from Ruby's Array
|
48
|
+
# raw_tensor = [
|
49
|
+
# [
|
50
|
+
# [1, 2, 3, 4],
|
51
|
+
# [5, 6, 7, 8],
|
52
|
+
# ],
|
53
|
+
# [
|
54
|
+
# [9, 10, 11, 12],
|
55
|
+
# [13, 14, 15, 16],
|
56
|
+
# ],
|
57
|
+
# [
|
58
|
+
# [17, 18, 19, 20],
|
59
|
+
# [21, 22, 23, 24],
|
60
|
+
# ],
|
61
|
+
# ]
|
62
|
+
# Arrow::Tensor.new(raw_tensor)
|
63
|
+
#
|
64
|
+
# @since 10.0.0
|
65
|
+
#
|
66
|
+
# @overload initialize(data_type, data, shape, strides, dimension_names)
|
67
|
+
#
|
68
|
+
# @param data_type [Arrow::DataType, String, Symbol, ::Array<String>,
|
69
|
+
# ::Array<Symbol>, Hash] The element data type of the tensor.
|
70
|
+
#
|
71
|
+
# See {Arrow::DataType.resolve} for how to specify data type.
|
72
|
+
#
|
73
|
+
# @param data [Arrow::Buffer, String] The data of the tensor.
|
74
|
+
#
|
75
|
+
# @param shape [::Array<Integer>] The array of dimension sizes.
|
76
|
+
#
|
77
|
+
# @param strides [::Array<Integer>, nil] The array of strides which
|
78
|
+
# is the number of bytes between two adjacent elements in each
|
79
|
+
# dimension.
|
80
|
+
#
|
81
|
+
# If you specify `nil` or an empty `Array`, strides are
|
82
|
+
# guessed from `data_type` and `data`.
|
83
|
+
#
|
84
|
+
# @param dimension_names [::Array<String>, ::Array<Symbol>, nil]
|
85
|
+
# The array of the dimension names.
|
86
|
+
#
|
87
|
+
# If you specify `nil`, all dimensions have empty names.
|
88
|
+
#
|
89
|
+
# @example Create a tensor from Arrow::Buffer
|
90
|
+
# raw_data = [
|
91
|
+
# 1, 2,
|
92
|
+
# 3, 4,
|
93
|
+
#
|
94
|
+
# 5, 6,
|
95
|
+
# 7, 8,
|
96
|
+
#
|
97
|
+
# 9, 10,
|
98
|
+
# 11, 12,
|
99
|
+
# ]
|
100
|
+
# data = Arrow::Buffer.new(raw_data.pack("c*").freeze)
|
101
|
+
# shape = [3, 2, 2]
|
102
|
+
# strides = []
|
103
|
+
# names = ["a", "b", "c"]
|
104
|
+
# Arrow::Tensor.new(:int8, data, shape, strides, names)
|
105
|
+
def initialize(*args,
|
106
|
+
data_type: nil,
|
107
|
+
data: nil,
|
108
|
+
shape: nil,
|
109
|
+
strides: nil,
|
110
|
+
dimension_names: nil)
|
111
|
+
n_args = args.size
|
112
|
+
case n_args
|
113
|
+
when 1
|
114
|
+
converter = RawTensorConverter.new(args[0],
|
115
|
+
data_type: data_type,
|
116
|
+
shape: shape,
|
117
|
+
strides: strides,
|
118
|
+
dimension_names: dimension_names)
|
119
|
+
data_type = converter.data_type
|
120
|
+
data = converter.data
|
121
|
+
shape = converter.shape
|
122
|
+
strides = converter.strides
|
123
|
+
dimension_names = converter.dimension_names
|
124
|
+
when 0, 2..5
|
125
|
+
data_type = args[0] || data_type
|
126
|
+
data = args[1] || data
|
127
|
+
shape = args[2] || shape
|
128
|
+
strides = args[3] || strides
|
129
|
+
dimension_names = args[4] || dimension_names
|
130
|
+
if data_type.nil?
|
131
|
+
raise ArgumentError, "data_type: is missing: #{data.inspect}"
|
132
|
+
end
|
133
|
+
else
|
134
|
+
message = "wrong number of arguments (given #{n_args}, expected 0..5)"
|
135
|
+
raise ArgumentError, message
|
136
|
+
end
|
137
|
+
initialize_raw(DataType.resolve(data_type),
|
138
|
+
data,
|
139
|
+
shape,
|
140
|
+
strides,
|
141
|
+
dimension_names)
|
142
|
+
end
|
143
|
+
|
144
|
+
def dimension_names
|
145
|
+
n_dimensions.times.collect do |i|
|
146
|
+
get_dimension_name(i)
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
20
150
|
def to_arrow
|
21
151
|
self
|
22
152
|
end
|
153
|
+
|
154
|
+
def to_arrow_array
|
155
|
+
if n_dimensions != 1
|
156
|
+
raise RangeError, "must be 1 dimensional tensor: #{shape.inspect}"
|
157
|
+
end
|
158
|
+
value_data_type.array_class.new(size,
|
159
|
+
buffer,
|
160
|
+
nil,
|
161
|
+
0)
|
162
|
+
end
|
23
163
|
end
|
24
164
|
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class TimeUnit
|
20
|
+
class << self
|
21
|
+
# @api private
|
22
|
+
def try_convert(value)
|
23
|
+
if value.is_a?(Hash) and value.size == 1 and value[:unit]
|
24
|
+
super(value[:unit])
|
25
|
+
else
|
26
|
+
super
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -18,24 +18,12 @@
|
|
18
18
|
module Arrow
|
19
19
|
class Time32ArrayBuilder
|
20
20
|
class << self
|
21
|
-
def build(unit_or_data_type, values)
|
22
|
-
builder = new(unit_or_data_type)
|
21
|
+
def build(data_type, values)
|
22
|
+
builder = new(data_type)
|
23
23
|
builder.build(values)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
alias_method :initialize_raw, :initialize
|
28
|
-
def initialize(unit_or_data_type)
|
29
|
-
case unit_or_data_type
|
30
|
-
when DataType
|
31
|
-
data_type = unit_or_data_type
|
32
|
-
else
|
33
|
-
unit = unit_or_data_type
|
34
|
-
data_type = Time32DataType.new(unit)
|
35
|
-
end
|
36
|
-
initialize_raw(data_type)
|
37
|
-
end
|
38
|
-
|
39
27
|
def unit
|
40
28
|
@unit ||= value_data_type.unit
|
41
29
|
end
|
@@ -17,45 +17,16 @@
|
|
17
17
|
|
18
18
|
module Arrow
|
19
19
|
class Time32DataType
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
#
|
30
|
-
# The unit must be second or millisecond.
|
31
|
-
#
|
32
|
-
# @example Create a time32 data type with Arrow::TimeUnit
|
33
|
-
# Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
|
34
|
-
#
|
35
|
-
# @example Create a time32 data type with Symbol
|
36
|
-
# Arrow::Time32DataType.new(:milli)
|
37
|
-
#
|
38
|
-
# @overload initialize(description)
|
39
|
-
#
|
40
|
-
# @param description [Hash] The description of the time32 data
|
41
|
-
# type. It must have `:unit` value.
|
42
|
-
#
|
43
|
-
# @option description [Arrow::TimeUnit, Symbol] :unit The unit of
|
44
|
-
# the time32 data type.
|
45
|
-
#
|
46
|
-
# The unit must be second or millisecond.
|
47
|
-
#
|
48
|
-
# @example Create a time32 data type with Arrow::TimeUnit
|
49
|
-
# Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
|
50
|
-
#
|
51
|
-
# @example Create a time32 data type with Symbol
|
52
|
-
# Arrow::Time32DataType.new(unit: :milli)
|
53
|
-
def initialize(unit)
|
54
|
-
if unit.is_a?(Hash)
|
55
|
-
description = unit
|
56
|
-
unit = description[:unit]
|
20
|
+
class << self
|
21
|
+
# @api private
|
22
|
+
def try_convert(value)
|
23
|
+
case value
|
24
|
+
when Symbol, Arrow::TimeUnit
|
25
|
+
new(value)
|
26
|
+
else
|
27
|
+
super
|
28
|
+
end
|
57
29
|
end
|
58
|
-
initialize_raw(unit)
|
59
30
|
end
|
60
31
|
end
|
61
32
|
end
|
@@ -18,24 +18,12 @@
|
|
18
18
|
module Arrow
|
19
19
|
class Time64ArrayBuilder
|
20
20
|
class << self
|
21
|
-
def build(unit_or_data_type, values)
|
22
|
-
builder = new(unit_or_data_type)
|
21
|
+
def build(data_type, values)
|
22
|
+
builder = new(data_type)
|
23
23
|
builder.build(values)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
alias_method :initialize_raw, :initialize
|
28
|
-
def initialize(unit_or_data_type)
|
29
|
-
case unit_or_data_type
|
30
|
-
when DataType
|
31
|
-
data_type = unit_or_data_type
|
32
|
-
else
|
33
|
-
unit = unit_or_data_type
|
34
|
-
data_type = Time64DataType.new(unit)
|
35
|
-
end
|
36
|
-
initialize_raw(data_type)
|
37
|
-
end
|
38
|
-
|
39
27
|
def unit
|
40
28
|
@unit ||= value_data_type.unit
|
41
29
|
end
|
@@ -17,45 +17,16 @@
|
|
17
17
|
|
18
18
|
module Arrow
|
19
19
|
class Time64DataType
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
#
|
30
|
-
# The unit must be microsecond or nanosecond.
|
31
|
-
#
|
32
|
-
# @example Create a time64 data type with Arrow::TimeUnit
|
33
|
-
# Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
|
34
|
-
#
|
35
|
-
# @example Create a time64 data type with Symbol
|
36
|
-
# Arrow::Time64DataType.new(:nano)
|
37
|
-
#
|
38
|
-
# @overload initialize(description)
|
39
|
-
#
|
40
|
-
# @param description [Hash] The description of the time64 data
|
41
|
-
# type. It must have `:unit` value.
|
42
|
-
#
|
43
|
-
# @option description [Arrow::TimeUnit, Symbol] :unit The unit of
|
44
|
-
# the time64 data type.
|
45
|
-
#
|
46
|
-
# The unit must be microsecond or nanosecond.
|
47
|
-
#
|
48
|
-
# @example Create a time64 data type with Arrow::TimeUnit
|
49
|
-
# Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
|
50
|
-
#
|
51
|
-
# @example Create a time64 data type with Symbol
|
52
|
-
# Arrow::Time64DataType.new(unit: :nano)
|
53
|
-
def initialize(unit)
|
54
|
-
if unit.is_a?(Hash)
|
55
|
-
description = unit
|
56
|
-
unit = description[:unit]
|
20
|
+
class << self
|
21
|
+
# @api private
|
22
|
+
def try_convert(value)
|
23
|
+
case value
|
24
|
+
when Symbol, Arrow::TimeUnit
|
25
|
+
new(value)
|
26
|
+
else
|
27
|
+
super
|
28
|
+
end
|
57
29
|
end
|
58
|
-
initialize_raw(unit)
|
59
30
|
end
|
60
31
|
end
|
61
32
|
end
|
@@ -18,24 +18,12 @@
|
|
18
18
|
module Arrow
|
19
19
|
class TimestampArrayBuilder
|
20
20
|
class << self
|
21
|
-
def build(unit_or_data_type, values)
|
22
|
-
builder = new(unit_or_data_type)
|
21
|
+
def build(data_type, values)
|
22
|
+
builder = new(data_type)
|
23
23
|
builder.build(values)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
alias_method :initialize_raw, :initialize
|
28
|
-
def initialize(unit_or_data_type)
|
29
|
-
case unit_or_data_type
|
30
|
-
when DataType
|
31
|
-
data_type = unit_or_data_type
|
32
|
-
else
|
33
|
-
unit = unit_or_data_type
|
34
|
-
data_type = TimestampDataType.new(unit)
|
35
|
-
end
|
36
|
-
initialize_raw(data_type)
|
37
|
-
end
|
38
|
-
|
39
27
|
private
|
40
28
|
def unit_id
|
41
29
|
@unit_id ||= value_data_type.unit.nick.to_sym
|
@@ -17,41 +17,16 @@
|
|
17
17
|
|
18
18
|
module Arrow
|
19
19
|
class TimestampDataType
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
#
|
30
|
-
# @example Create a timestamp data type with Arrow::TimeUnit
|
31
|
-
# Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
|
32
|
-
#
|
33
|
-
# @example Create a timestamp data type with Symbol
|
34
|
-
# Arrow::TimestampDataType.new(:milli)
|
35
|
-
#
|
36
|
-
# @overload initialize(description)
|
37
|
-
#
|
38
|
-
# @param description [Hash] The description of the timestamp data
|
39
|
-
# type. It must have `:unit` value.
|
40
|
-
#
|
41
|
-
# @option description [Arrow::TimeUnit, Symbol] :unit The unit of
|
42
|
-
# the timestamp data type.
|
43
|
-
#
|
44
|
-
# @example Create a timestamp data type with Arrow::TimeUnit
|
45
|
-
# Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
|
46
|
-
#
|
47
|
-
# @example Create a timestamp data type with Symbol
|
48
|
-
# Arrow::TimestampDataType.new(unit: :milli)
|
49
|
-
def initialize(unit)
|
50
|
-
if unit.is_a?(Hash)
|
51
|
-
description = unit
|
52
|
-
unit = description[:unit]
|
20
|
+
class << self
|
21
|
+
# @api private
|
22
|
+
def try_convert(value)
|
23
|
+
case value
|
24
|
+
when Symbol, Arrow::TimeUnit
|
25
|
+
new(value)
|
26
|
+
else
|
27
|
+
super
|
28
|
+
end
|
53
29
|
end
|
54
|
-
initialize_raw(unit)
|
55
30
|
end
|
56
31
|
end
|
57
32
|
end
|
data/lib/arrow/version.rb
CHANGED
data/red-arrow.gemspec
CHANGED
@@ -46,7 +46,7 @@ Gem::Specification.new do |spec|
|
|
46
46
|
spec.test_files += Dir.glob("test/**/*")
|
47
47
|
spec.extensions = ["ext/arrow/extconf.rb"]
|
48
48
|
|
49
|
-
spec.add_runtime_dependency("bigdecimal", ">=
|
49
|
+
spec.add_runtime_dependency("bigdecimal", ">= 3.1.0")
|
50
50
|
spec.add_runtime_dependency("extpp", ">= 0.0.7")
|
51
51
|
spec.add_runtime_dependency("gio2", ">= 3.5.0")
|
52
52
|
spec.add_runtime_dependency("native-package-installer")
|