red-arrow 9.0.0 → 10.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/arrow/extconf.rb +2 -0
- data/lib/arrow/array.rb +13 -1
- data/lib/arrow/data-type.rb +6 -2
- data/lib/arrow/raw-table-converter.rb +6 -1
- data/lib/arrow/raw-tensor-converter.rb +89 -0
- data/lib/arrow/tensor.rb +140 -0
- data/lib/arrow/version.rb +1 -1
- data/test/test-orc.rb +2 -2
- data/test/test-table.rb +16 -0
- data/test/test-tensor.rb +243 -2
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c8cc21ba05de4956b7dd412963d2d39eb5f5d31566c6891ae4388064553baa97
|
4
|
+
data.tar.gz: a7c1bfa1d73f3a4ab8403e347902ffd6f754f6964d3054788a79d1022b32520b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e33a2acb65472b70c99348fec8b472ebeac48bbd3ba3c6aabd91481ed214b07e41fa3c6c3090ecd9ed545ee54cbe2fb09a0a7d730421a816afe6c167b00b6a5a
|
7
|
+
data.tar.gz: 47ab431103a9bcb0f5b4af955013b40415a582723b55640cd6b82a4b8af5ef775ed974e9f62d235cc81c296c0d7a0438edf805f3ed1ab442cae58501cb73625a
|
data/ext/arrow/extconf.rb
CHANGED
data/lib/arrow/array.rb
CHANGED
@@ -47,7 +47,15 @@ module Arrow
|
|
47
47
|
nil
|
48
48
|
end
|
49
49
|
else
|
50
|
-
|
50
|
+
if value.respond_to?(:to_arrow_array)
|
51
|
+
begin
|
52
|
+
value.to_arrow_array
|
53
|
+
rescue RangeError
|
54
|
+
nil
|
55
|
+
end
|
56
|
+
else
|
57
|
+
nil
|
58
|
+
end
|
51
59
|
end
|
52
60
|
end
|
53
61
|
end
|
@@ -103,6 +111,10 @@ module Arrow
|
|
103
111
|
self
|
104
112
|
end
|
105
113
|
|
114
|
+
def to_arrow_array
|
115
|
+
self
|
116
|
+
end
|
117
|
+
|
106
118
|
alias_method :value_data_type_raw, :value_data_type
|
107
119
|
def value_data_type
|
108
120
|
@value_data_type ||= value_data_type_raw
|
data/lib/arrow/data-type.rb
CHANGED
@@ -188,9 +188,13 @@ module Arrow
|
|
188
188
|
end
|
189
189
|
end
|
190
190
|
|
191
|
-
def
|
191
|
+
def array_class
|
192
192
|
base_name = self.class.name.gsub(/DataType\z/, "")
|
193
|
-
|
193
|
+
::Arrow.const_get("#{base_name}Array")
|
194
|
+
end
|
195
|
+
|
196
|
+
def build_array(values)
|
197
|
+
builder_class = array_class.builder_class
|
194
198
|
args = [values]
|
195
199
|
args.unshift(self) unless builder_class.buildable?(args)
|
196
200
|
builder_class.build(*args)
|
@@ -35,7 +35,12 @@ module Arrow
|
|
35
35
|
fields = []
|
36
36
|
@values = []
|
37
37
|
@raw_table.each do |name, array|
|
38
|
-
|
38
|
+
if array.respond_to?(:to_arrow_array)
|
39
|
+
array = array.to_arrow_array
|
40
|
+
else
|
41
|
+
array = array.to_ary if array.respond_to?(:to_ary)
|
42
|
+
array = ArrayBuilder.build(array)
|
43
|
+
end
|
39
44
|
fields << Field.new(name.to_s, array.value_data_type)
|
40
45
|
@values << array
|
41
46
|
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class RawTensorConverter
|
20
|
+
attr_reader :data_type
|
21
|
+
attr_reader :data
|
22
|
+
attr_reader :shape
|
23
|
+
attr_reader :strides
|
24
|
+
attr_reader :dimension_names
|
25
|
+
def initialize(raw_tensor,
|
26
|
+
data_type: nil,
|
27
|
+
shape: nil,
|
28
|
+
strides: nil,
|
29
|
+
dimension_names: nil)
|
30
|
+
@raw_tensor = raw_tensor
|
31
|
+
@data_type = data_type
|
32
|
+
@data = nil
|
33
|
+
@shape = shape
|
34
|
+
@strides = strides
|
35
|
+
@dimension_names = dimension_names
|
36
|
+
convert
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
def convert
|
41
|
+
case @raw_tensor
|
42
|
+
when Buffer
|
43
|
+
@data = @raw_tensor
|
44
|
+
when String
|
45
|
+
unless @raw_tensor.encoding == Encoding::ASCII_8BIT
|
46
|
+
message = "raw tensor String must be an ASCII-8BIT encoded string: " +
|
47
|
+
"#{@raw_tensor.encoding.inspect}"
|
48
|
+
raise ArgumentError, message
|
49
|
+
end
|
50
|
+
@data = Arrow::Buffer.new(@raw_tensor)
|
51
|
+
else
|
52
|
+
@shape ||= guess_shape
|
53
|
+
build_buffer
|
54
|
+
unless @strides.nil?
|
55
|
+
message = "strides: is only accepted with " +
|
56
|
+
"an Arrow::Buffer or String raw tensor: #{@strides.inspect}"
|
57
|
+
raise ArgumentError, message
|
58
|
+
end
|
59
|
+
end
|
60
|
+
if @shape.nil?
|
61
|
+
raise ArgumentError, "shape: is missing: #{@raw_tensor.inspect}"
|
62
|
+
end
|
63
|
+
if @data_type.nil?
|
64
|
+
raise ArgumentError, "data_type: is missing: #{@raw_tensor.inspect}"
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def guess_shape
|
69
|
+
shape = [@raw_tensor.size]
|
70
|
+
target = @raw_tensor[0]
|
71
|
+
while target.is_a?(::Array)
|
72
|
+
shape << target.size
|
73
|
+
target = target[0]
|
74
|
+
end
|
75
|
+
shape
|
76
|
+
end
|
77
|
+
|
78
|
+
def build_buffer
|
79
|
+
if @data_type
|
80
|
+
@data_type = DataType.resolve(@data_type)
|
81
|
+
array = @data_type.build_array(@raw_tensor.flatten)
|
82
|
+
else
|
83
|
+
array = Array.new(@raw_tensor.flatten)
|
84
|
+
@data_type = array.value_data_type
|
85
|
+
end
|
86
|
+
@data = array.data_buffer
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
data/lib/arrow/tensor.rb
CHANGED
@@ -15,10 +15,150 @@
|
|
15
15
|
# specific language governing permissions and limitations
|
16
16
|
# under the License.
|
17
17
|
|
18
|
+
require_relative "raw-tensor-converter"
|
19
|
+
|
18
20
|
module Arrow
|
19
21
|
class Tensor
|
22
|
+
alias_method :initialize_raw, :initialize
|
23
|
+
# Creates a new {Arrow::Tensor}.
|
24
|
+
#
|
25
|
+
# @overload initialize(raw_tensor, data_type: nil, shape: nil, dimension_names: nil)
|
26
|
+
#
|
27
|
+
# @param raw_tensor [::Array<Numeric>] The tensor represented as a
|
28
|
+
# raw `Array` (not `Arrow::Array`) of `Numeric`s. You can
|
29
|
+
# pass a nested `Array` for a multi-dimensional tensor.
|
30
|
+
#
|
31
|
+
# @param data_type [Arrow::DataType, String, Symbol, ::Array<String>,
|
32
|
+
# ::Array<Symbol>, Hash, nil] The element data type of the tensor.
|
33
|
+
#
|
34
|
+
# If you specify `nil`, data type is guessed from `raw_tensor`.
|
35
|
+
#
|
36
|
+
# See {Arrow::DataType.resolve} for how to specify data type.
|
37
|
+
#
|
38
|
+
# @param shape [::Array<Integer>, nil] The array of dimension sizes.
|
39
|
+
#
|
40
|
+
# If you specify `nil`, shape is guessed from `raw_tensor`.
|
41
|
+
#
|
42
|
+
# @param dimension_names [::Array<String>, ::Array<Symbol>, nil]
|
43
|
+
# The array of the dimension names.
|
44
|
+
#
|
45
|
+
# If you specify `nil`, all dimensions have empty names.
|
46
|
+
#
|
47
|
+
# @example Create a tensor from Ruby's Array
|
48
|
+
# raw_tensor = [
|
49
|
+
# [
|
50
|
+
# [1, 2, 3, 4],
|
51
|
+
# [5, 6, 7, 8],
|
52
|
+
# ],
|
53
|
+
# [
|
54
|
+
# [9, 10, 11, 12],
|
55
|
+
# [13, 14, 15, 16],
|
56
|
+
# ],
|
57
|
+
# [
|
58
|
+
# [17, 18, 19, 20],
|
59
|
+
# [21, 22, 23, 24],
|
60
|
+
# ],
|
61
|
+
# ]
|
62
|
+
# Arrow::Tensor.new(raw_tensor)
|
63
|
+
#
|
64
|
+
# @since 10.0.0
|
65
|
+
#
|
66
|
+
# @overload initialize(data_type, data, shape, strides, dimension_names)
|
67
|
+
#
|
68
|
+
# @param data_type [Arrow::DataType, String, Symbol, ::Array<String>,
|
69
|
+
# ::Array<Symbol>, Hash] The element data type of the tensor.
|
70
|
+
#
|
71
|
+
# See {Arrow::DataType.resolve} for how to specify data type.
|
72
|
+
#
|
73
|
+
# @param data [Arrow::Buffer, String] The data of the tensor.
|
74
|
+
#
|
75
|
+
# @param shape [::Array<Integer>] The array of dimension sizes.
|
76
|
+
#
|
77
|
+
# @param strides [::Array<Integer>, nil] The array of strides which
|
78
|
+
# is the number of bytes between two adjacent elements in each
|
79
|
+
# dimension.
|
80
|
+
#
|
81
|
+
# If you specify `nil` or an empty `Array`, strides are
|
82
|
+
# guessed from `data_type` and `data`.
|
83
|
+
#
|
84
|
+
# @param dimension_names [::Array<String>, ::Array<Symbol>, nil]
|
85
|
+
# The array of the dimension names.
|
86
|
+
#
|
87
|
+
# If you specify `nil`, all dimensions have empty names.
|
88
|
+
#
|
89
|
+
# @example Create a tensor from Arrow::Buffer
|
90
|
+
# raw_data = [
|
91
|
+
# 1, 2,
|
92
|
+
# 3, 4,
|
93
|
+
#
|
94
|
+
# 5, 6,
|
95
|
+
# 7, 8,
|
96
|
+
#
|
97
|
+
# 9, 10,
|
98
|
+
# 11, 12,
|
99
|
+
# ]
|
100
|
+
# data = Arrow::Buffer.new(raw_data.pack("c*").freeze)
|
101
|
+
# shape = [3, 2, 2]
|
102
|
+
# strides = []
|
103
|
+
# names = ["a", "b", "c"]
|
104
|
+
# Arrow::Tensor.new(:int8, data, shape, strides, names)
|
105
|
+
def initialize(*args,
|
106
|
+
data_type: nil,
|
107
|
+
data: nil,
|
108
|
+
shape: nil,
|
109
|
+
strides: nil,
|
110
|
+
dimension_names: nil)
|
111
|
+
n_args = args.size
|
112
|
+
case n_args
|
113
|
+
when 1
|
114
|
+
converter = RawTensorConverter.new(args[0],
|
115
|
+
data_type: data_type,
|
116
|
+
shape: shape,
|
117
|
+
strides: strides,
|
118
|
+
dimension_names: dimension_names)
|
119
|
+
data_type = converter.data_type
|
120
|
+
data = converter.data
|
121
|
+
shape = converter.shape
|
122
|
+
strides = converter.strides
|
123
|
+
dimension_names = converter.dimension_names
|
124
|
+
when 0, 2..5
|
125
|
+
data_type = args[0] || data_type
|
126
|
+
data = args[1] || data
|
127
|
+
shape = args[2] || shape
|
128
|
+
strides = args[3] || strides
|
129
|
+
dimension_names = args[4] || dimension_names
|
130
|
+
if data_type.nil?
|
131
|
+
raise ArgumentError, "data_type: is missing: #{data.inspect}"
|
132
|
+
end
|
133
|
+
else
|
134
|
+
message = "wrong number of arguments (given #{n_args}, expected 0..5)"
|
135
|
+
raise ArgumentError, message
|
136
|
+
end
|
137
|
+
initialize_raw(DataType.resolve(data_type),
|
138
|
+
data,
|
139
|
+
shape,
|
140
|
+
strides,
|
141
|
+
dimension_names)
|
142
|
+
end
|
143
|
+
|
144
|
+
def dimension_names
|
145
|
+
n_dimensions.times.collect do |i|
|
146
|
+
get_dimension_name(i)
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
20
150
|
def to_arrow
|
21
151
|
self
|
22
152
|
end
|
153
|
+
|
154
|
+
def to_arrow_array
|
155
|
+
if n_dimensions != 1
|
156
|
+
raise RangeError, "must be 1 dimensional tensor: #{shape.inspect}"
|
157
|
+
end
|
158
|
+
value_data_type.array_class.new(size,
|
159
|
+
buffer,
|
160
|
+
nil,
|
161
|
+
0)
|
162
|
+
end
|
23
163
|
end
|
24
164
|
end
|
data/lib/arrow/version.rb
CHANGED
data/test/test-orc.rb
CHANGED
@@ -164,8 +164,8 @@ class ORCTest < Test::Unit::TestCase
|
|
164
164
|
]
|
165
165
|
end
|
166
166
|
assert_equal([
|
167
|
-
["
|
168
|
-
["
|
167
|
+
["byte1: int8", [pp_values([1, 100])]],
|
168
|
+
["int1: int32", [pp_values([65536, 65536])]],
|
169
169
|
],
|
170
170
|
dump)
|
171
171
|
end
|
data/test/test-table.rb
CHANGED
@@ -40,6 +40,22 @@ class TableTest < Test::Unit::TestCase
|
|
40
40
|
@table = Arrow::Table.new(schema, [@count_array, @visible_array])
|
41
41
|
end
|
42
42
|
|
43
|
+
sub_test_case(".new") do
|
44
|
+
test("{Symbol: Arrow::Tensor}") do
|
45
|
+
assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
|
46
|
+
Arrow::Table.new(numbers: Arrow::Tensor.new([1, 2, 3])))
|
47
|
+
end
|
48
|
+
|
49
|
+
test("{Symbol: #to_ary}") do
|
50
|
+
array_like = Object.new
|
51
|
+
def array_like.to_ary
|
52
|
+
[1, 2, 3]
|
53
|
+
end
|
54
|
+
assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
|
55
|
+
Arrow::Table.new(numbers: array_like))
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
43
59
|
test("#columns") do
|
44
60
|
assert_equal([
|
45
61
|
Arrow::Column.new(@table, 0),
|
data/test/test-tensor.rb
CHANGED
@@ -16,6 +16,233 @@
|
|
16
16
|
# under the License.
|
17
17
|
|
18
18
|
class TensorTest < Test::Unit::TestCase
|
19
|
+
sub_test_case("class methods") do
|
20
|
+
sub_test_case(".new") do
|
21
|
+
def setup
|
22
|
+
@raw_tensor = [
|
23
|
+
[
|
24
|
+
[1, 2, 3, 4],
|
25
|
+
[5, 6, 7, 8],
|
26
|
+
],
|
27
|
+
[
|
28
|
+
[9, 10, 11, 12],
|
29
|
+
[13, 14, 15, 16],
|
30
|
+
],
|
31
|
+
[
|
32
|
+
[17, 18, 19, 20],
|
33
|
+
[21, 22, 23, 24],
|
34
|
+
],
|
35
|
+
]
|
36
|
+
@shape = [3, 2, 4]
|
37
|
+
@strides = [8, 4, 1]
|
38
|
+
end
|
39
|
+
|
40
|
+
test("Array") do
|
41
|
+
tensor = Arrow::Tensor.new(@raw_tensor)
|
42
|
+
assert_equal({
|
43
|
+
value_data_type: Arrow::UInt8DataType.new,
|
44
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
45
|
+
shape: @shape,
|
46
|
+
strides: @strides,
|
47
|
+
dimension_names: ["", "", ""],
|
48
|
+
},
|
49
|
+
{
|
50
|
+
value_data_type: tensor.value_data_type,
|
51
|
+
buffer: tensor.buffer.data.to_s,
|
52
|
+
shape: tensor.shape,
|
53
|
+
strides: tensor.strides,
|
54
|
+
dimension_names: tensor.dimension_names,
|
55
|
+
})
|
56
|
+
end
|
57
|
+
|
58
|
+
test("Array, data_type: Symbol") do
|
59
|
+
tensor = Arrow::Tensor.new(@raw_tensor, data_type: :int32)
|
60
|
+
assert_equal({
|
61
|
+
value_data_type: Arrow::Int32DataType.new,
|
62
|
+
buffer: @raw_tensor.flatten.pack("l*"),
|
63
|
+
shape: @shape,
|
64
|
+
strides: @strides.collect {|x| x * 4},
|
65
|
+
dimension_names: ["", "", ""],
|
66
|
+
},
|
67
|
+
{
|
68
|
+
value_data_type: tensor.value_data_type,
|
69
|
+
buffer: tensor.buffer.data.to_s,
|
70
|
+
shape: tensor.shape,
|
71
|
+
strides: tensor.strides,
|
72
|
+
dimension_names: tensor.dimension_names,
|
73
|
+
})
|
74
|
+
end
|
75
|
+
|
76
|
+
test("Array, dimension_names: Array<String>") do
|
77
|
+
tensor = Arrow::Tensor.new(@raw_tensor,
|
78
|
+
dimension_names: ["a", "b", "c"])
|
79
|
+
assert_equal({
|
80
|
+
value_data_type: Arrow::UInt8DataType.new,
|
81
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
82
|
+
shape: @shape,
|
83
|
+
strides: @strides,
|
84
|
+
dimension_names: ["a", "b", "c"],
|
85
|
+
},
|
86
|
+
{
|
87
|
+
value_data_type: tensor.value_data_type,
|
88
|
+
buffer: tensor.buffer.data.to_s,
|
89
|
+
shape: tensor.shape,
|
90
|
+
strides: tensor.strides,
|
91
|
+
dimension_names: tensor.dimension_names,
|
92
|
+
})
|
93
|
+
end
|
94
|
+
|
95
|
+
test("Array, dimension_names: Array<Symbol>") do
|
96
|
+
tensor = Arrow::Tensor.new(@raw_tensor,
|
97
|
+
dimension_names: [:a, :b, :c])
|
98
|
+
assert_equal({
|
99
|
+
value_data_type: Arrow::UInt8DataType.new,
|
100
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
101
|
+
shape: @shape,
|
102
|
+
strides: @strides,
|
103
|
+
dimension_names: ["a", "b", "c"],
|
104
|
+
},
|
105
|
+
{
|
106
|
+
value_data_type: tensor.value_data_type,
|
107
|
+
buffer: tensor.buffer.data.to_s,
|
108
|
+
shape: tensor.shape,
|
109
|
+
strides: tensor.strides,
|
110
|
+
dimension_names: tensor.dimension_names,
|
111
|
+
})
|
112
|
+
end
|
113
|
+
|
114
|
+
test("Array, strides:") do
|
115
|
+
message = "strides: is only accepted with " +
|
116
|
+
"an Arrow::Buffer or String raw tensor: #{@strides.inspect}"
|
117
|
+
assert_raise(ArgumentError.new(message)) do
|
118
|
+
Arrow::Tensor.new(@raw_tensor, strides: @strides)
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
test("Arrow::Buffer, data_type:, shape:") do
|
123
|
+
data_type = :uint8
|
124
|
+
data = Arrow::Buffer.new(@raw_tensor.flatten.pack("C*").freeze)
|
125
|
+
tensor = Arrow::Tensor.new(data,
|
126
|
+
data_type: data_type,
|
127
|
+
shape: @shape)
|
128
|
+
assert_equal({
|
129
|
+
value_data_type: Arrow::UInt8DataType.new,
|
130
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
131
|
+
shape: @shape,
|
132
|
+
strides: @strides,
|
133
|
+
dimension_names: ["", "", ""],
|
134
|
+
},
|
135
|
+
{
|
136
|
+
value_data_type: tensor.value_data_type,
|
137
|
+
buffer: tensor.buffer.data.to_s,
|
138
|
+
shape: tensor.shape,
|
139
|
+
strides: tensor.strides,
|
140
|
+
dimension_names: tensor.dimension_names,
|
141
|
+
})
|
142
|
+
end
|
143
|
+
|
144
|
+
test("String, data_type:, shape:") do
|
145
|
+
data_type = :uint8
|
146
|
+
data = @raw_tensor.flatten.pack("C*").freeze
|
147
|
+
tensor = Arrow::Tensor.new(data,
|
148
|
+
data_type: data_type,
|
149
|
+
shape: @shape)
|
150
|
+
assert_equal({
|
151
|
+
value_data_type: Arrow::UInt8DataType.new,
|
152
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
153
|
+
shape: @shape,
|
154
|
+
strides: @strides,
|
155
|
+
dimension_names: ["", "", ""],
|
156
|
+
},
|
157
|
+
{
|
158
|
+
value_data_type: tensor.value_data_type,
|
159
|
+
buffer: tensor.buffer.data.to_s,
|
160
|
+
shape: tensor.shape,
|
161
|
+
strides: tensor.strides,
|
162
|
+
dimension_names: tensor.dimension_names,
|
163
|
+
})
|
164
|
+
end
|
165
|
+
|
166
|
+
test("String, data_type:") do
|
167
|
+
data_type = :uint8
|
168
|
+
data = @raw_tensor.flatten.pack("C*").freeze
|
169
|
+
message = "shape: is missing: #{data.inspect}"
|
170
|
+
assert_raise(ArgumentError.new(message)) do
|
171
|
+
Arrow::Tensor.new(data, data_type: data_type)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
test("String, shape:") do
|
176
|
+
data = @raw_tensor.flatten.pack("C*").freeze
|
177
|
+
message = "data_type: is missing: #{data.inspect}"
|
178
|
+
assert_raise(ArgumentError.new(message)) do
|
179
|
+
Arrow::Tensor.new(data, shape: @shape)
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
test("String - not ASCII-8BIT") do
|
184
|
+
data = "XXX"
|
185
|
+
message = "raw tensor String must be " +
|
186
|
+
"an ASCII-8BIT encoded string: #{data.encoding.inspect}"
|
187
|
+
assert_raise(ArgumentError.new(message)) do
|
188
|
+
Arrow::Tensor.new("XXX")
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
test("Symbol, Arrow::Buffer, shape:") do
|
193
|
+
data_type = :uint8
|
194
|
+
data = Arrow::Buffer.new(@raw_tensor.flatten.pack("C*").freeze)
|
195
|
+
tensor = Arrow::Tensor.new(data_type,
|
196
|
+
data,
|
197
|
+
shape: @shape)
|
198
|
+
assert_equal({
|
199
|
+
value_data_type: Arrow::UInt8DataType.new,
|
200
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
201
|
+
shape: @shape,
|
202
|
+
strides: @strides,
|
203
|
+
dimension_names: ["", "", ""],
|
204
|
+
},
|
205
|
+
{
|
206
|
+
value_data_type: tensor.value_data_type,
|
207
|
+
buffer: tensor.buffer.data.to_s,
|
208
|
+
shape: tensor.shape,
|
209
|
+
strides: tensor.strides,
|
210
|
+
dimension_names: tensor.dimension_names,
|
211
|
+
})
|
212
|
+
end
|
213
|
+
|
214
|
+
test("Symbol, String, shape:, strides: - !contiguous and column major") do
|
215
|
+
data_type = :uint8
|
216
|
+
@shape[-1] -= 1 # Ignore the last element in @raw_tensor
|
217
|
+
@strides.reverse
|
218
|
+
tensor = Arrow::Tensor.new(data_type,
|
219
|
+
@raw_tensor.flatten.pack("C*"),
|
220
|
+
shape: @shape,
|
221
|
+
strides: @strides)
|
222
|
+
assert_equal({
|
223
|
+
value_data_type: Arrow::UInt8DataType.new,
|
224
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
225
|
+
shape: @shape,
|
226
|
+
strides: @strides,
|
227
|
+
dimension_names: ["", "", ""],
|
228
|
+
contiguous: false,
|
229
|
+
row_major: false,
|
230
|
+
column_major: false,
|
231
|
+
},
|
232
|
+
{
|
233
|
+
value_data_type: tensor.value_data_type,
|
234
|
+
buffer: tensor.buffer.data.to_s,
|
235
|
+
shape: tensor.shape,
|
236
|
+
strides: tensor.strides,
|
237
|
+
dimension_names: tensor.dimension_names,
|
238
|
+
contiguous: tensor.contiguous?,
|
239
|
+
row_major: tensor.row_major?,
|
240
|
+
column_major: tensor.column_major?,
|
241
|
+
})
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
19
246
|
sub_test_case("instance methods") do
|
20
247
|
def setup
|
21
248
|
raw_data = [
|
@@ -28,11 +255,11 @@ class TensorTest < Test::Unit::TestCase
|
|
28
255
|
9, 10,
|
29
256
|
11, 12,
|
30
257
|
]
|
31
|
-
data = Arrow::Buffer.new(raw_data.pack("c*"))
|
258
|
+
data = Arrow::Buffer.new(raw_data.pack("c*").freeze)
|
32
259
|
shape = [3, 2, 2]
|
33
260
|
strides = []
|
34
261
|
names = ["a", "b", "c"]
|
35
|
-
@tensor = Arrow::Tensor.new(
|
262
|
+
@tensor = Arrow::Tensor.new(:int8,
|
36
263
|
data,
|
37
264
|
shape,
|
38
265
|
strides,
|
@@ -52,5 +279,19 @@ class TensorTest < Test::Unit::TestCase
|
|
52
279
|
end
|
53
280
|
end
|
54
281
|
end
|
282
|
+
|
283
|
+
sub_test_case("#to_arrow_array") do
|
284
|
+
test("1 dimension") do
|
285
|
+
assert_equal(Arrow::UInt8Array.new([1, 2, 3]),
|
286
|
+
Arrow::Tensor.new([1, 2, 3]).to_arrow_array)
|
287
|
+
end
|
288
|
+
|
289
|
+
test("2 dimensions") do
|
290
|
+
message = "must be 1 dimensional tensor: [3, 1]"
|
291
|
+
assert_raise(RangeError.new(message)) do
|
292
|
+
Arrow::Tensor.new([[1], [2], [3]]).to_arrow_array
|
293
|
+
end
|
294
|
+
end
|
295
|
+
end
|
55
296
|
end
|
56
297
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-arrow
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 10.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Apache Arrow Developers
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|
@@ -163,6 +163,7 @@ files:
|
|
163
163
|
- lib/arrow/null-array.rb
|
164
164
|
- lib/arrow/path-extension.rb
|
165
165
|
- lib/arrow/raw-table-converter.rb
|
166
|
+
- lib/arrow/raw-tensor-converter.rb
|
166
167
|
- lib/arrow/record-batch-builder.rb
|
167
168
|
- lib/arrow/record-batch-file-reader.rb
|
168
169
|
- lib/arrow/record-batch-iterator.rb
|
@@ -307,8 +308,8 @@ homepage: https://arrow.apache.org/
|
|
307
308
|
licenses:
|
308
309
|
- Apache-2.0
|
309
310
|
metadata:
|
310
|
-
msys2_mingw_dependencies: arrow>=
|
311
|
-
post_install_message:
|
311
|
+
msys2_mingw_dependencies: arrow>=10.0.1
|
312
|
+
post_install_message:
|
312
313
|
rdoc_options: []
|
313
314
|
require_paths:
|
314
315
|
- lib
|
@@ -324,7 +325,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
324
325
|
version: '0'
|
325
326
|
requirements: []
|
326
327
|
rubygems_version: 3.3.15
|
327
|
-
signing_key:
|
328
|
+
signing_key:
|
328
329
|
specification_version: 4
|
329
330
|
summary: Red Arrow is the Ruby bindings of Apache Arrow
|
330
331
|
test_files:
|