red-arrow 8.0.0 → 10.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/arrow/converters.hpp +3 -0
- data/ext/arrow/extconf.rb +8 -4
- data/lib/arrow/array-builder.rb +40 -6
- data/lib/arrow/array-computable.rb +37 -0
- data/lib/arrow/array.rb +28 -0
- data/lib/arrow/chunked-array.rb +21 -0
- data/lib/arrow/column.rb +28 -0
- data/lib/arrow/data-type.rb +8 -3
- data/lib/arrow/decimal128-array-builder.rb +16 -6
- data/lib/arrow/decimal128.rb +14 -0
- data/lib/arrow/decimal256-array-builder.rb +16 -6
- data/lib/arrow/decimal256.rb +14 -0
- data/lib/arrow/field.rb +44 -3
- data/lib/arrow/list-data-type.rb +1 -6
- data/lib/arrow/loader.rb +3 -0
- data/lib/arrow/raw-table-converter.rb +6 -1
- data/lib/arrow/raw-tensor-converter.rb +89 -0
- data/lib/arrow/string-array-builder.rb +30 -0
- data/lib/arrow/tensor.rb +140 -0
- data/lib/arrow/time-unit.rb +31 -0
- data/lib/arrow/time32-array-builder.rb +2 -14
- data/lib/arrow/time32-data-type.rb +9 -38
- data/lib/arrow/time64-array-builder.rb +2 -14
- data/lib/arrow/time64-data-type.rb +9 -38
- data/lib/arrow/timestamp-array-builder.rb +2 -14
- data/lib/arrow/timestamp-data-type.rb +9 -34
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -1
- data/test/raw-records/test-dictionary-array.rb +341 -0
- data/test/test-array-builder.rb +62 -0
- data/test/test-chunked-array.rb +6 -0
- data/test/test-column.rb +31 -0
- data/test/test-decimal128-array-builder.rb +14 -0
- data/test/test-decimal128-array.rb +5 -2
- data/test/test-decimal128.rb +26 -2
- data/test/test-decimal256-array-builder.rb +14 -0
- data/test/test-decimal256-array.rb +5 -2
- data/test/test-decimal256.rb +26 -2
- data/test/test-field.rb +26 -0
- data/test/test-orc.rb +2 -2
- data/test/test-table.rb +16 -0
- data/test/test-tensor.rb +243 -2
- data/test/values/test-dictionary-array.rb +30 -0
- metadata +15 -9
data/test/test-table.rb
CHANGED
@@ -40,6 +40,22 @@ class TableTest < Test::Unit::TestCase
|
|
40
40
|
@table = Arrow::Table.new(schema, [@count_array, @visible_array])
|
41
41
|
end
|
42
42
|
|
43
|
+
sub_test_case(".new") do
|
44
|
+
test("{Symbol: Arrow::Tensor}") do
|
45
|
+
assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
|
46
|
+
Arrow::Table.new(numbers: Arrow::Tensor.new([1, 2, 3])))
|
47
|
+
end
|
48
|
+
|
49
|
+
test("{Symbol: #to_ary}") do
|
50
|
+
array_like = Object.new
|
51
|
+
def array_like.to_ary
|
52
|
+
[1, 2, 3]
|
53
|
+
end
|
54
|
+
assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
|
55
|
+
Arrow::Table.new(numbers: array_like))
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
43
59
|
test("#columns") do
|
44
60
|
assert_equal([
|
45
61
|
Arrow::Column.new(@table, 0),
|
data/test/test-tensor.rb
CHANGED
@@ -16,6 +16,233 @@
|
|
16
16
|
# under the License.
|
17
17
|
|
18
18
|
class TensorTest < Test::Unit::TestCase
|
19
|
+
sub_test_case("class methods") do
|
20
|
+
sub_test_case(".new") do
|
21
|
+
def setup
|
22
|
+
@raw_tensor = [
|
23
|
+
[
|
24
|
+
[1, 2, 3, 4],
|
25
|
+
[5, 6, 7, 8],
|
26
|
+
],
|
27
|
+
[
|
28
|
+
[9, 10, 11, 12],
|
29
|
+
[13, 14, 15, 16],
|
30
|
+
],
|
31
|
+
[
|
32
|
+
[17, 18, 19, 20],
|
33
|
+
[21, 22, 23, 24],
|
34
|
+
],
|
35
|
+
]
|
36
|
+
@shape = [3, 2, 4]
|
37
|
+
@strides = [8, 4, 1]
|
38
|
+
end
|
39
|
+
|
40
|
+
test("Array") do
|
41
|
+
tensor = Arrow::Tensor.new(@raw_tensor)
|
42
|
+
assert_equal({
|
43
|
+
value_data_type: Arrow::UInt8DataType.new,
|
44
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
45
|
+
shape: @shape,
|
46
|
+
strides: @strides,
|
47
|
+
dimension_names: ["", "", ""],
|
48
|
+
},
|
49
|
+
{
|
50
|
+
value_data_type: tensor.value_data_type,
|
51
|
+
buffer: tensor.buffer.data.to_s,
|
52
|
+
shape: tensor.shape,
|
53
|
+
strides: tensor.strides,
|
54
|
+
dimension_names: tensor.dimension_names,
|
55
|
+
})
|
56
|
+
end
|
57
|
+
|
58
|
+
test("Array, data_type: Symbol") do
|
59
|
+
tensor = Arrow::Tensor.new(@raw_tensor, data_type: :int32)
|
60
|
+
assert_equal({
|
61
|
+
value_data_type: Arrow::Int32DataType.new,
|
62
|
+
buffer: @raw_tensor.flatten.pack("l*"),
|
63
|
+
shape: @shape,
|
64
|
+
strides: @strides.collect {|x| x * 4},
|
65
|
+
dimension_names: ["", "", ""],
|
66
|
+
},
|
67
|
+
{
|
68
|
+
value_data_type: tensor.value_data_type,
|
69
|
+
buffer: tensor.buffer.data.to_s,
|
70
|
+
shape: tensor.shape,
|
71
|
+
strides: tensor.strides,
|
72
|
+
dimension_names: tensor.dimension_names,
|
73
|
+
})
|
74
|
+
end
|
75
|
+
|
76
|
+
test("Array, dimension_names: Array<String>") do
|
77
|
+
tensor = Arrow::Tensor.new(@raw_tensor,
|
78
|
+
dimension_names: ["a", "b", "c"])
|
79
|
+
assert_equal({
|
80
|
+
value_data_type: Arrow::UInt8DataType.new,
|
81
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
82
|
+
shape: @shape,
|
83
|
+
strides: @strides,
|
84
|
+
dimension_names: ["a", "b", "c"],
|
85
|
+
},
|
86
|
+
{
|
87
|
+
value_data_type: tensor.value_data_type,
|
88
|
+
buffer: tensor.buffer.data.to_s,
|
89
|
+
shape: tensor.shape,
|
90
|
+
strides: tensor.strides,
|
91
|
+
dimension_names: tensor.dimension_names,
|
92
|
+
})
|
93
|
+
end
|
94
|
+
|
95
|
+
test("Array, dimension_names: Array<Symbol>") do
|
96
|
+
tensor = Arrow::Tensor.new(@raw_tensor,
|
97
|
+
dimension_names: [:a, :b, :c])
|
98
|
+
assert_equal({
|
99
|
+
value_data_type: Arrow::UInt8DataType.new,
|
100
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
101
|
+
shape: @shape,
|
102
|
+
strides: @strides,
|
103
|
+
dimension_names: ["a", "b", "c"],
|
104
|
+
},
|
105
|
+
{
|
106
|
+
value_data_type: tensor.value_data_type,
|
107
|
+
buffer: tensor.buffer.data.to_s,
|
108
|
+
shape: tensor.shape,
|
109
|
+
strides: tensor.strides,
|
110
|
+
dimension_names: tensor.dimension_names,
|
111
|
+
})
|
112
|
+
end
|
113
|
+
|
114
|
+
test("Array, strides:") do
|
115
|
+
message = "strides: is only accepted with " +
|
116
|
+
"an Arrow::Buffer or String raw tensor: #{@strides.inspect}"
|
117
|
+
assert_raise(ArgumentError.new(message)) do
|
118
|
+
Arrow::Tensor.new(@raw_tensor, strides: @strides)
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
test("Arrow::Buffer, data_type:, shape:") do
|
123
|
+
data_type = :uint8
|
124
|
+
data = Arrow::Buffer.new(@raw_tensor.flatten.pack("C*").freeze)
|
125
|
+
tensor = Arrow::Tensor.new(data,
|
126
|
+
data_type: data_type,
|
127
|
+
shape: @shape)
|
128
|
+
assert_equal({
|
129
|
+
value_data_type: Arrow::UInt8DataType.new,
|
130
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
131
|
+
shape: @shape,
|
132
|
+
strides: @strides,
|
133
|
+
dimension_names: ["", "", ""],
|
134
|
+
},
|
135
|
+
{
|
136
|
+
value_data_type: tensor.value_data_type,
|
137
|
+
buffer: tensor.buffer.data.to_s,
|
138
|
+
shape: tensor.shape,
|
139
|
+
strides: tensor.strides,
|
140
|
+
dimension_names: tensor.dimension_names,
|
141
|
+
})
|
142
|
+
end
|
143
|
+
|
144
|
+
test("String, data_type:, shape:") do
|
145
|
+
data_type = :uint8
|
146
|
+
data = @raw_tensor.flatten.pack("C*").freeze
|
147
|
+
tensor = Arrow::Tensor.new(data,
|
148
|
+
data_type: data_type,
|
149
|
+
shape: @shape)
|
150
|
+
assert_equal({
|
151
|
+
value_data_type: Arrow::UInt8DataType.new,
|
152
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
153
|
+
shape: @shape,
|
154
|
+
strides: @strides,
|
155
|
+
dimension_names: ["", "", ""],
|
156
|
+
},
|
157
|
+
{
|
158
|
+
value_data_type: tensor.value_data_type,
|
159
|
+
buffer: tensor.buffer.data.to_s,
|
160
|
+
shape: tensor.shape,
|
161
|
+
strides: tensor.strides,
|
162
|
+
dimension_names: tensor.dimension_names,
|
163
|
+
})
|
164
|
+
end
|
165
|
+
|
166
|
+
test("String, data_type:") do
|
167
|
+
data_type = :uint8
|
168
|
+
data = @raw_tensor.flatten.pack("C*").freeze
|
169
|
+
message = "shape: is missing: #{data.inspect}"
|
170
|
+
assert_raise(ArgumentError.new(message)) do
|
171
|
+
Arrow::Tensor.new(data, data_type: data_type)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
test("String, shape:") do
|
176
|
+
data = @raw_tensor.flatten.pack("C*").freeze
|
177
|
+
message = "data_type: is missing: #{data.inspect}"
|
178
|
+
assert_raise(ArgumentError.new(message)) do
|
179
|
+
Arrow::Tensor.new(data, shape: @shape)
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
test("String - not ASCII-8BIT") do
|
184
|
+
data = "XXX"
|
185
|
+
message = "raw tensor String must be " +
|
186
|
+
"an ASCII-8BIT encoded string: #{data.encoding.inspect}"
|
187
|
+
assert_raise(ArgumentError.new(message)) do
|
188
|
+
Arrow::Tensor.new("XXX")
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
test("Symbol, Arrow::Buffer, shape:") do
|
193
|
+
data_type = :uint8
|
194
|
+
data = Arrow::Buffer.new(@raw_tensor.flatten.pack("C*").freeze)
|
195
|
+
tensor = Arrow::Tensor.new(data_type,
|
196
|
+
data,
|
197
|
+
shape: @shape)
|
198
|
+
assert_equal({
|
199
|
+
value_data_type: Arrow::UInt8DataType.new,
|
200
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
201
|
+
shape: @shape,
|
202
|
+
strides: @strides,
|
203
|
+
dimension_names: ["", "", ""],
|
204
|
+
},
|
205
|
+
{
|
206
|
+
value_data_type: tensor.value_data_type,
|
207
|
+
buffer: tensor.buffer.data.to_s,
|
208
|
+
shape: tensor.shape,
|
209
|
+
strides: tensor.strides,
|
210
|
+
dimension_names: tensor.dimension_names,
|
211
|
+
})
|
212
|
+
end
|
213
|
+
|
214
|
+
test("Symbol, String, shape:, strides: - !contiguous and column major") do
|
215
|
+
data_type = :uint8
|
216
|
+
@shape[-1] -= 1 # Ignore the last element in @raw_tensor
|
217
|
+
@strides.reverse
|
218
|
+
tensor = Arrow::Tensor.new(data_type,
|
219
|
+
@raw_tensor.flatten.pack("C*"),
|
220
|
+
shape: @shape,
|
221
|
+
strides: @strides)
|
222
|
+
assert_equal({
|
223
|
+
value_data_type: Arrow::UInt8DataType.new,
|
224
|
+
buffer: @raw_tensor.flatten.pack("C*"),
|
225
|
+
shape: @shape,
|
226
|
+
strides: @strides,
|
227
|
+
dimension_names: ["", "", ""],
|
228
|
+
contiguous: false,
|
229
|
+
row_major: false,
|
230
|
+
column_major: false,
|
231
|
+
},
|
232
|
+
{
|
233
|
+
value_data_type: tensor.value_data_type,
|
234
|
+
buffer: tensor.buffer.data.to_s,
|
235
|
+
shape: tensor.shape,
|
236
|
+
strides: tensor.strides,
|
237
|
+
dimension_names: tensor.dimension_names,
|
238
|
+
contiguous: tensor.contiguous?,
|
239
|
+
row_major: tensor.row_major?,
|
240
|
+
column_major: tensor.column_major?,
|
241
|
+
})
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
19
246
|
sub_test_case("instance methods") do
|
20
247
|
def setup
|
21
248
|
raw_data = [
|
@@ -28,11 +255,11 @@ class TensorTest < Test::Unit::TestCase
|
|
28
255
|
9, 10,
|
29
256
|
11, 12,
|
30
257
|
]
|
31
|
-
data = Arrow::Buffer.new(raw_data.pack("c*"))
|
258
|
+
data = Arrow::Buffer.new(raw_data.pack("c*").freeze)
|
32
259
|
shape = [3, 2, 2]
|
33
260
|
strides = []
|
34
261
|
names = ["a", "b", "c"]
|
35
|
-
@tensor = Arrow::Tensor.new(
|
262
|
+
@tensor = Arrow::Tensor.new(:int8,
|
36
263
|
data,
|
37
264
|
shape,
|
38
265
|
strides,
|
@@ -52,5 +279,19 @@ class TensorTest < Test::Unit::TestCase
|
|
52
279
|
end
|
53
280
|
end
|
54
281
|
end
|
282
|
+
|
283
|
+
sub_test_case("#to_arrow_array") do
|
284
|
+
test("1 dimension") do
|
285
|
+
assert_equal(Arrow::UInt8Array.new([1, 2, 3]),
|
286
|
+
Arrow::Tensor.new([1, 2, 3]).to_arrow_array)
|
287
|
+
end
|
288
|
+
|
289
|
+
test("2 dimensions") do
|
290
|
+
message = "must be 1 dimensional tensor: [3, 1]"
|
291
|
+
assert_raise(RangeError.new(message)) do
|
292
|
+
Arrow::Tensor.new([[1], [2], [3]]).to_arrow_array
|
293
|
+
end
|
294
|
+
end
|
295
|
+
end
|
55
296
|
end
|
56
297
|
end
|
@@ -276,6 +276,36 @@ module ValuesDictionaryArrayTests
|
|
276
276
|
target = build(Arrow::Decimal256Array.new(data_type, values))
|
277
277
|
assert_equal(values, target.values)
|
278
278
|
end
|
279
|
+
|
280
|
+
def test_month_interval
|
281
|
+
values = [
|
282
|
+
1,
|
283
|
+
nil,
|
284
|
+
12,
|
285
|
+
]
|
286
|
+
target = build(Arrow::MonthIntervalArray.new(values))
|
287
|
+
assert_equal(values, target.values)
|
288
|
+
end
|
289
|
+
|
290
|
+
def test_day_time_interval
|
291
|
+
values = [
|
292
|
+
{day: 1, millisecond: 100},
|
293
|
+
nil,
|
294
|
+
{day: 2, millisecond: 300},
|
295
|
+
]
|
296
|
+
target = build(Arrow::DayTimeIntervalArray.new(values))
|
297
|
+
assert_equal(values, target.values)
|
298
|
+
end
|
299
|
+
|
300
|
+
def test_month_day_nano_interval
|
301
|
+
values = [
|
302
|
+
{month: 1, day: 1, nanosecond: 100},
|
303
|
+
nil,
|
304
|
+
{month: 2, day: 3, nanosecond: 400},
|
305
|
+
]
|
306
|
+
target = build(Arrow::MonthDayNanoIntervalArray.new(values))
|
307
|
+
assert_equal(values, target.values)
|
308
|
+
end
|
279
309
|
end
|
280
310
|
|
281
311
|
class ValuesArrayDictionaryArrayTest < Test::Unit::TestCase
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-arrow
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.0.0
|
4
|
+
version: 10.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Apache Arrow Developers
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 3.1.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 3.1.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: extpp
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -109,6 +109,7 @@ files:
|
|
109
109
|
- lib/arrow/aggregate-node-options.rb
|
110
110
|
- lib/arrow/aggregation.rb
|
111
111
|
- lib/arrow/array-builder.rb
|
112
|
+
- lib/arrow/array-computable.rb
|
112
113
|
- lib/arrow/array.rb
|
113
114
|
- lib/arrow/bigdecimal-extension.rb
|
114
115
|
- lib/arrow/binary-dictionary-array-builder.rb
|
@@ -162,6 +163,7 @@ files:
|
|
162
163
|
- lib/arrow/null-array.rb
|
163
164
|
- lib/arrow/path-extension.rb
|
164
165
|
- lib/arrow/raw-table-converter.rb
|
166
|
+
- lib/arrow/raw-tensor-converter.rb
|
165
167
|
- lib/arrow/record-batch-builder.rb
|
166
168
|
- lib/arrow/record-batch-file-reader.rb
|
167
169
|
- lib/arrow/record-batch-iterator.rb
|
@@ -179,6 +181,7 @@ files:
|
|
179
181
|
- lib/arrow/sort-options.rb
|
180
182
|
- lib/arrow/source-node-options.rb
|
181
183
|
- lib/arrow/sparse-union-data-type.rb
|
184
|
+
- lib/arrow/string-array-builder.rb
|
182
185
|
- lib/arrow/string-dictionary-array-builder.rb
|
183
186
|
- lib/arrow/struct-array-builder.rb
|
184
187
|
- lib/arrow/struct-array.rb
|
@@ -192,6 +195,7 @@ files:
|
|
192
195
|
- lib/arrow/table-table-formatter.rb
|
193
196
|
- lib/arrow/table.rb
|
194
197
|
- lib/arrow/tensor.rb
|
198
|
+
- lib/arrow/time-unit.rb
|
195
199
|
- lib/arrow/time.rb
|
196
200
|
- lib/arrow/time32-array-builder.rb
|
197
201
|
- lib/arrow/time32-array.rb
|
@@ -219,6 +223,7 @@ files:
|
|
219
223
|
- test/helper/omittable.rb
|
220
224
|
- test/raw-records/test-basic-arrays.rb
|
221
225
|
- test/raw-records/test-dense-union-array.rb
|
226
|
+
- test/raw-records/test-dictionary-array.rb
|
222
227
|
- test/raw-records/test-list-array.rb
|
223
228
|
- test/raw-records/test-map-array.rb
|
224
229
|
- test/raw-records/test-multiple-columns.rb
|
@@ -303,8 +308,8 @@ homepage: https://arrow.apache.org/
|
|
303
308
|
licenses:
|
304
309
|
- Apache-2.0
|
305
310
|
metadata:
|
306
|
-
msys2_mingw_dependencies: arrow>=8.0.0
|
307
|
-
post_install_message:
|
311
|
+
msys2_mingw_dependencies: arrow>=10.0.0
|
312
|
+
post_install_message:
|
308
313
|
rdoc_options: []
|
309
314
|
require_paths:
|
310
315
|
- lib
|
@@ -319,8 +324,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
319
324
|
- !ruby/object:Gem::Version
|
320
325
|
version: '0'
|
321
326
|
requirements: []
|
322
|
-
rubygems_version: 3.
|
323
|
-
signing_key:
|
327
|
+
rubygems_version: 3.3.15
|
328
|
+
signing_key:
|
324
329
|
specification_version: 4
|
325
330
|
summary: Red Arrow is the Ruby bindings of Apache Arrow
|
326
331
|
test_files:
|
@@ -338,6 +343,7 @@ test_files:
|
|
338
343
|
- test/helper.rb
|
339
344
|
- test/raw-records/test-basic-arrays.rb
|
340
345
|
- test/raw-records/test-dense-union-array.rb
|
346
|
+
- test/raw-records/test-dictionary-array.rb
|
341
347
|
- test/raw-records/test-list-array.rb
|
342
348
|
- test/raw-records/test-map-array.rb
|
343
349
|
- test/raw-records/test-multiple-columns.rb
|