red-arrow 8.0.0 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +3 -0
  3. data/ext/arrow/extconf.rb +8 -4
  4. data/lib/arrow/array-builder.rb +40 -6
  5. data/lib/arrow/array-computable.rb +37 -0
  6. data/lib/arrow/array.rb +28 -0
  7. data/lib/arrow/chunked-array.rb +21 -0
  8. data/lib/arrow/column.rb +28 -0
  9. data/lib/arrow/data-type.rb +8 -3
  10. data/lib/arrow/decimal128-array-builder.rb +16 -6
  11. data/lib/arrow/decimal128.rb +14 -0
  12. data/lib/arrow/decimal256-array-builder.rb +16 -6
  13. data/lib/arrow/decimal256.rb +14 -0
  14. data/lib/arrow/field.rb +44 -3
  15. data/lib/arrow/list-data-type.rb +1 -6
  16. data/lib/arrow/loader.rb +3 -0
  17. data/lib/arrow/raw-table-converter.rb +6 -1
  18. data/lib/arrow/raw-tensor-converter.rb +89 -0
  19. data/lib/arrow/string-array-builder.rb +30 -0
  20. data/lib/arrow/tensor.rb +140 -0
  21. data/lib/arrow/time-unit.rb +31 -0
  22. data/lib/arrow/time32-array-builder.rb +2 -14
  23. data/lib/arrow/time32-data-type.rb +9 -38
  24. data/lib/arrow/time64-array-builder.rb +2 -14
  25. data/lib/arrow/time64-data-type.rb +9 -38
  26. data/lib/arrow/timestamp-array-builder.rb +2 -14
  27. data/lib/arrow/timestamp-data-type.rb +9 -34
  28. data/lib/arrow/version.rb +1 -1
  29. data/red-arrow.gemspec +1 -1
  30. data/test/raw-records/test-dictionary-array.rb +341 -0
  31. data/test/test-array-builder.rb +62 -0
  32. data/test/test-chunked-array.rb +6 -0
  33. data/test/test-column.rb +31 -0
  34. data/test/test-decimal128-array-builder.rb +14 -0
  35. data/test/test-decimal128-array.rb +5 -2
  36. data/test/test-decimal128.rb +26 -2
  37. data/test/test-decimal256-array-builder.rb +14 -0
  38. data/test/test-decimal256-array.rb +5 -2
  39. data/test/test-decimal256.rb +26 -2
  40. data/test/test-field.rb +26 -0
  41. data/test/test-orc.rb +2 -2
  42. data/test/test-table.rb +16 -0
  43. data/test/test-tensor.rb +243 -2
  44. data/test/values/test-dictionary-array.rb +30 -0
  45. metadata +15 -9
data/test/test-table.rb CHANGED
@@ -40,6 +40,22 @@ class TableTest < Test::Unit::TestCase
40
40
  @table = Arrow::Table.new(schema, [@count_array, @visible_array])
41
41
  end
42
42
 
43
+ sub_test_case(".new") do
44
+ test("{Symbol: Arrow::Tensor}") do
45
+ assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
46
+ Arrow::Table.new(numbers: Arrow::Tensor.new([1, 2, 3])))
47
+ end
48
+
49
+ test("{Symbol: #to_ary}") do
50
+ array_like = Object.new
51
+ def array_like.to_ary
52
+ [1, 2, 3]
53
+ end
54
+ assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
55
+ Arrow::Table.new(numbers: array_like))
56
+ end
57
+ end
58
+
43
59
  test("#columns") do
44
60
  assert_equal([
45
61
  Arrow::Column.new(@table, 0),
data/test/test-tensor.rb CHANGED
@@ -16,6 +16,233 @@
16
16
  # under the License.
17
17
 
18
18
  class TensorTest < Test::Unit::TestCase
19
+ sub_test_case("class methods") do
20
+ sub_test_case(".new") do
21
+ def setup
22
+ @raw_tensor = [
23
+ [
24
+ [1, 2, 3, 4],
25
+ [5, 6, 7, 8],
26
+ ],
27
+ [
28
+ [9, 10, 11, 12],
29
+ [13, 14, 15, 16],
30
+ ],
31
+ [
32
+ [17, 18, 19, 20],
33
+ [21, 22, 23, 24],
34
+ ],
35
+ ]
36
+ @shape = [3, 2, 4]
37
+ @strides = [8, 4, 1]
38
+ end
39
+
40
+ test("Array") do
41
+ tensor = Arrow::Tensor.new(@raw_tensor)
42
+ assert_equal({
43
+ value_data_type: Arrow::UInt8DataType.new,
44
+ buffer: @raw_tensor.flatten.pack("C*"),
45
+ shape: @shape,
46
+ strides: @strides,
47
+ dimension_names: ["", "", ""],
48
+ },
49
+ {
50
+ value_data_type: tensor.value_data_type,
51
+ buffer: tensor.buffer.data.to_s,
52
+ shape: tensor.shape,
53
+ strides: tensor.strides,
54
+ dimension_names: tensor.dimension_names,
55
+ })
56
+ end
57
+
58
+ test("Array, data_type: Symbol") do
59
+ tensor = Arrow::Tensor.new(@raw_tensor, data_type: :int32)
60
+ assert_equal({
61
+ value_data_type: Arrow::Int32DataType.new,
62
+ buffer: @raw_tensor.flatten.pack("l*"),
63
+ shape: @shape,
64
+ strides: @strides.collect {|x| x * 4},
65
+ dimension_names: ["", "", ""],
66
+ },
67
+ {
68
+ value_data_type: tensor.value_data_type,
69
+ buffer: tensor.buffer.data.to_s,
70
+ shape: tensor.shape,
71
+ strides: tensor.strides,
72
+ dimension_names: tensor.dimension_names,
73
+ })
74
+ end
75
+
76
+ test("Array, dimension_names: Array<String>") do
77
+ tensor = Arrow::Tensor.new(@raw_tensor,
78
+ dimension_names: ["a", "b", "c"])
79
+ assert_equal({
80
+ value_data_type: Arrow::UInt8DataType.new,
81
+ buffer: @raw_tensor.flatten.pack("C*"),
82
+ shape: @shape,
83
+ strides: @strides,
84
+ dimension_names: ["a", "b", "c"],
85
+ },
86
+ {
87
+ value_data_type: tensor.value_data_type,
88
+ buffer: tensor.buffer.data.to_s,
89
+ shape: tensor.shape,
90
+ strides: tensor.strides,
91
+ dimension_names: tensor.dimension_names,
92
+ })
93
+ end
94
+
95
+ test("Array, dimension_names: Array<Symbol>") do
96
+ tensor = Arrow::Tensor.new(@raw_tensor,
97
+ dimension_names: [:a, :b, :c])
98
+ assert_equal({
99
+ value_data_type: Arrow::UInt8DataType.new,
100
+ buffer: @raw_tensor.flatten.pack("C*"),
101
+ shape: @shape,
102
+ strides: @strides,
103
+ dimension_names: ["a", "b", "c"],
104
+ },
105
+ {
106
+ value_data_type: tensor.value_data_type,
107
+ buffer: tensor.buffer.data.to_s,
108
+ shape: tensor.shape,
109
+ strides: tensor.strides,
110
+ dimension_names: tensor.dimension_names,
111
+ })
112
+ end
113
+
114
+ test("Array, strides:") do
115
+ message = "strides: is only accepted with " +
116
+ "an Arrow::Buffer or String raw tensor: #{@strides.inspect}"
117
+ assert_raise(ArgumentError.new(message)) do
118
+ Arrow::Tensor.new(@raw_tensor, strides: @strides)
119
+ end
120
+ end
121
+
122
+ test("Arrow::Buffer, data_type:, shape:") do
123
+ data_type = :uint8
124
+ data = Arrow::Buffer.new(@raw_tensor.flatten.pack("C*").freeze)
125
+ tensor = Arrow::Tensor.new(data,
126
+ data_type: data_type,
127
+ shape: @shape)
128
+ assert_equal({
129
+ value_data_type: Arrow::UInt8DataType.new,
130
+ buffer: @raw_tensor.flatten.pack("C*"),
131
+ shape: @shape,
132
+ strides: @strides,
133
+ dimension_names: ["", "", ""],
134
+ },
135
+ {
136
+ value_data_type: tensor.value_data_type,
137
+ buffer: tensor.buffer.data.to_s,
138
+ shape: tensor.shape,
139
+ strides: tensor.strides,
140
+ dimension_names: tensor.dimension_names,
141
+ })
142
+ end
143
+
144
+ test("String, data_type:, shape:") do
145
+ data_type = :uint8
146
+ data = @raw_tensor.flatten.pack("C*").freeze
147
+ tensor = Arrow::Tensor.new(data,
148
+ data_type: data_type,
149
+ shape: @shape)
150
+ assert_equal({
151
+ value_data_type: Arrow::UInt8DataType.new,
152
+ buffer: @raw_tensor.flatten.pack("C*"),
153
+ shape: @shape,
154
+ strides: @strides,
155
+ dimension_names: ["", "", ""],
156
+ },
157
+ {
158
+ value_data_type: tensor.value_data_type,
159
+ buffer: tensor.buffer.data.to_s,
160
+ shape: tensor.shape,
161
+ strides: tensor.strides,
162
+ dimension_names: tensor.dimension_names,
163
+ })
164
+ end
165
+
166
+ test("String, data_type:") do
167
+ data_type = :uint8
168
+ data = @raw_tensor.flatten.pack("C*").freeze
169
+ message = "shape: is missing: #{data.inspect}"
170
+ assert_raise(ArgumentError.new(message)) do
171
+ Arrow::Tensor.new(data, data_type: data_type)
172
+ end
173
+ end
174
+
175
+ test("String, shape:") do
176
+ data = @raw_tensor.flatten.pack("C*").freeze
177
+ message = "data_type: is missing: #{data.inspect}"
178
+ assert_raise(ArgumentError.new(message)) do
179
+ Arrow::Tensor.new(data, shape: @shape)
180
+ end
181
+ end
182
+
183
+ test("String - not ASCII-8BIT") do
184
+ data = "XXX"
185
+ message = "raw tensor String must be " +
186
+ "an ASCII-8BIT encoded string: #{data.encoding.inspect}"
187
+ assert_raise(ArgumentError.new(message)) do
188
+ Arrow::Tensor.new("XXX")
189
+ end
190
+ end
191
+
192
+ test("Symbol, Arrow::Buffer, shape:") do
193
+ data_type = :uint8
194
+ data = Arrow::Buffer.new(@raw_tensor.flatten.pack("C*").freeze)
195
+ tensor = Arrow::Tensor.new(data_type,
196
+ data,
197
+ shape: @shape)
198
+ assert_equal({
199
+ value_data_type: Arrow::UInt8DataType.new,
200
+ buffer: @raw_tensor.flatten.pack("C*"),
201
+ shape: @shape,
202
+ strides: @strides,
203
+ dimension_names: ["", "", ""],
204
+ },
205
+ {
206
+ value_data_type: tensor.value_data_type,
207
+ buffer: tensor.buffer.data.to_s,
208
+ shape: tensor.shape,
209
+ strides: tensor.strides,
210
+ dimension_names: tensor.dimension_names,
211
+ })
212
+ end
213
+
214
+ test("Symbol, String, shape:, strides: - !contiguous and column major") do
215
+ data_type = :uint8
216
+ @shape[-1] -= 1 # Ignore the last element in @raw_tensor
217
+ @strides.reverse
218
+ tensor = Arrow::Tensor.new(data_type,
219
+ @raw_tensor.flatten.pack("C*"),
220
+ shape: @shape,
221
+ strides: @strides)
222
+ assert_equal({
223
+ value_data_type: Arrow::UInt8DataType.new,
224
+ buffer: @raw_tensor.flatten.pack("C*"),
225
+ shape: @shape,
226
+ strides: @strides,
227
+ dimension_names: ["", "", ""],
228
+ contiguous: false,
229
+ row_major: false,
230
+ column_major: false,
231
+ },
232
+ {
233
+ value_data_type: tensor.value_data_type,
234
+ buffer: tensor.buffer.data.to_s,
235
+ shape: tensor.shape,
236
+ strides: tensor.strides,
237
+ dimension_names: tensor.dimension_names,
238
+ contiguous: tensor.contiguous?,
239
+ row_major: tensor.row_major?,
240
+ column_major: tensor.column_major?,
241
+ })
242
+ end
243
+ end
244
+ end
245
+
19
246
  sub_test_case("instance methods") do
20
247
  def setup
21
248
  raw_data = [
@@ -28,11 +255,11 @@ class TensorTest < Test::Unit::TestCase
28
255
  9, 10,
29
256
  11, 12,
30
257
  ]
31
- data = Arrow::Buffer.new(raw_data.pack("c*"))
258
+ data = Arrow::Buffer.new(raw_data.pack("c*").freeze)
32
259
  shape = [3, 2, 2]
33
260
  strides = []
34
261
  names = ["a", "b", "c"]
35
- @tensor = Arrow::Tensor.new(Arrow::Int8DataType.new,
262
+ @tensor = Arrow::Tensor.new(:int8,
36
263
  data,
37
264
  shape,
38
265
  strides,
@@ -52,5 +279,19 @@ class TensorTest < Test::Unit::TestCase
52
279
  end
53
280
  end
54
281
  end
282
+
283
+ sub_test_case("#to_arrow_array") do
284
+ test("1 dimension") do
285
+ assert_equal(Arrow::UInt8Array.new([1, 2, 3]),
286
+ Arrow::Tensor.new([1, 2, 3]).to_arrow_array)
287
+ end
288
+
289
+ test("2 dimensions") do
290
+ message = "must be 1 dimensional tensor: [3, 1]"
291
+ assert_raise(RangeError.new(message)) do
292
+ Arrow::Tensor.new([[1], [2], [3]]).to_arrow_array
293
+ end
294
+ end
295
+ end
55
296
  end
56
297
  end
data/test/values/test-dictionary-array.rb CHANGED
@@ -276,6 +276,36 @@ module ValuesDictionaryArrayTests
276
276
  target = build(Arrow::Decimal256Array.new(data_type, values))
277
277
  assert_equal(values, target.values)
278
278
  end
279
+
280
+ def test_month_interval
281
+ values = [
282
+ 1,
283
+ nil,
284
+ 12,
285
+ ]
286
+ target = build(Arrow::MonthIntervalArray.new(values))
287
+ assert_equal(values, target.values)
288
+ end
289
+
290
+ def test_day_time_interval
291
+ values = [
292
+ {day: 1, millisecond: 100},
293
+ nil,
294
+ {day: 2, millisecond: 300},
295
+ ]
296
+ target = build(Arrow::DayTimeIntervalArray.new(values))
297
+ assert_equal(values, target.values)
298
+ end
299
+
300
+ def test_month_day_nano_interval
301
+ values = [
302
+ {month: 1, day: 1, nanosecond: 100},
303
+ nil,
304
+ {month: 2, day: 3, nanosecond: 400},
305
+ ]
306
+ target = build(Arrow::MonthDayNanoIntervalArray.new(values))
307
+ assert_equal(values, target.values)
308
+ end
279
309
  end
280
310
 
281
311
  class ValuesArrayDictionaryArrayTest < Test::Unit::TestCase
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.0.0
4
+ version: 10.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-07 00:00:00.000000000 Z
11
+ date: 2022-11-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 2.0.3
19
+ version: 3.1.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 2.0.3
26
+ version: 3.1.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: extpp
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -109,6 +109,7 @@ files:
109
109
  - lib/arrow/aggregate-node-options.rb
110
110
  - lib/arrow/aggregation.rb
111
111
  - lib/arrow/array-builder.rb
112
+ - lib/arrow/array-computable.rb
112
113
  - lib/arrow/array.rb
113
114
  - lib/arrow/bigdecimal-extension.rb
114
115
  - lib/arrow/binary-dictionary-array-builder.rb
@@ -162,6 +163,7 @@ files:
162
163
  - lib/arrow/null-array.rb
163
164
  - lib/arrow/path-extension.rb
164
165
  - lib/arrow/raw-table-converter.rb
166
+ - lib/arrow/raw-tensor-converter.rb
165
167
  - lib/arrow/record-batch-builder.rb
166
168
  - lib/arrow/record-batch-file-reader.rb
167
169
  - lib/arrow/record-batch-iterator.rb
@@ -179,6 +181,7 @@ files:
179
181
  - lib/arrow/sort-options.rb
180
182
  - lib/arrow/source-node-options.rb
181
183
  - lib/arrow/sparse-union-data-type.rb
184
+ - lib/arrow/string-array-builder.rb
182
185
  - lib/arrow/string-dictionary-array-builder.rb
183
186
  - lib/arrow/struct-array-builder.rb
184
187
  - lib/arrow/struct-array.rb
@@ -192,6 +195,7 @@ files:
192
195
  - lib/arrow/table-table-formatter.rb
193
196
  - lib/arrow/table.rb
194
197
  - lib/arrow/tensor.rb
198
+ - lib/arrow/time-unit.rb
195
199
  - lib/arrow/time.rb
196
200
  - lib/arrow/time32-array-builder.rb
197
201
  - lib/arrow/time32-array.rb
@@ -219,6 +223,7 @@ files:
219
223
  - test/helper/omittable.rb
220
224
  - test/raw-records/test-basic-arrays.rb
221
225
  - test/raw-records/test-dense-union-array.rb
226
+ - test/raw-records/test-dictionary-array.rb
222
227
  - test/raw-records/test-list-array.rb
223
228
  - test/raw-records/test-map-array.rb
224
229
  - test/raw-records/test-multiple-columns.rb
@@ -303,8 +308,8 @@ homepage: https://arrow.apache.org/
303
308
  licenses:
304
309
  - Apache-2.0
305
310
  metadata:
306
- msys2_mingw_dependencies: arrow>=8.0.0
307
- post_install_message:
311
+ msys2_mingw_dependencies: arrow>=10.0.0
312
+ post_install_message:
308
313
  rdoc_options: []
309
314
  require_paths:
310
315
  - lib
@@ -319,8 +324,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
319
324
  - !ruby/object:Gem::Version
320
325
  version: '0'
321
326
  requirements: []
322
- rubygems_version: 3.4.0.dev
323
- signing_key:
327
+ rubygems_version: 3.3.15
328
+ signing_key:
324
329
  specification_version: 4
325
330
  summary: Red Arrow is the Ruby bindings of Apache Arrow
326
331
  test_files:
@@ -338,6 +343,7 @@ test_files:
338
343
  - test/helper.rb
339
344
  - test/raw-records/test-basic-arrays.rb
340
345
  - test/raw-records/test-dense-union-array.rb
346
+ - test/raw-records/test-dictionary-array.rb
341
347
  - test/raw-records/test-list-array.rb
342
348
  - test/raw-records/test-map-array.rb
343
349
  - test/raw-records/test-multiple-columns.rb