red-arrow 8.0.0 → 10.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +3 -0
  3. data/ext/arrow/extconf.rb +8 -4
  4. data/lib/arrow/array-builder.rb +40 -6
  5. data/lib/arrow/array-computable.rb +37 -0
  6. data/lib/arrow/array.rb +28 -0
  7. data/lib/arrow/chunked-array.rb +21 -0
  8. data/lib/arrow/column.rb +28 -0
  9. data/lib/arrow/data-type.rb +8 -3
  10. data/lib/arrow/decimal128-array-builder.rb +16 -6
  11. data/lib/arrow/decimal128.rb +14 -0
  12. data/lib/arrow/decimal256-array-builder.rb +16 -6
  13. data/lib/arrow/decimal256.rb +14 -0
  14. data/lib/arrow/field.rb +44 -3
  15. data/lib/arrow/list-data-type.rb +1 -6
  16. data/lib/arrow/loader.rb +3 -0
  17. data/lib/arrow/raw-table-converter.rb +6 -1
  18. data/lib/arrow/raw-tensor-converter.rb +89 -0
  19. data/lib/arrow/string-array-builder.rb +30 -0
  20. data/lib/arrow/tensor.rb +140 -0
  21. data/lib/arrow/time-unit.rb +31 -0
  22. data/lib/arrow/time32-array-builder.rb +2 -14
  23. data/lib/arrow/time32-data-type.rb +9 -38
  24. data/lib/arrow/time64-array-builder.rb +2 -14
  25. data/lib/arrow/time64-data-type.rb +9 -38
  26. data/lib/arrow/timestamp-array-builder.rb +2 -14
  27. data/lib/arrow/timestamp-data-type.rb +9 -34
  28. data/lib/arrow/version.rb +1 -1
  29. data/red-arrow.gemspec +1 -1
  30. data/test/raw-records/test-dictionary-array.rb +341 -0
  31. data/test/test-array-builder.rb +62 -0
  32. data/test/test-chunked-array.rb +6 -0
  33. data/test/test-column.rb +31 -0
  34. data/test/test-decimal128-array-builder.rb +14 -0
  35. data/test/test-decimal128-array.rb +5 -2
  36. data/test/test-decimal128.rb +26 -2
  37. data/test/test-decimal256-array-builder.rb +14 -0
  38. data/test/test-decimal256-array.rb +5 -2
  39. data/test/test-decimal256.rb +26 -2
  40. data/test/test-field.rb +26 -0
  41. data/test/test-orc.rb +2 -2
  42. data/test/test-table.rb +16 -0
  43. data/test/test-tensor.rb +243 -2
  44. data/test/values/test-dictionary-array.rb +30 -0
  45. metadata +15 -9
data/test/test-table.rb CHANGED
@@ -40,6 +40,22 @@ class TableTest < Test::Unit::TestCase
40
40
  @table = Arrow::Table.new(schema, [@count_array, @visible_array])
41
41
  end
42
42
 
43
+ sub_test_case(".new") do
44
+ test("{Symbol: Arrow::Tensor}") do
45
+ assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
46
+ Arrow::Table.new(numbers: Arrow::Tensor.new([1, 2, 3])))
47
+ end
48
+
49
+ test("{Symbol: #to_ary}") do
50
+ array_like = Object.new
51
+ def array_like.to_ary
52
+ [1, 2, 3]
53
+ end
54
+ assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
55
+ Arrow::Table.new(numbers: array_like))
56
+ end
57
+ end
58
+
43
59
  test("#columns") do
44
60
  assert_equal([
45
61
  Arrow::Column.new(@table, 0),
data/test/test-tensor.rb CHANGED
@@ -16,6 +16,233 @@
16
16
  # under the License.
17
17
 
18
18
  class TensorTest < Test::Unit::TestCase
19
+ sub_test_case("class methods") do
20
+ sub_test_case(".new") do
21
+ def setup
22
+ @raw_tensor = [
23
+ [
24
+ [1, 2, 3, 4],
25
+ [5, 6, 7, 8],
26
+ ],
27
+ [
28
+ [9, 10, 11, 12],
29
+ [13, 14, 15, 16],
30
+ ],
31
+ [
32
+ [17, 18, 19, 20],
33
+ [21, 22, 23, 24],
34
+ ],
35
+ ]
36
+ @shape = [3, 2, 4]
37
+ @strides = [8, 4, 1]
38
+ end
39
+
40
+ test("Array") do
41
+ tensor = Arrow::Tensor.new(@raw_tensor)
42
+ assert_equal({
43
+ value_data_type: Arrow::UInt8DataType.new,
44
+ buffer: @raw_tensor.flatten.pack("C*"),
45
+ shape: @shape,
46
+ strides: @strides,
47
+ dimension_names: ["", "", ""],
48
+ },
49
+ {
50
+ value_data_type: tensor.value_data_type,
51
+ buffer: tensor.buffer.data.to_s,
52
+ shape: tensor.shape,
53
+ strides: tensor.strides,
54
+ dimension_names: tensor.dimension_names,
55
+ })
56
+ end
57
+
58
+ test("Array, data_type: Symbol") do
59
+ tensor = Arrow::Tensor.new(@raw_tensor, data_type: :int32)
60
+ assert_equal({
61
+ value_data_type: Arrow::Int32DataType.new,
62
+ buffer: @raw_tensor.flatten.pack("l*"),
63
+ shape: @shape,
64
+ strides: @strides.collect {|x| x * 4},
65
+ dimension_names: ["", "", ""],
66
+ },
67
+ {
68
+ value_data_type: tensor.value_data_type,
69
+ buffer: tensor.buffer.data.to_s,
70
+ shape: tensor.shape,
71
+ strides: tensor.strides,
72
+ dimension_names: tensor.dimension_names,
73
+ })
74
+ end
75
+
76
+ test("Array, dimension_names: Array<String>") do
77
+ tensor = Arrow::Tensor.new(@raw_tensor,
78
+ dimension_names: ["a", "b", "c"])
79
+ assert_equal({
80
+ value_data_type: Arrow::UInt8DataType.new,
81
+ buffer: @raw_tensor.flatten.pack("C*"),
82
+ shape: @shape,
83
+ strides: @strides,
84
+ dimension_names: ["a", "b", "c"],
85
+ },
86
+ {
87
+ value_data_type: tensor.value_data_type,
88
+ buffer: tensor.buffer.data.to_s,
89
+ shape: tensor.shape,
90
+ strides: tensor.strides,
91
+ dimension_names: tensor.dimension_names,
92
+ })
93
+ end
94
+
95
+ test("Array, dimension_names: Array<Symbol>") do
96
+ tensor = Arrow::Tensor.new(@raw_tensor,
97
+ dimension_names: [:a, :b, :c])
98
+ assert_equal({
99
+ value_data_type: Arrow::UInt8DataType.new,
100
+ buffer: @raw_tensor.flatten.pack("C*"),
101
+ shape: @shape,
102
+ strides: @strides,
103
+ dimension_names: ["a", "b", "c"],
104
+ },
105
+ {
106
+ value_data_type: tensor.value_data_type,
107
+ buffer: tensor.buffer.data.to_s,
108
+ shape: tensor.shape,
109
+ strides: tensor.strides,
110
+ dimension_names: tensor.dimension_names,
111
+ })
112
+ end
113
+
114
+ test("Array, strides:") do
115
+ message = "strides: is only accepted with " +
116
+ "an Arrow::Buffer or String raw tensor: #{@strides.inspect}"
117
+ assert_raise(ArgumentError.new(message)) do
118
+ Arrow::Tensor.new(@raw_tensor, strides: @strides)
119
+ end
120
+ end
121
+
122
+ test("Arrow::Buffer, data_type:, shape:") do
123
+ data_type = :uint8
124
+ data = Arrow::Buffer.new(@raw_tensor.flatten.pack("C*").freeze)
125
+ tensor = Arrow::Tensor.new(data,
126
+ data_type: data_type,
127
+ shape: @shape)
128
+ assert_equal({
129
+ value_data_type: Arrow::UInt8DataType.new,
130
+ buffer: @raw_tensor.flatten.pack("C*"),
131
+ shape: @shape,
132
+ strides: @strides,
133
+ dimension_names: ["", "", ""],
134
+ },
135
+ {
136
+ value_data_type: tensor.value_data_type,
137
+ buffer: tensor.buffer.data.to_s,
138
+ shape: tensor.shape,
139
+ strides: tensor.strides,
140
+ dimension_names: tensor.dimension_names,
141
+ })
142
+ end
143
+
144
+ test("String, data_type:, shape:") do
145
+ data_type = :uint8
146
+ data = @raw_tensor.flatten.pack("C*").freeze
147
+ tensor = Arrow::Tensor.new(data,
148
+ data_type: data_type,
149
+ shape: @shape)
150
+ assert_equal({
151
+ value_data_type: Arrow::UInt8DataType.new,
152
+ buffer: @raw_tensor.flatten.pack("C*"),
153
+ shape: @shape,
154
+ strides: @strides,
155
+ dimension_names: ["", "", ""],
156
+ },
157
+ {
158
+ value_data_type: tensor.value_data_type,
159
+ buffer: tensor.buffer.data.to_s,
160
+ shape: tensor.shape,
161
+ strides: tensor.strides,
162
+ dimension_names: tensor.dimension_names,
163
+ })
164
+ end
165
+
166
+ test("String, data_type:") do
167
+ data_type = :uint8
168
+ data = @raw_tensor.flatten.pack("C*").freeze
169
+ message = "shape: is missing: #{data.inspect}"
170
+ assert_raise(ArgumentError.new(message)) do
171
+ Arrow::Tensor.new(data, data_type: data_type)
172
+ end
173
+ end
174
+
175
+ test("String, shape:") do
176
+ data = @raw_tensor.flatten.pack("C*").freeze
177
+ message = "data_type: is missing: #{data.inspect}"
178
+ assert_raise(ArgumentError.new(message)) do
179
+ Arrow::Tensor.new(data, shape: @shape)
180
+ end
181
+ end
182
+
183
+ test("String - not ASCII-8BIT") do
184
+ data = "XXX"
185
+ message = "raw tensor String must be " +
186
+ "an ASCII-8BIT encoded string: #{data.encoding.inspect}"
187
+ assert_raise(ArgumentError.new(message)) do
188
+ Arrow::Tensor.new("XXX")
189
+ end
190
+ end
191
+
192
+ test("Symbol, Arrow::Buffer, shape:") do
193
+ data_type = :uint8
194
+ data = Arrow::Buffer.new(@raw_tensor.flatten.pack("C*").freeze)
195
+ tensor = Arrow::Tensor.new(data_type,
196
+ data,
197
+ shape: @shape)
198
+ assert_equal({
199
+ value_data_type: Arrow::UInt8DataType.new,
200
+ buffer: @raw_tensor.flatten.pack("C*"),
201
+ shape: @shape,
202
+ strides: @strides,
203
+ dimension_names: ["", "", ""],
204
+ },
205
+ {
206
+ value_data_type: tensor.value_data_type,
207
+ buffer: tensor.buffer.data.to_s,
208
+ shape: tensor.shape,
209
+ strides: tensor.strides,
210
+ dimension_names: tensor.dimension_names,
211
+ })
212
+ end
213
+
214
+ test("Symbol, String, shape:, strides: - !contiguous and column major") do
215
+ data_type = :uint8
216
+ @shape[-1] -= 1 # Ignore the last element in @raw_tensor
217
+ @strides.reverse
218
+ tensor = Arrow::Tensor.new(data_type,
219
+ @raw_tensor.flatten.pack("C*"),
220
+ shape: @shape,
221
+ strides: @strides)
222
+ assert_equal({
223
+ value_data_type: Arrow::UInt8DataType.new,
224
+ buffer: @raw_tensor.flatten.pack("C*"),
225
+ shape: @shape,
226
+ strides: @strides,
227
+ dimension_names: ["", "", ""],
228
+ contiguous: false,
229
+ row_major: false,
230
+ column_major: false,
231
+ },
232
+ {
233
+ value_data_type: tensor.value_data_type,
234
+ buffer: tensor.buffer.data.to_s,
235
+ shape: tensor.shape,
236
+ strides: tensor.strides,
237
+ dimension_names: tensor.dimension_names,
238
+ contiguous: tensor.contiguous?,
239
+ row_major: tensor.row_major?,
240
+ column_major: tensor.column_major?,
241
+ })
242
+ end
243
+ end
244
+ end
245
+
19
246
  sub_test_case("instance methods") do
20
247
  def setup
21
248
  raw_data = [
@@ -28,11 +255,11 @@ class TensorTest < Test::Unit::TestCase
28
255
  9, 10,
29
256
  11, 12,
30
257
  ]
31
- data = Arrow::Buffer.new(raw_data.pack("c*"))
258
+ data = Arrow::Buffer.new(raw_data.pack("c*").freeze)
32
259
  shape = [3, 2, 2]
33
260
  strides = []
34
261
  names = ["a", "b", "c"]
35
- @tensor = Arrow::Tensor.new(Arrow::Int8DataType.new,
262
+ @tensor = Arrow::Tensor.new(:int8,
36
263
  data,
37
264
  shape,
38
265
  strides,
@@ -52,5 +279,19 @@ class TensorTest < Test::Unit::TestCase
52
279
  end
53
280
  end
54
281
  end
282
+
283
+ sub_test_case("#to_arrow_array") do
284
+ test("1 dimension") do
285
+ assert_equal(Arrow::UInt8Array.new([1, 2, 3]),
286
+ Arrow::Tensor.new([1, 2, 3]).to_arrow_array)
287
+ end
288
+
289
+ test("2 dimensions") do
290
+ message = "must be 1 dimensional tensor: [3, 1]"
291
+ assert_raise(RangeError.new(message)) do
292
+ Arrow::Tensor.new([[1], [2], [3]]).to_arrow_array
293
+ end
294
+ end
295
+ end
55
296
  end
56
297
  end
data/test/values/test-dictionary-array.rb CHANGED
@@ -276,6 +276,36 @@ module ValuesDictionaryArrayTests
276
276
  target = build(Arrow::Decimal256Array.new(data_type, values))
277
277
  assert_equal(values, target.values)
278
278
  end
279
+
280
+ def test_month_interval
281
+ values = [
282
+ 1,
283
+ nil,
284
+ 12,
285
+ ]
286
+ target = build(Arrow::MonthIntervalArray.new(values))
287
+ assert_equal(values, target.values)
288
+ end
289
+
290
+ def test_day_time_interval
291
+ values = [
292
+ {day: 1, millisecond: 100},
293
+ nil,
294
+ {day: 2, millisecond: 300},
295
+ ]
296
+ target = build(Arrow::DayTimeIntervalArray.new(values))
297
+ assert_equal(values, target.values)
298
+ end
299
+
300
+ def test_month_day_nano_interval
301
+ values = [
302
+ {month: 1, day: 1, nanosecond: 100},
303
+ nil,
304
+ {month: 2, day: 3, nanosecond: 400},
305
+ ]
306
+ target = build(Arrow::MonthDayNanoIntervalArray.new(values))
307
+ assert_equal(values, target.values)
308
+ end
279
309
  end
280
310
 
281
311
  class ValuesArrayDictionaryArrayTest < Test::Unit::TestCase
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.0.0
4
+ version: 10.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-07 00:00:00.000000000 Z
11
+ date: 2022-11-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 2.0.3
19
+ version: 3.1.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 2.0.3
26
+ version: 3.1.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: extpp
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -109,6 +109,7 @@ files:
109
109
  - lib/arrow/aggregate-node-options.rb
110
110
  - lib/arrow/aggregation.rb
111
111
  - lib/arrow/array-builder.rb
112
+ - lib/arrow/array-computable.rb
112
113
  - lib/arrow/array.rb
113
114
  - lib/arrow/bigdecimal-extension.rb
114
115
  - lib/arrow/binary-dictionary-array-builder.rb
@@ -162,6 +163,7 @@ files:
162
163
  - lib/arrow/null-array.rb
163
164
  - lib/arrow/path-extension.rb
164
165
  - lib/arrow/raw-table-converter.rb
166
+ - lib/arrow/raw-tensor-converter.rb
165
167
  - lib/arrow/record-batch-builder.rb
166
168
  - lib/arrow/record-batch-file-reader.rb
167
169
  - lib/arrow/record-batch-iterator.rb
@@ -179,6 +181,7 @@ files:
179
181
  - lib/arrow/sort-options.rb
180
182
  - lib/arrow/source-node-options.rb
181
183
  - lib/arrow/sparse-union-data-type.rb
184
+ - lib/arrow/string-array-builder.rb
182
185
  - lib/arrow/string-dictionary-array-builder.rb
183
186
  - lib/arrow/struct-array-builder.rb
184
187
  - lib/arrow/struct-array.rb
@@ -192,6 +195,7 @@ files:
192
195
  - lib/arrow/table-table-formatter.rb
193
196
  - lib/arrow/table.rb
194
197
  - lib/arrow/tensor.rb
198
+ - lib/arrow/time-unit.rb
195
199
  - lib/arrow/time.rb
196
200
  - lib/arrow/time32-array-builder.rb
197
201
  - lib/arrow/time32-array.rb
@@ -219,6 +223,7 @@ files:
219
223
  - test/helper/omittable.rb
220
224
  - test/raw-records/test-basic-arrays.rb
221
225
  - test/raw-records/test-dense-union-array.rb
226
+ - test/raw-records/test-dictionary-array.rb
222
227
  - test/raw-records/test-list-array.rb
223
228
  - test/raw-records/test-map-array.rb
224
229
  - test/raw-records/test-multiple-columns.rb
@@ -303,8 +308,8 @@ homepage: https://arrow.apache.org/
303
308
  licenses:
304
309
  - Apache-2.0
305
310
  metadata:
306
- msys2_mingw_dependencies: arrow>=8.0.0
307
- post_install_message:
311
+ msys2_mingw_dependencies: arrow>=10.0.0
312
+ post_install_message:
308
313
  rdoc_options: []
309
314
  require_paths:
310
315
  - lib
@@ -319,8 +324,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
319
324
  - !ruby/object:Gem::Version
320
325
  version: '0'
321
326
  requirements: []
322
- rubygems_version: 3.4.0.dev
323
- signing_key:
327
+ rubygems_version: 3.3.15
328
+ signing_key:
324
329
  specification_version: 4
325
330
  summary: Red Arrow is the Ruby bindings of Apache Arrow
326
331
  test_files:
@@ -338,6 +343,7 @@ test_files:
338
343
  - test/helper.rb
339
344
  - test/raw-records/test-basic-arrays.rb
340
345
  - test/raw-records/test-dense-union-array.rb
346
+ - test/raw-records/test-dictionary-array.rb
341
347
  - test/raw-records/test-list-array.rb
342
348
  - test/raw-records/test-map-array.rb
343
349
  - test/raw-records/test-multiple-columns.rb