red-arrow 10.0.0 → 16.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/arrow.cpp +31 -0
  4. data/ext/arrow/converters.hpp +45 -41
  5. data/ext/arrow/extconf.rb +16 -4
  6. data/ext/arrow/raw-records.cpp +155 -2
  7. data/ext/arrow/red-arrow.hpp +2 -0
  8. data/ext/arrow/values.cpp +1 -2
  9. data/lib/arrow/array-computable.rb +13 -0
  10. data/lib/arrow/array.rb +6 -1
  11. data/lib/arrow/chunked-array.rb +35 -1
  12. data/lib/arrow/column-containable.rb +9 -0
  13. data/lib/arrow/column.rb +1 -0
  14. data/lib/arrow/data-type.rb +9 -0
  15. data/lib/arrow/dense-union-array-builder.rb +49 -0
  16. data/lib/arrow/dense-union-array.rb +26 -0
  17. data/lib/arrow/expression.rb +6 -2
  18. data/lib/arrow/function.rb +0 -1
  19. data/lib/arrow/half-float-array-builder.rb +32 -0
  20. data/lib/arrow/half-float-array.rb +24 -0
  21. data/lib/arrow/half-float.rb +118 -0
  22. data/lib/arrow/input-referable.rb +29 -0
  23. data/lib/arrow/loader.rb +11 -0
  24. data/lib/arrow/raw-table-converter.rb +7 -5
  25. data/lib/arrow/record-batch-file-reader.rb +2 -0
  26. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  27. data/lib/arrow/record-batch.rb +6 -2
  28. data/lib/arrow/scalar.rb +67 -0
  29. data/lib/arrow/slicer.rb +61 -0
  30. data/lib/arrow/sort-key.rb +3 -3
  31. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  32. data/lib/arrow/sparse-union-array.rb +26 -0
  33. data/lib/arrow/struct-array-builder.rb +0 -5
  34. data/lib/arrow/table-loader.rb +11 -5
  35. data/lib/arrow/table-saver.rb +1 -0
  36. data/lib/arrow/table.rb +180 -33
  37. data/lib/arrow/tensor.rb +4 -0
  38. data/lib/arrow/timestamp-parser.rb +33 -0
  39. data/lib/arrow/union-array-builder.rb +59 -0
  40. data/lib/arrow/version.rb +1 -1
  41. data/red-arrow.gemspec +2 -1
  42. data/test/each-raw-record/test-basic-arrays.rb +411 -0
  43. data/test/each-raw-record/test-dense-union-array.rb +566 -0
  44. data/test/each-raw-record/test-dictionary-array.rb +341 -0
  45. data/test/each-raw-record/test-list-array.rb +628 -0
  46. data/test/each-raw-record/test-map-array.rb +507 -0
  47. data/test/each-raw-record/test-multiple-columns.rb +72 -0
  48. data/test/each-raw-record/test-sparse-union-array.rb +528 -0
  49. data/test/each-raw-record/test-struct-array.rb +529 -0
  50. data/test/each-raw-record/test-table.rb +47 -0
  51. data/test/helper/omittable.rb +13 -0
  52. data/test/helper.rb +1 -0
  53. data/test/raw-records/test-basic-arrays.rb +11 -1
  54. data/test/raw-records/test-dense-union-array.rb +90 -45
  55. data/test/raw-records/test-list-array.rb +28 -10
  56. data/test/raw-records/test-map-array.rb +39 -10
  57. data/test/raw-records/test-sparse-union-array.rb +86 -41
  58. data/test/raw-records/test-struct-array.rb +22 -8
  59. data/test/test-array.rb +7 -0
  60. data/test/test-chunked-array.rb +9 -0
  61. data/test/test-csv-loader.rb +39 -0
  62. data/test/test-data-type.rb +2 -1
  63. data/test/test-dense-union-array.rb +42 -0
  64. data/test/test-dense-union-data-type.rb +1 -1
  65. data/test/test-expression.rb +11 -0
  66. data/test/test-function.rb +7 -7
  67. data/test/test-group.rb +58 -58
  68. data/test/test-half-float-array.rb +43 -0
  69. data/test/test-half-float.rb +130 -0
  70. data/test/test-ractor.rb +34 -0
  71. data/test/test-record-batch-file-reader.rb +21 -0
  72. data/test/test-record-batch-stream-reader.rb +129 -0
  73. data/test/test-scalar.rb +65 -0
  74. data/test/test-slicer.rb +194 -129
  75. data/test/test-sparse-union-array.rb +38 -0
  76. data/test/test-table.rb +356 -40
  77. data/test/values/test-basic-arrays.rb +10 -0
  78. data/test/values/test-dense-union-array.rb +88 -45
  79. data/test/values/test-list-array.rb +26 -10
  80. data/test/values/test-map-array.rb +33 -10
  81. data/test/values/test-sparse-union-array.rb +84 -41
  82. data/test/values/test-struct-array.rb +20 -8
  83. metadata +62 -9
@@ -0,0 +1,341 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module EachRawRecordDictionaryArrayTests
19
+ def build_record_batch(array)
20
+ dictionary = array.dictionary_encode
21
+ schema = Arrow::Schema.new(column: dictionary.value_data_type)
22
+ Arrow::RecordBatch.new(schema, array.length, [dictionary])
23
+ end
24
+
25
+ def test_null
26
+ records = [
27
+ [nil],
28
+ [nil],
29
+ [nil],
30
+ [nil],
31
+ ]
32
+ target = build(Arrow::NullArray.new(records.collect(&:first)))
33
+ assert_equal(records, target.each_raw_record.to_a)
34
+ end
35
+
36
+ def test_boolean
37
+ records = [
38
+ [true],
39
+ [nil],
40
+ [false],
41
+ ]
42
+ target = build(Arrow::BooleanArray.new(records.collect(&:first)))
43
+ assert_equal(records, target.each_raw_record.to_a)
44
+ end
45
+
46
+ def test_int8
47
+ records = [
48
+ [-(2 ** 7)],
49
+ [nil],
50
+ [(2 ** 7) - 1],
51
+ ]
52
+ target = build(Arrow::Int8Array.new(records.collect(&:first)))
53
+ assert_equal(records, target.each_raw_record.to_a)
54
+ end
55
+
56
+ def test_uint8
57
+ records = [
58
+ [0],
59
+ [nil],
60
+ [(2 ** 8) - 1],
61
+ ]
62
+ target = build(Arrow::UInt8Array.new(records.collect(&:first)))
63
+ assert_equal(records, target.each_raw_record.to_a)
64
+ end
65
+
66
+ def test_int16
67
+ records = [
68
+ [-(2 ** 15)],
69
+ [nil],
70
+ [(2 ** 15) - 1],
71
+ ]
72
+ target = build(Arrow::Int16Array.new(records.collect(&:first)))
73
+ assert_equal(records, target.each_raw_record.to_a)
74
+ end
75
+
76
+ def test_uint16
77
+ records = [
78
+ [0],
79
+ [nil],
80
+ [(2 ** 16) - 1],
81
+ ]
82
+ target = build(Arrow::UInt16Array.new(records.collect(&:first)))
83
+ assert_equal(records, target.each_raw_record.to_a)
84
+ end
85
+
86
+ def test_int32
87
+ records = [
88
+ [-(2 ** 31)],
89
+ [nil],
90
+ [(2 ** 31) - 1],
91
+ ]
92
+ target = build(Arrow::Int32Array.new(records.collect(&:first)))
93
+ assert_equal(records, target.each_raw_record.to_a)
94
+ end
95
+
96
+ def test_uint32
97
+ records = [
98
+ [0],
99
+ [nil],
100
+ [(2 ** 32) - 1],
101
+ ]
102
+ target = build(Arrow::UInt32Array.new(records.collect(&:first)))
103
+ assert_equal(records, target.each_raw_record.to_a)
104
+ end
105
+
106
+ def test_int64
107
+ records = [
108
+ [-(2 ** 63)],
109
+ [nil],
110
+ [(2 ** 63) - 1],
111
+ ]
112
+ target = build(Arrow::Int64Array.new(records.collect(&:first)))
113
+ assert_equal(records, target.each_raw_record.to_a)
114
+ end
115
+
116
+ def test_uint64
117
+ records = [
118
+ [0],
119
+ [nil],
120
+ [(2 ** 64) - 1],
121
+ ]
122
+ target = build(Arrow::UInt64Array.new(records.collect(&:first)))
123
+ assert_equal(records, target.each_raw_record.to_a)
124
+ end
125
+
126
+ def test_float
127
+ records = [
128
+ [-1.0],
129
+ [nil],
130
+ [1.0],
131
+ ]
132
+ target = build(Arrow::FloatArray.new(records.collect(&:first)))
133
+ assert_equal(records, target.each_raw_record.to_a)
134
+ end
135
+
136
+ def test_double
137
+ records = [
138
+ [-1.0],
139
+ [nil],
140
+ [1.0],
141
+ ]
142
+ target = build(Arrow::DoubleArray.new(records.collect(&:first)))
143
+ assert_equal(records, target.each_raw_record.to_a)
144
+ end
145
+
146
+ def test_binary
147
+ records = [
148
+ ["\x00".b],
149
+ [nil],
150
+ ["\xff".b],
151
+ ]
152
+ target = build(Arrow::BinaryArray.new(records.collect(&:first)))
153
+ assert_equal(records, target.each_raw_record.to_a)
154
+ end
155
+
156
+ def test_string
157
+ records = [
158
+ ["Ruby"],
159
+ [nil],
160
+ ["\u3042"], # U+3042 HIRAGANA LETTER A
161
+ ]
162
+ target = build(Arrow::StringArray.new(records.collect(&:first)))
163
+ assert_equal(records, target.each_raw_record.to_a)
164
+ end
165
+
166
+ def test_date32
167
+ records = [
168
+ [Date.new(1960, 1, 1)],
169
+ [nil],
170
+ [Date.new(2017, 8, 23)],
171
+ ]
172
+ target = build(Arrow::Date32Array.new(records.collect(&:first)))
173
+ assert_equal(records, target.each_raw_record.to_a)
174
+ end
175
+
176
+ def test_date64
177
+ records = [
178
+ [DateTime.new(1960, 1, 1, 2, 9, 30)],
179
+ [nil],
180
+ [DateTime.new(2017, 8, 23, 14, 57, 2)],
181
+ ]
182
+ target = build(Arrow::Date64Array.new(records.collect(&:first)))
183
+ assert_equal(records, target.each_raw_record.to_a)
184
+ end
185
+
186
+ def test_timestamp_second
187
+ records = [
188
+ [Time.parse("1960-01-01T02:09:30Z")],
189
+ [nil],
190
+ [Time.parse("2017-08-23T14:57:02Z")],
191
+ ]
192
+ target = build(Arrow::TimestampArray.new(:second, records.collect(&:first)))
193
+ assert_equal(records, target.each_raw_record.to_a)
194
+ end
195
+
196
+ def test_timestamp_milli
197
+ records = [
198
+ [Time.parse("1960-01-01T02:09:30.123Z")],
199
+ [nil],
200
+ [Time.parse("2017-08-23T14:57:02.987Z")],
201
+ ]
202
+ target = build(Arrow::TimestampArray.new(:milli, records.collect(&:first)))
203
+ assert_equal(records, target.each_raw_record.to_a)
204
+ end
205
+
206
+ def test_timestamp_micro
207
+ records = [
208
+ [Time.parse("1960-01-01T02:09:30.123456Z")],
209
+ [nil],
210
+ [Time.parse("2017-08-23T14:57:02.987654Z")],
211
+ ]
212
+ target = build(Arrow::TimestampArray.new(:micro, records.collect(&:first)))
213
+ assert_equal(records, target.each_raw_record.to_a)
214
+ end
215
+
216
+ def test_timestamp_nano
217
+ records = [
218
+ [Time.parse("1960-01-01T02:09:30.123456789Z")],
219
+ [nil],
220
+ [Time.parse("2017-08-23T14:57:02.987654321Z")],
221
+ ]
222
+ target = build(Arrow::TimestampArray.new(:nano, records.collect(&:first)))
223
+ assert_equal(records, target.each_raw_record.to_a)
224
+ end
225
+
226
+ def test_time32_second
227
+ unit = Arrow::TimeUnit::SECOND
228
+ records = [
229
+ [Arrow::Time.new(unit, 60 * 10)], # 00:10:00
230
+ [nil],
231
+ [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09
232
+ ]
233
+ target = build(Arrow::Time32Array.new(unit, records.collect(&:first)))
234
+ assert_equal(records, target.each_raw_record.to_a)
235
+ end
236
+
237
+ def test_time32_milli
238
+ unit = Arrow::TimeUnit::MILLI
239
+ records = [
240
+ [Arrow::Time.new(unit, (60 * 10) * 1000 + 123)], # 00:10:00.123
241
+ [nil],
242
+ [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987
243
+ ]
244
+ target = build(Arrow::Time32Array.new(unit, records.collect(&:first)))
245
+ assert_equal(records, target.each_raw_record.to_a)
246
+ end
247
+
248
+ def test_time64_micro
249
+ unit = Arrow::TimeUnit::MICRO
250
+ records = [
251
+ # 00:10:00.123456
252
+ [Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)],
253
+ [nil],
254
+ # 02:00:09.987654
255
+ [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)],
256
+ ]
257
+ target = build(Arrow::Time64Array.new(unit, records.collect(&:first)))
258
+ assert_equal(records, target.each_raw_record.to_a)
259
+ end
260
+
261
+ def test_time64_nano
262
+ unit = Arrow::TimeUnit::NANO
263
+ records = [
264
+ # 00:10:00.123456789
265
+ [Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)],
266
+ [nil],
267
+ # 02:00:09.987654321
268
+ [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)],
269
+ ]
270
+ target = build(Arrow::Time64Array.new(unit, records.collect(&:first)))
271
+ assert_equal(records, target.each_raw_record.to_a)
272
+ end
273
+
274
+ def test_decimal128
275
+ records = [
276
+ [BigDecimal("92.92")],
277
+ [nil],
278
+ [BigDecimal("29.29")],
279
+ ]
280
+ data_type = Arrow::Decimal128DataType.new(8, 2)
281
+ target = build(Arrow::Decimal128Array.new(data_type, records.collect(&:first)))
282
+ assert_equal(records, target.each_raw_record.to_a)
283
+ end
284
+
285
+ def test_decimal256
286
+ records = [
287
+ [BigDecimal("92.92")],
288
+ [nil],
289
+ [BigDecimal("29.29")],
290
+ ]
291
+ data_type = Arrow::Decimal256DataType.new(38, 2)
292
+ target = build(Arrow::Decimal256Array.new(data_type, records.collect(&:first)))
293
+ assert_equal(records, target.each_raw_record.to_a)
294
+ end
295
+
296
+ def test_month_interval
297
+ records = [
298
+ [1],
299
+ [nil],
300
+ [12],
301
+ ]
302
+ target = build(Arrow::MonthIntervalArray.new(records.collect(&:first)))
303
+ assert_equal(records, target.each_raw_record.to_a)
304
+ end
305
+
306
+ def test_day_time_interval
307
+ records = [
308
+ [{day: 1, millisecond: 100}],
309
+ [nil],
310
+ [{day: 2, millisecond: 300}],
311
+ ]
312
+ target = build(Arrow::DayTimeIntervalArray.new(records.collect(&:first)))
313
+ assert_equal(records, target.each_raw_record.to_a)
314
+ end
315
+
316
+ def test_month_day_nano_interval
317
+ records = [
318
+ [{month: 1, day: 1, nanosecond: 100}],
319
+ [nil],
320
+ [{month: 2, day: 3, nanosecond: 400}],
321
+ ]
322
+ target = build(Arrow::MonthDayNanoIntervalArray.new(records.collect(&:first)))
323
+ assert_equal(records, target.each_raw_record.to_a)
324
+ end
325
+ end
326
+
327
+ class EachRawRecordRecordBatchDictionaryArraysTest < Test::Unit::TestCase
328
+ include EachRawRecordDictionaryArrayTests
329
+
330
+ def build(array)
331
+ build_record_batch(array)
332
+ end
333
+ end
334
+
335
+ class EachRawRecordTableDictionaryArraysTest < Test::Unit::TestCase
336
+ include EachRawRecordDictionaryArrayTests
337
+
338
+ def build(array)
339
+ build_record_batch(array).to_table
340
+ end
341
+ end