red-arrow 10.0.0 → 16.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/arrow.cpp +31 -0
  4. data/ext/arrow/converters.hpp +45 -41
  5. data/ext/arrow/extconf.rb +16 -4
  6. data/ext/arrow/raw-records.cpp +155 -2
  7. data/ext/arrow/red-arrow.hpp +2 -0
  8. data/ext/arrow/values.cpp +1 -2
  9. data/lib/arrow/array-computable.rb +13 -0
  10. data/lib/arrow/array.rb +6 -1
  11. data/lib/arrow/chunked-array.rb +35 -1
  12. data/lib/arrow/column-containable.rb +9 -0
  13. data/lib/arrow/column.rb +1 -0
  14. data/lib/arrow/data-type.rb +9 -0
  15. data/lib/arrow/dense-union-array-builder.rb +49 -0
  16. data/lib/arrow/dense-union-array.rb +26 -0
  17. data/lib/arrow/expression.rb +6 -2
  18. data/lib/arrow/function.rb +0 -1
  19. data/lib/arrow/half-float-array-builder.rb +32 -0
  20. data/lib/arrow/half-float-array.rb +24 -0
  21. data/lib/arrow/half-float.rb +118 -0
  22. data/lib/arrow/input-referable.rb +29 -0
  23. data/lib/arrow/loader.rb +11 -0
  24. data/lib/arrow/raw-table-converter.rb +7 -5
  25. data/lib/arrow/record-batch-file-reader.rb +2 -0
  26. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  27. data/lib/arrow/record-batch.rb +6 -2
  28. data/lib/arrow/scalar.rb +67 -0
  29. data/lib/arrow/slicer.rb +61 -0
  30. data/lib/arrow/sort-key.rb +3 -3
  31. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  32. data/lib/arrow/sparse-union-array.rb +26 -0
  33. data/lib/arrow/struct-array-builder.rb +0 -5
  34. data/lib/arrow/table-loader.rb +11 -5
  35. data/lib/arrow/table-saver.rb +1 -0
  36. data/lib/arrow/table.rb +180 -33
  37. data/lib/arrow/tensor.rb +4 -0
  38. data/lib/arrow/timestamp-parser.rb +33 -0
  39. data/lib/arrow/union-array-builder.rb +59 -0
  40. data/lib/arrow/version.rb +1 -1
  41. data/red-arrow.gemspec +2 -1
  42. data/test/each-raw-record/test-basic-arrays.rb +411 -0
  43. data/test/each-raw-record/test-dense-union-array.rb +566 -0
  44. data/test/each-raw-record/test-dictionary-array.rb +341 -0
  45. data/test/each-raw-record/test-list-array.rb +628 -0
  46. data/test/each-raw-record/test-map-array.rb +507 -0
  47. data/test/each-raw-record/test-multiple-columns.rb +72 -0
  48. data/test/each-raw-record/test-sparse-union-array.rb +528 -0
  49. data/test/each-raw-record/test-struct-array.rb +529 -0
  50. data/test/each-raw-record/test-table.rb +47 -0
  51. data/test/helper/omittable.rb +13 -0
  52. data/test/helper.rb +1 -0
  53. data/test/raw-records/test-basic-arrays.rb +11 -1
  54. data/test/raw-records/test-dense-union-array.rb +90 -45
  55. data/test/raw-records/test-list-array.rb +28 -10
  56. data/test/raw-records/test-map-array.rb +39 -10
  57. data/test/raw-records/test-sparse-union-array.rb +86 -41
  58. data/test/raw-records/test-struct-array.rb +22 -8
  59. data/test/test-array.rb +7 -0
  60. data/test/test-chunked-array.rb +9 -0
  61. data/test/test-csv-loader.rb +39 -0
  62. data/test/test-data-type.rb +2 -1
  63. data/test/test-dense-union-array.rb +42 -0
  64. data/test/test-dense-union-data-type.rb +1 -1
  65. data/test/test-expression.rb +11 -0
  66. data/test/test-function.rb +7 -7
  67. data/test/test-group.rb +58 -58
  68. data/test/test-half-float-array.rb +43 -0
  69. data/test/test-half-float.rb +130 -0
  70. data/test/test-ractor.rb +34 -0
  71. data/test/test-record-batch-file-reader.rb +21 -0
  72. data/test/test-record-batch-stream-reader.rb +129 -0
  73. data/test/test-scalar.rb +65 -0
  74. data/test/test-slicer.rb +194 -129
  75. data/test/test-sparse-union-array.rb +38 -0
  76. data/test/test-table.rb +356 -40
  77. data/test/values/test-basic-arrays.rb +10 -0
  78. data/test/values/test-dense-union-array.rb +88 -45
  79. data/test/values/test-list-array.rb +26 -10
  80. data/test/values/test-map-array.rb +33 -10
  81. data/test/values/test-sparse-union-array.rb +84 -41
  82. data/test/values/test-struct-array.rb +20 -8
  83. metadata +62 -9
@@ -0,0 +1,341 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Shared test cases for #each_raw_record on dictionary-encoded columns.
#
# Each test builds a one-column array of a single Arrow type, hands it to
# #build, and expects #each_raw_record to yield the original rows back.
# Classes that include this module must define #build(array); its result
# must respond to #each_raw_record.
module EachRawRecordDictionaryArrayTests
  # Dictionary-encodes +array+ and wraps the encoded array in a record
  # batch whose schema is built from a single `column:` entry.
  def build_record_batch(array)
    encoded = array.dictionary_encode
    Arrow::RecordBatch.new(Arrow::Schema.new(column: encoded.value_data_type),
                           array.length,
                           [encoded])
  end

  # Four rows, all nil.
  def test_null
    expected = Array.new(4) { [nil] }
    values = expected.map(&:first)
    built = build(Arrow::NullArray.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_boolean
    expected = [[true], [nil], [false]]
    values = expected.map(&:first)
    built = build(Arrow::BooleanArray.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  # Integer tests use the minimum and maximum of each width, with a nil
  # row in between.
  def test_int8
    min = -(2 ** 7)
    max = (2 ** 7) - 1
    expected = [[min], [nil], [max]]
    values = expected.map(&:first)
    built = build(Arrow::Int8Array.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_uint8
    max = (2 ** 8) - 1
    expected = [[0], [nil], [max]]
    values = expected.map(&:first)
    built = build(Arrow::UInt8Array.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_int16
    min = -(2 ** 15)
    max = (2 ** 15) - 1
    expected = [[min], [nil], [max]]
    values = expected.map(&:first)
    built = build(Arrow::Int16Array.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_uint16
    max = (2 ** 16) - 1
    expected = [[0], [nil], [max]]
    values = expected.map(&:first)
    built = build(Arrow::UInt16Array.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_int32
    min = -(2 ** 31)
    max = (2 ** 31) - 1
    expected = [[min], [nil], [max]]
    values = expected.map(&:first)
    built = build(Arrow::Int32Array.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_uint32
    max = (2 ** 32) - 1
    expected = [[0], [nil], [max]]
    values = expected.map(&:first)
    built = build(Arrow::UInt32Array.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_int64
    min = -(2 ** 63)
    max = (2 ** 63) - 1
    expected = [[min], [nil], [max]]
    values = expected.map(&:first)
    built = build(Arrow::Int64Array.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_uint64
    max = (2 ** 64) - 1
    expected = [[0], [nil], [max]]
    values = expected.map(&:first)
    built = build(Arrow::UInt64Array.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_float
    expected = [[-1.0], [nil], [1.0]]
    values = expected.map(&:first)
    built = build(Arrow::FloatArray.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_double
    expected = [[-1.0], [nil], [1.0]]
    values = expected.map(&:first)
    built = build(Arrow::DoubleArray.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_binary
    low_byte = "\x00".b
    high_byte = "\xff".b
    expected = [[low_byte], [nil], [high_byte]]
    values = expected.map(&:first)
    built = build(Arrow::BinaryArray.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_string
    hiragana_a = "\u3042" # U+3042 HIRAGANA LETTER A
    expected = [["Ruby"], [nil], [hiragana_a]]
    values = expected.map(&:first)
    built = build(Arrow::StringArray.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_date32
    earlier = Date.new(1960, 1, 1)
    later = Date.new(2017, 8, 23)
    expected = [[earlier], [nil], [later]]
    values = expected.map(&:first)
    built = build(Arrow::Date32Array.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_date64
    earlier = DateTime.new(1960, 1, 1, 2, 9, 30)
    later = DateTime.new(2017, 8, 23, 14, 57, 2)
    expected = [[earlier], [nil], [later]]
    values = expected.map(&:first)
    built = build(Arrow::Date64Array.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  # Timestamp tests carry as many fractional digits as the unit under test.
  def test_timestamp_second
    earlier = Time.parse("1960-01-01T02:09:30Z")
    later = Time.parse("2017-08-23T14:57:02Z")
    expected = [[earlier], [nil], [later]]
    values = expected.map(&:first)
    built = build(Arrow::TimestampArray.new(:second, values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_timestamp_milli
    earlier = Time.parse("1960-01-01T02:09:30.123Z")
    later = Time.parse("2017-08-23T14:57:02.987Z")
    expected = [[earlier], [nil], [later]]
    values = expected.map(&:first)
    built = build(Arrow::TimestampArray.new(:milli, values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_timestamp_micro
    earlier = Time.parse("1960-01-01T02:09:30.123456Z")
    later = Time.parse("2017-08-23T14:57:02.987654Z")
    expected = [[earlier], [nil], [later]]
    values = expected.map(&:first)
    built = build(Arrow::TimestampArray.new(:micro, values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_timestamp_nano
    earlier = Time.parse("1960-01-01T02:09:30.123456789Z")
    later = Time.parse("2017-08-23T14:57:02.987654321Z")
    expected = [[earlier], [nil], [later]]
    values = expected.map(&:first)
    built = build(Arrow::TimestampArray.new(:nano, values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_time32_second
    unit = Arrow::TimeUnit::SECOND
    expected = [
      [Arrow::Time.new(unit, 60 * 10)], # 00:10:00
      [nil],
      [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09
    ]
    values = expected.map(&:first)
    built = build(Arrow::Time32Array.new(unit, values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_time32_milli
    unit = Arrow::TimeUnit::MILLI
    expected = [
      [Arrow::Time.new(unit, (60 * 10) * 1000 + 123)], # 00:10:00.123
      [nil],
      [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987
    ]
    values = expected.map(&:first)
    built = build(Arrow::Time32Array.new(unit, values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_time64_micro
    unit = Arrow::TimeUnit::MICRO
    expected = [
      # 00:10:00.123456
      [Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)],
      [nil],
      # 02:00:09.987654
      [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)],
    ]
    values = expected.map(&:first)
    built = build(Arrow::Time64Array.new(unit, values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_time64_nano
    unit = Arrow::TimeUnit::NANO
    expected = [
      # 00:10:00.123456789
      [Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)],
      [nil],
      # 02:00:09.987654321
      [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)],
    ]
    values = expected.map(&:first)
    built = build(Arrow::Time64Array.new(unit, values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_decimal128
    expected = [[BigDecimal("92.92")], [nil], [BigDecimal("29.29")]]
    decimal_type = Arrow::Decimal128DataType.new(8, 2)
    values = expected.map(&:first)
    built = build(Arrow::Decimal128Array.new(decimal_type, values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_decimal256
    expected = [[BigDecimal("92.92")], [nil], [BigDecimal("29.29")]]
    decimal_type = Arrow::Decimal256DataType.new(38, 2)
    values = expected.map(&:first)
    built = build(Arrow::Decimal256Array.new(decimal_type, values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_month_interval
    expected = [[1], [nil], [12]]
    values = expected.map(&:first)
    built = build(Arrow::MonthIntervalArray.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  # Interval rows with several components are written as hashes.
  def test_day_time_interval
    shorter = {day: 1, millisecond: 100}
    longer = {day: 2, millisecond: 300}
    expected = [[shorter], [nil], [longer]]
    values = expected.map(&:first)
    built = build(Arrow::DayTimeIntervalArray.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end

  def test_month_day_nano_interval
    shorter = {month: 1, day: 1, nanosecond: 100}
    longer = {month: 2, day: 3, nanosecond: 400}
    expected = [[shorter], [nil], [longer]]
    values = expected.map(&:first)
    built = build(Arrow::MonthDayNanoIntervalArray.new(values))
    assert_equal(expected, built.each_raw_record.to_a)
  end
end
326
+
327
# Runs the shared dictionary-array cases with a record batch as the
# object under test.
class EachRawRecordRecordBatchDictionaryArraysTest < Test::Unit::TestCase
  include EachRawRecordDictionaryArrayTests

  # Returns the record batch produced by #build_record_batch unchanged.
  def build(array)
    record_batch = build_record_batch(array)
    record_batch
  end
end
334
+
335
# Runs the shared dictionary-array cases with a table as the object
# under test.
class EachRawRecordTableDictionaryArraysTest < Test::Unit::TestCase
  include EachRawRecordDictionaryArrayTests

  # Builds the record batch via #build_record_batch, then converts it
  # with #to_table.
  def build(array)
    record_batch = build_record_batch(array)
    record_batch.to_table
  end
end