red-arrow 10.0.0 → 16.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/arrow.cpp +31 -0
  4. data/ext/arrow/converters.hpp +45 -41
  5. data/ext/arrow/extconf.rb +16 -4
  6. data/ext/arrow/raw-records.cpp +155 -2
  7. data/ext/arrow/red-arrow.hpp +2 -0
  8. data/ext/arrow/values.cpp +1 -2
  9. data/lib/arrow/array-computable.rb +13 -0
  10. data/lib/arrow/array.rb +6 -1
  11. data/lib/arrow/chunked-array.rb +35 -1
  12. data/lib/arrow/column-containable.rb +9 -0
  13. data/lib/arrow/column.rb +1 -0
  14. data/lib/arrow/data-type.rb +9 -0
  15. data/lib/arrow/dense-union-array-builder.rb +49 -0
  16. data/lib/arrow/dense-union-array.rb +26 -0
  17. data/lib/arrow/expression.rb +6 -2
  18. data/lib/arrow/function.rb +0 -1
  19. data/lib/arrow/half-float-array-builder.rb +32 -0
  20. data/lib/arrow/half-float-array.rb +24 -0
  21. data/lib/arrow/half-float.rb +118 -0
  22. data/lib/arrow/input-referable.rb +29 -0
  23. data/lib/arrow/loader.rb +11 -0
  24. data/lib/arrow/raw-table-converter.rb +7 -5
  25. data/lib/arrow/record-batch-file-reader.rb +2 -0
  26. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  27. data/lib/arrow/record-batch.rb +6 -2
  28. data/lib/arrow/scalar.rb +67 -0
  29. data/lib/arrow/slicer.rb +61 -0
  30. data/lib/arrow/sort-key.rb +3 -3
  31. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  32. data/lib/arrow/sparse-union-array.rb +26 -0
  33. data/lib/arrow/struct-array-builder.rb +0 -5
  34. data/lib/arrow/table-loader.rb +11 -5
  35. data/lib/arrow/table-saver.rb +1 -0
  36. data/lib/arrow/table.rb +180 -33
  37. data/lib/arrow/tensor.rb +4 -0
  38. data/lib/arrow/timestamp-parser.rb +33 -0
  39. data/lib/arrow/union-array-builder.rb +59 -0
  40. data/lib/arrow/version.rb +1 -1
  41. data/red-arrow.gemspec +2 -1
  42. data/test/each-raw-record/test-basic-arrays.rb +411 -0
  43. data/test/each-raw-record/test-dense-union-array.rb +566 -0
  44. data/test/each-raw-record/test-dictionary-array.rb +341 -0
  45. data/test/each-raw-record/test-list-array.rb +628 -0
  46. data/test/each-raw-record/test-map-array.rb +507 -0
  47. data/test/each-raw-record/test-multiple-columns.rb +72 -0
  48. data/test/each-raw-record/test-sparse-union-array.rb +528 -0
  49. data/test/each-raw-record/test-struct-array.rb +529 -0
  50. data/test/each-raw-record/test-table.rb +47 -0
  51. data/test/helper/omittable.rb +13 -0
  52. data/test/helper.rb +1 -0
  53. data/test/raw-records/test-basic-arrays.rb +11 -1
  54. data/test/raw-records/test-dense-union-array.rb +90 -45
  55. data/test/raw-records/test-list-array.rb +28 -10
  56. data/test/raw-records/test-map-array.rb +39 -10
  57. data/test/raw-records/test-sparse-union-array.rb +86 -41
  58. data/test/raw-records/test-struct-array.rb +22 -8
  59. data/test/test-array.rb +7 -0
  60. data/test/test-chunked-array.rb +9 -0
  61. data/test/test-csv-loader.rb +39 -0
  62. data/test/test-data-type.rb +2 -1
  63. data/test/test-dense-union-array.rb +42 -0
  64. data/test/test-dense-union-data-type.rb +1 -1
  65. data/test/test-expression.rb +11 -0
  66. data/test/test-function.rb +7 -7
  67. data/test/test-group.rb +58 -58
  68. data/test/test-half-float-array.rb +43 -0
  69. data/test/test-half-float.rb +130 -0
  70. data/test/test-ractor.rb +34 -0
  71. data/test/test-record-batch-file-reader.rb +21 -0
  72. data/test/test-record-batch-stream-reader.rb +129 -0
  73. data/test/test-scalar.rb +65 -0
  74. data/test/test-slicer.rb +194 -129
  75. data/test/test-sparse-union-array.rb +38 -0
  76. data/test/test-table.rb +356 -40
  77. data/test/values/test-basic-arrays.rb +10 -0
  78. data/test/values/test-dense-union-array.rb +88 -45
  79. data/test/values/test-list-array.rb +26 -10
  80. data/test/values/test-map-array.rb +33 -10
  81. data/test/values/test-sparse-union-array.rb +84 -41
  82. data/test/values/test-struct-array.rb +20 -8
  83. metadata +62 -9
@@ -0,0 +1,411 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Shared test cases for `each_raw_record` on single-column data.
#
# The including class must define `build(schema, records)`. Every case
# builds a container with one field named `column`, then checks that
# `each_raw_record.to_a` yields exactly the records it was built from.
# Each data set has a nil in the middle row.
module EachRawRecordBasicArraysTests
  def test_null
    expected = [[nil], [nil], [nil]]
    actual = build({column: :null}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_boolean
    expected = [[true], [nil], [false]]
    actual = build({column: :boolean}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_int8
    expected = [[-(2 ** 7)], [nil], [(2 ** 7) - 1]]
    actual = build({column: :int8}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_uint8
    expected = [[0], [nil], [(2 ** 8) - 1]]
    actual = build({column: :uint8}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_int16
    expected = [[-(2 ** 15)], [nil], [(2 ** 15) - 1]]
    actual = build({column: :int16}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_uint16
    expected = [[0], [nil], [(2 ** 16) - 1]]
    actual = build({column: :uint16}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_int32
    expected = [[-(2 ** 31)], [nil], [(2 ** 31) - 1]]
    actual = build({column: :int32}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_uint32
    expected = [[0], [nil], [(2 ** 32) - 1]]
    actual = build({column: :uint32}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_int64
    expected = [[-(2 ** 63)], [nil], [(2 ** 63) - 1]]
    actual = build({column: :int64}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_uint64
    expected = [[0], [nil], [(2 ** 64) - 1]]
    actual = build({column: :uint64}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_half_float
    expected = [[-1.5], [nil], [1.5]]
    actual = build({column: :half_float}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_float
    expected = [[-1.0], [nil], [1.0]]
    actual = build({column: :float}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_double
    expected = [[-1.0], [nil], [1.0]]
    actual = build({column: :double}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_binary
    expected = [["\x00".b], [nil], ["\xff".b]]
    actual = build({column: :binary}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_string
    hiragana_a = "\u3042" # U+3042 HIRAGANA LETTER A
    expected = [["Ruby"], [nil], [hiragana_a]]
    actual = build({column: :string}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_date32
    expected = [
      [Date.new(1960, 1, 1)],
      [nil],
      [Date.new(2017, 8, 23)],
    ]
    actual = build({column: :date32}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_date64
    expected = [
      [DateTime.new(1960, 1, 1, 2, 9, 30)],
      [nil],
      [DateTime.new(2017, 8, 23, 14, 57, 2)],
    ]
    actual = build({column: :date64}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_timestamp_second
    expected = [
      [Time.parse("1960-01-01T02:09:30Z")],
      [nil],
      [Time.parse("2017-08-23T14:57:02Z")],
    ]
    schema = {column: {type: :timestamp, unit: :second}}
    actual = build(schema, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_timestamp_milli
    expected = [
      [Time.parse("1960-01-01T02:09:30.123Z")],
      [nil],
      [Time.parse("2017-08-23T14:57:02.987Z")],
    ]
    schema = {column: {type: :timestamp, unit: :milli}}
    actual = build(schema, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_timestamp_micro
    expected = [
      [Time.parse("1960-01-01T02:09:30.123456Z")],
      [nil],
      [Time.parse("2017-08-23T14:57:02.987654Z")],
    ]
    schema = {column: {type: :timestamp, unit: :micro}}
    actual = build(schema, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_timestamp_nano
    expected = [
      [Time.parse("1960-01-01T02:09:30.123456789Z")],
      [nil],
      [Time.parse("2017-08-23T14:57:02.987654321Z")],
    ]
    schema = {column: {type: :timestamp, unit: :nano}}
    actual = build(schema, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_time32_second
    time_unit = Arrow::TimeUnit::SECOND
    expected = [
      [Arrow::Time.new(time_unit, 60 * 10)], # 00:10:00
      [nil],
      [Arrow::Time.new(time_unit, 60 * 60 * 2 + 9)], # 02:00:09
    ]
    schema = {column: {type: :time32, unit: :second}}
    actual = build(schema, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_time32_milli
    time_unit = Arrow::TimeUnit::MILLI
    expected = [
      # 00:10:00.123
      [Arrow::Time.new(time_unit, (60 * 10) * 1000 + 123)],
      [nil],
      # 02:00:09.987
      [Arrow::Time.new(time_unit, (60 * 60 * 2 + 9) * 1000 + 987)],
    ]
    schema = {column: {type: :time32, unit: :milli}}
    actual = build(schema, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_time64_micro
    time_unit = Arrow::TimeUnit::MICRO
    expected = [
      # 00:10:00.123456
      [Arrow::Time.new(time_unit, (60 * 10) * 1_000_000 + 123_456)],
      [nil],
      # 02:00:09.987654
      [Arrow::Time.new(time_unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)],
    ]
    schema = {column: {type: :time64, unit: :micro}}
    actual = build(schema, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_time64_nano
    time_unit = Arrow::TimeUnit::NANO
    expected = [
      # 00:10:00.123456789
      [Arrow::Time.new(time_unit, (60 * 10) * 1_000_000_000 + 123_456_789)],
      [nil],
      # 02:00:09.987654321
      [Arrow::Time.new(time_unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)],
    ]
    schema = {column: {type: :time64, unit: :nano}}
    actual = build(schema, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_decimal128
    expected = [
      [BigDecimal("92.92")],
      [nil],
      [BigDecimal("29.29")],
    ]
    schema = {column: {type: :decimal128, precision: 8, scale: 2}}
    actual = build(schema, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_decimal256
    expected = [
      [BigDecimal("92.92")],
      [nil],
      [BigDecimal("29.29")],
    ]
    schema = {column: {type: :decimal256, precision: 38, scale: 2}}
    actual = build(schema, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_month_interval
    expected = [[1], [nil], [12]]
    actual = build({column: :month_interval}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_day_time_interval
    expected = [
      [{day: 1, millisecond: 100}],
      [nil],
      [{day: 2, millisecond: 300}],
    ]
    actual = build({column: :day_time_interval}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end

  def test_month_day_nano_interval
    expected = [
      [{month: 1, day: 1, nanosecond: 100}],
      [nil],
      [{month: 2, day: 3, nanosecond: 400}],
    ]
    actual = build({column: :month_day_nano_interval}, expected).each_raw_record.to_a
    assert_equal(expected, actual)
  end
end
389
+
390
# Runs the shared EachRawRecordBasicArraysTests cases against an
# Arrow::RecordBatch.
class EachRawRecordRecordBatchBasicArraysTest < Test::Unit::TestCase
  include EachRawRecordBasicArraysTests

  # Builds the object under test for the shared cases.
  #
  # schema  - the schema description passed through to Arrow::RecordBatch.new.
  # records - the rows passed through to Arrow::RecordBatch.new.
  #
  # Returns the new Arrow::RecordBatch.
  def build(schema, records)
    Arrow::RecordBatch.new(schema, records)
  end
end
397
+
398
# Runs the shared EachRawRecordBasicArraysTests cases against an
# Arrow::Table assembled from several record batch slices.
class EachRawRecordTableBasicArraysTest < Test::Unit::TestCase
  include EachRawRecordBasicArraysTests

  # Builds the object under test for the shared cases.
  #
  # One record batch is created from the arguments and then cut into
  # three slices, the middle one empty, so the resulting table is made of
  # multiple chunks.
  #
  # Returns the new Arrow::Table.
  def build(schema, records)
    whole_batch = Arrow::RecordBatch.new(schema, records)
    # [offset, length] of each chunk, in table order.
    chunk_bounds = [
      [0, 2],
      [2, 0], # Empty chunk
      [2, whole_batch.length - 2],
    ]
    chunks = chunk_bounds.map do |offset, length|
      whole_batch.slice(offset, length)
    end
    Arrow::Table.new(schema, chunks)
  end
end
411
+ end