red-arrow 6.0.1 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. checksums.yaml +4 -4
  2. data/Gemfile +10 -0
  3. data/ext/arrow/arrow.cpp +12 -0
  4. data/ext/arrow/converters.hpp +49 -10
  5. data/ext/arrow/extconf.rb +7 -5
  6. data/ext/arrow/raw-records.cpp +3 -2
  7. data/ext/arrow/red-arrow.hpp +7 -0
  8. data/ext/arrow/values.cpp +3 -2
  9. data/lib/arrow/array-builder.rb +40 -6
  10. data/lib/arrow/array-computable.rb +37 -0
  11. data/lib/arrow/array.rb +16 -0
  12. data/lib/arrow/chunked-array.rb +21 -0
  13. data/lib/arrow/column.rb +28 -0
  14. data/lib/arrow/data-type.rb +2 -1
  15. data/lib/arrow/datum.rb +2 -0
  16. data/lib/arrow/day-time-interval-array-builder.rb +29 -0
  17. data/lib/arrow/decimal128-array-builder.rb +16 -6
  18. data/lib/arrow/decimal128.rb +14 -0
  19. data/lib/arrow/decimal256-array-builder.rb +16 -6
  20. data/lib/arrow/decimal256.rb +14 -0
  21. data/lib/arrow/field.rb +44 -3
  22. data/lib/arrow/function.rb +52 -0
  23. data/lib/arrow/list-data-type.rb +1 -6
  24. data/lib/arrow/loader.rb +19 -0
  25. data/lib/arrow/month-day-nano-interval-array-builder.rb +29 -0
  26. data/lib/arrow/s3-global-options.rb +38 -0
  27. data/lib/arrow/sort-key.rb +61 -55
  28. data/lib/arrow/sort-options.rb +8 -8
  29. data/lib/arrow/string-array-builder.rb +30 -0
  30. data/lib/arrow/table-loader.rb +99 -62
  31. data/lib/arrow/table-saver.rb +7 -2
  32. data/lib/arrow/table.rb +78 -0
  33. data/lib/arrow/time-unit.rb +31 -0
  34. data/lib/arrow/time32-array-builder.rb +2 -14
  35. data/lib/arrow/time32-data-type.rb +9 -38
  36. data/lib/arrow/time64-array-builder.rb +2 -14
  37. data/lib/arrow/time64-data-type.rb +9 -38
  38. data/lib/arrow/timestamp-array-builder.rb +2 -14
  39. data/lib/arrow/timestamp-data-type.rb +9 -34
  40. data/lib/arrow/version.rb +1 -1
  41. data/red-arrow.gemspec +2 -11
  42. data/test/helper.rb +2 -0
  43. data/test/raw-records/test-basic-arrays.rb +30 -0
  44. data/test/raw-records/test-dense-union-array.rb +27 -0
  45. data/test/raw-records/test-dictionary-array.rb +341 -0
  46. data/test/raw-records/test-list-array.rb +39 -0
  47. data/test/raw-records/test-map-array.rb +37 -0
  48. data/test/raw-records/test-sparse-union-array.rb +27 -0
  49. data/test/raw-records/test-struct-array.rb +30 -0
  50. data/test/test-array-builder.rb +62 -0
  51. data/test/test-chunked-array.rb +6 -0
  52. data/test/test-column.rb +31 -0
  53. data/test/test-decimal128-array-builder.rb +14 -0
  54. data/test/test-decimal128-array.rb +5 -2
  55. data/test/test-decimal128.rb +26 -2
  56. data/test/test-decimal256-array-builder.rb +14 -0
  57. data/test/test-decimal256-array.rb +5 -2
  58. data/test/test-decimal256.rb +26 -2
  59. data/test/test-field.rb +26 -0
  60. data/test/test-function.rb +48 -14
  61. data/test/test-table.rb +204 -6
  62. data/test/values/test-basic-arrays.rb +30 -0
  63. data/test/values/test-dense-union-array.rb +27 -0
  64. data/test/values/test-dictionary-array.rb +325 -0
  65. data/test/values/test-list-array.rb +39 -0
  66. data/test/values/test-map-array.rb +33 -0
  67. data/test/values/test-sparse-union-array.rb +27 -0
  68. data/test/values/test-struct-array.rb +30 -0
  69. metadata +95 -196
@@ -0,0 +1,325 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module ValuesDictionaryArrayTests
19
+ def test_null
20
+ target = build(Arrow::NullArray.new(4))
21
+ assert_equal([nil] * 4, target.values)
22
+ end
23
+
24
+ def test_boolean
25
+ values = [true, nil, false]
26
+ target = build(Arrow::BooleanArray.new(values))
27
+ assert_equal(values, target.values)
28
+ end
29
+
30
+ def test_int8
31
+ values = [
32
+ -(2 ** 7),
33
+ nil,
34
+ (2 ** 7) - 1,
35
+ ]
36
+ target = build(Arrow::Int8Array.new(values))
37
+ assert_equal(values, target.values)
38
+ end
39
+
40
+ def test_uint8
41
+ values = [
42
+ 0,
43
+ nil,
44
+ (2 ** 8) - 1,
45
+ ]
46
+ target = build(Arrow::UInt8Array.new(values))
47
+ assert_equal(values, target.values)
48
+ end
49
+
50
+ def test_int16
51
+ values = [
52
+ -(2 ** 15),
53
+ nil,
54
+ (2 ** 15) - 1,
55
+ ]
56
+ target = build(Arrow::Int16Array.new(values))
57
+ assert_equal(values, target.values)
58
+ end
59
+
60
+ def test_uint16
61
+ values = [
62
+ 0,
63
+ nil,
64
+ (2 ** 16) - 1,
65
+ ]
66
+ target = build(Arrow::UInt16Array.new(values))
67
+ assert_equal(values, target.values)
68
+ end
69
+
70
+ def test_int32
71
+ values = [
72
+ -(2 ** 31),
73
+ nil,
74
+ (2 ** 31) - 1,
75
+ ]
76
+ target = build(Arrow::Int32Array.new(values))
77
+ assert_equal(values, target.values)
78
+ end
79
+
80
+ def test_uint32
81
+ values = [
82
+ 0,
83
+ nil,
84
+ (2 ** 32) - 1,
85
+ ]
86
+ target = build(Arrow::UInt32Array.new(values))
87
+ assert_equal(values, target.values)
88
+ end
89
+
90
+ def test_int64
91
+ values = [
92
+ -(2 ** 63),
93
+ nil,
94
+ (2 ** 63) - 1,
95
+ ]
96
+ target = build(Arrow::Int64Array.new(values))
97
+ assert_equal(values, target.values)
98
+ end
99
+
100
+ def test_uint64
101
+ values = [
102
+ 0,
103
+ nil,
104
+ (2 ** 64) - 1,
105
+ ]
106
+ target = build(Arrow::UInt64Array.new(values))
107
+ assert_equal(values, target.values)
108
+ end
109
+
110
+ def test_float
111
+ values = [
112
+ -1.0,
113
+ nil,
114
+ 1.0,
115
+ ]
116
+ target = build(Arrow::FloatArray.new(values))
117
+ assert_equal(values, target.values)
118
+ end
119
+
120
+ def test_double
121
+ values = [
122
+ -1.0,
123
+ nil,
124
+ 1.0,
125
+ ]
126
+ target = build(Arrow::DoubleArray.new(values))
127
+ assert_equal(values, target.values)
128
+ end
129
+
130
+ def test_binary
131
+ values = [
132
+ "\x00".b,
133
+ nil,
134
+ "\xff".b,
135
+ ]
136
+ target = build(Arrow::BinaryArray.new(values))
137
+ assert_equal(values, target.values)
138
+ end
139
+
140
+ def test_string
141
+ values = [
142
+ "Ruby",
143
+ nil,
144
+ "\u3042", # U+3042 HIRAGANA LETTER A
145
+ ]
146
+ target = build(Arrow::StringArray.new(values))
147
+ assert_equal(values, target.values)
148
+ end
149
+
150
+ def test_date32
151
+ values = [
152
+ Date.new(1960, 1, 1),
153
+ nil,
154
+ Date.new(2017, 8, 23),
155
+ ]
156
+ target = build(Arrow::Date32Array.new(values))
157
+ assert_equal(values, target.values)
158
+ end
159
+
160
+ def test_date64
161
+ values = [
162
+ DateTime.new(1960, 1, 1, 2, 9, 30),
163
+ nil,
164
+ DateTime.new(2017, 8, 23, 14, 57, 2),
165
+ ]
166
+ target = build(Arrow::Date64Array.new(values))
167
+ assert_equal(values, target.values)
168
+ end
169
+
170
+ def test_timestamp_second
171
+ values = [
172
+ Time.parse("1960-01-01T02:09:30Z"),
173
+ nil,
174
+ Time.parse("2017-08-23T14:57:02Z"),
175
+ ]
176
+ target = build(Arrow::TimestampArray.new(:second, values))
177
+ assert_equal(values, target.values)
178
+ end
179
+
180
+ def test_timestamp_milli
181
+ values = [
182
+ Time.parse("1960-01-01T02:09:30.123Z"),
183
+ nil,
184
+ Time.parse("2017-08-23T14:57:02.987Z"),
185
+ ]
186
+ target = build(Arrow::TimestampArray.new(:milli, values))
187
+ assert_equal(values, target.values)
188
+ end
189
+
190
+ def test_timestamp_micro
191
+ values = [
192
+ Time.parse("1960-01-01T02:09:30.123456Z"),
193
+ nil,
194
+ Time.parse("2017-08-23T14:57:02.987654Z"),
195
+ ]
196
+ target = build(Arrow::TimestampArray.new(:micro, values))
197
+ assert_equal(values, target.values)
198
+ end
199
+
200
+ def test_timestamp_nano
201
+ values = [
202
+ Time.parse("1960-01-01T02:09:30.123456789Z"),
203
+ nil,
204
+ Time.parse("2017-08-23T14:57:02.987654321Z"),
205
+ ]
206
+ target = build(Arrow::TimestampArray.new(:nano, values))
207
+ assert_equal(values, target.values)
208
+ end
209
+
210
+ def test_time32_second
211
+ unit = Arrow::TimeUnit::SECOND
212
+ values = [
213
+ Arrow::Time.new(unit, 60 * 10), # 00:10:00
214
+ nil,
215
+ Arrow::Time.new(unit, 60 * 60 * 2 + 9), # 02:00:09
216
+ ]
217
+ target = build(Arrow::Time32Array.new(unit, values))
218
+ assert_equal(values, target.values)
219
+ end
220
+
221
+ def test_time32_milli
222
+ unit = Arrow::TimeUnit::MILLI
223
+ values = [
224
+ Arrow::Time.new(unit, (60 * 10) * 1000 + 123), # 00:10:00.123
225
+ nil,
226
+ Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987), # 02:00:09.987
227
+ ]
228
+ target = build(Arrow::Time32Array.new(unit, values))
229
+ assert_equal(values, target.values)
230
+ end
231
+
232
+ def test_time64_micro
233
+ unit = Arrow::TimeUnit::MICRO
234
+ values = [
235
+ # 00:10:00.123456
236
+ Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456),
237
+ nil,
238
+ # 02:00:09.987654
239
+ Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654),
240
+ ]
241
+ target = build(Arrow::Time64Array.new(unit, values))
242
+ assert_equal(values, target.values)
243
+ end
244
+
245
+ def test_time64_nano
246
+ unit = Arrow::TimeUnit::NANO
247
+ values = [
248
+ # 00:10:00.123456789
249
+ Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789),
250
+ nil,
251
+ # 02:00:09.987654321
252
+ Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321),
253
+ ]
254
+ target = build(Arrow::Time64Array.new(unit, values))
255
+ assert_equal(values, target.values)
256
+ end
257
+
258
+ def test_decimal128
259
+ values = [
260
+ BigDecimal("92.92"),
261
+ nil,
262
+ BigDecimal("29.29"),
263
+ ]
264
+ data_type = Arrow::Decimal128DataType.new(8, 2)
265
+ target = build(Arrow::Decimal128Array.new(data_type, values))
266
+ assert_equal(values, target.values)
267
+ end
268
+
269
+ def test_decimal256
270
+ values = [
271
+ BigDecimal("92.92"),
272
+ nil,
273
+ BigDecimal("29.29"),
274
+ ]
275
+ data_type = Arrow::Decimal256DataType.new(38, 2)
276
+ target = build(Arrow::Decimal256Array.new(data_type, values))
277
+ assert_equal(values, target.values)
278
+ end
279
+
280
+ def test_month_interval
281
+ values = [
282
+ 1,
283
+ nil,
284
+ 12,
285
+ ]
286
+ target = build(Arrow::MonthIntervalArray.new(values))
287
+ assert_equal(values, target.values)
288
+ end
289
+
290
+ def test_day_time_interval
291
+ values = [
292
+ {day: 1, millisecond: 100},
293
+ nil,
294
+ {day: 2, millisecond: 300},
295
+ ]
296
+ target = build(Arrow::DayTimeIntervalArray.new(values))
297
+ assert_equal(values, target.values)
298
+ end
299
+
300
+ def test_month_day_nano_interval
301
+ values = [
302
+ {month: 1, day: 1, nanosecond: 100},
303
+ nil,
304
+ {month: 2, day: 3, nanosecond: 400},
305
+ ]
306
+ target = build(Arrow::MonthDayNanoIntervalArray.new(values))
307
+ assert_equal(values, target.values)
308
+ end
309
+ end
310
+
311
+ class ValuesArrayDictionaryArrayTest < Test::Unit::TestCase
312
+ include ValuesDictionaryArrayTests
313
+
314
+ def build(values)
315
+ values.dictionary_encode
316
+ end
317
+ end
318
+
319
+ class ValuesChunkedArrayDictionaryArrayTest < Test::Unit::TestCase
320
+ include ValuesDictionaryArrayTests
321
+
322
+ def build(values)
323
+ Arrow::ChunkedArray.new([values.dictionary_encode])
324
+ end
325
+ end
@@ -372,6 +372,45 @@ module ValuesListArrayTests
372
372
  assert_equal(values, target.values)
373
373
  end
374
374
 
375
+ def test_month_interval
376
+ values = [
377
+ [
378
+ 1,
379
+ nil,
380
+ 12,
381
+ ],
382
+ nil,
383
+ ]
384
+ target = build(:month_interval, values)
385
+ assert_equal(values, target.values)
386
+ end
387
+
388
+ def test_day_time_interval
389
+ values = [
390
+ [
391
+ {day: 1, millisecond: 100},
392
+ nil,
393
+ {day: 2, millisecond: 300},
394
+ ],
395
+ nil,
396
+ ]
397
+ target = build(:day_time_interval, values)
398
+ assert_equal(values, target.values)
399
+ end
400
+
401
+ def test_month_day_nano_interval
402
+ values = [
403
+ [
404
+ {month: 1, day: 1, nanosecond: 100},
405
+ nil,
406
+ {month: 2, day: 3, nanosecond: 400},
407
+ ],
408
+ nil,
409
+ ]
410
+ target = build(:month_day_nano_interval, values)
411
+ assert_equal(values, target.values)
412
+ end
413
+
375
414
  def test_list
376
415
  values = [
377
416
  [
@@ -302,6 +302,39 @@ module ValuesMapArrayTests
302
302
  assert_equal(values, target.values)
303
303
  end
304
304
 
305
+ def test_month_interval
306
+ values = [
307
+ {"key1" => 1, "key2" => nil},
308
+ nil,
309
+ ]
310
+ target = build(:month_interval, values)
311
+ assert_equal(values, target.values)
312
+ end
313
+
314
+ def test_day_time_interval
315
+ values = [
316
+ {
317
+ "key1" => {day: 1, millisecond: 100},
318
+ "key2" => nil,
319
+ },
320
+ nil,
321
+ ]
322
+ target = build(:day_time_interval, values)
323
+ assert_equal(values, target.values)
324
+ end
325
+
326
+ def test_month_day_nano_interval
327
+ values = [
328
+ {
329
+ "key1" => {month: 1, day: 1, nanosecond: 100},
330
+ "key2" => nil,
331
+ },
332
+ nil,
333
+ ]
334
+ target = build(:month_day_nano_interval, values)
335
+ assert_equal(values, target.values)
336
+ end
337
+
305
338
  def test_list
306
339
  values = [
307
340
  {"key1" => [true, nil, false], "key2" => nil},
@@ -324,6 +324,33 @@ module ValuesSparseUnionArrayTests
324
324
  assert_equal(values, target.values)
325
325
  end
326
326
 
327
+ def test_month_interval
328
+ values = [
329
+ {"0" => 1},
330
+ {"1" => nil},
331
+ ]
332
+ target = build(:month_interval, values)
333
+ assert_equal(values, target.values)
334
+ end
335
+
336
+ def test_day_time_interval
337
+ values = [
338
+ {"0" => {day: 1, millisecond: 100}},
339
+ {"1" => nil},
340
+ ]
341
+ target = build(:day_time_interval, values)
342
+ assert_equal(values, target.values)
343
+ end
344
+
345
+ def test_month_day_nano_interval
346
+ values = [
347
+ {"0" => {month: 1, day: 1, nanosecond: 100}},
348
+ {"1" => nil},
349
+ ]
350
+ target = build(:month_day_nano_interval, values)
351
+ assert_equal(values, target.values)
352
+ end
353
+
327
354
  def test_decimal256
328
355
  values = [
329
356
  {"0" => BigDecimal("92.92")},
@@ -341,6 +341,36 @@ module ValuesStructArrayTests
341
341
  assert_equal(values, target.values)
342
342
  end
343
343
 
344
+ def test_month_interval
345
+ values = [
346
+ {"field" => 1},
347
+ nil,
348
+ {"field" => nil},
349
+ ]
350
+ target = build(:month_interval, values)
351
+ assert_equal(values, target.values)
352
+ end
353
+
354
+ def test_day_time_interval
355
+ values = [
356
+ {"field" => {day: 1, millisecond: 100}},
357
+ nil,
358
+ {"field" => nil},
359
+ ]
360
+ target = build(:day_time_interval, values)
361
+ assert_equal(values, target.values)
362
+ end
363
+
364
+ def test_month_day_nano_interval
365
+ values = [
366
+ {"field" => {month: 1, day: 1, nanosecond: 100}},
367
+ nil,
368
+ {"field" => nil},
369
+ ]
370
+ target = build(:month_day_nano_interval, values)
371
+ assert_equal(values, target.values)
372
+ end
373
+
344
374
  def test_list
345
375
  values = [
346
376
  {"field" => [true, nil, false]},