red-arrow 6.0.0 → 8.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +10 -0
  3. data/ext/arrow/arrow.cpp +12 -0
  4. data/ext/arrow/converters.hpp +46 -10
  5. data/ext/arrow/extconf.rb +1 -1
  6. data/ext/arrow/raw-records.cpp +3 -2
  7. data/ext/arrow/red-arrow.hpp +7 -0
  8. data/ext/arrow/values.cpp +3 -2
  9. data/lib/arrow/datum.rb +2 -0
  10. data/lib/arrow/day-time-interval-array-builder.rb +29 -0
  11. data/lib/arrow/function.rb +52 -0
  12. data/lib/arrow/loader.rb +16 -0
  13. data/lib/arrow/month-day-nano-interval-array-builder.rb +29 -0
  14. data/lib/arrow/s3-global-options.rb +38 -0
  15. data/lib/arrow/sort-key.rb +61 -55
  16. data/lib/arrow/sort-options.rb +8 -8
  17. data/lib/arrow/table-loader.rb +99 -62
  18. data/lib/arrow/table-saver.rb +7 -2
  19. data/lib/arrow/table.rb +78 -0
  20. data/lib/arrow/version.rb +1 -1
  21. data/red-arrow.gemspec +1 -10
  22. data/test/helper.rb +2 -0
  23. data/test/raw-records/test-basic-arrays.rb +30 -0
  24. data/test/raw-records/test-dense-union-array.rb +27 -0
  25. data/test/raw-records/test-list-array.rb +39 -0
  26. data/test/raw-records/test-map-array.rb +37 -0
  27. data/test/raw-records/test-sparse-union-array.rb +27 -0
  28. data/test/raw-records/test-struct-array.rb +30 -0
  29. data/test/test-function.rb +48 -14
  30. data/test/test-table.rb +204 -6
  31. data/test/values/test-basic-arrays.rb +30 -0
  32. data/test/values/test-dense-union-array.rb +27 -0
  33. data/test/values/test-dictionary-array.rb +295 -0
  34. data/test/values/test-list-array.rb +39 -0
  35. data/test/values/test-map-array.rb +33 -0
  36. data/test/values/test-sparse-union-array.rb +27 -0
  37. data/test/values/test-struct-array.rb +30 -0
  38. metadata +88 -194
@@ -0,0 +1,295 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module ValuesDictionaryArrayTests
19
+ def test_null
20
+ target = build(Arrow::NullArray.new(4))
21
+ assert_equal([nil] * 4, target.values)
22
+ end
23
+
24
+ def test_boolean
25
+ values = [true, nil, false]
26
+ target = build(Arrow::BooleanArray.new(values))
27
+ assert_equal(values, target.values)
28
+ end
29
+
30
+ def test_int8
31
+ values = [
32
+ -(2 ** 7),
33
+ nil,
34
+ (2 ** 7) - 1,
35
+ ]
36
+ target = build(Arrow::Int8Array.new(values))
37
+ assert_equal(values, target.values)
38
+ end
39
+
40
+ def test_uint8
41
+ values = [
42
+ 0,
43
+ nil,
44
+ (2 ** 8) - 1,
45
+ ]
46
+ target = build(Arrow::UInt8Array.new(values))
47
+ assert_equal(values, target.values)
48
+ end
49
+
50
+ def test_int16
51
+ values = [
52
+ -(2 ** 15),
53
+ nil,
54
+ (2 ** 15) - 1,
55
+ ]
56
+ target = build(Arrow::Int16Array.new(values))
57
+ assert_equal(values, target.values)
58
+ end
59
+
60
+ def test_uint16
61
+ values = [
62
+ 0,
63
+ nil,
64
+ (2 ** 16) - 1,
65
+ ]
66
+ target = build(Arrow::UInt16Array.new(values))
67
+ assert_equal(values, target.values)
68
+ end
69
+
70
+ def test_int32
71
+ values = [
72
+ -(2 ** 31),
73
+ nil,
74
+ (2 ** 31) - 1,
75
+ ]
76
+ target = build(Arrow::Int32Array.new(values))
77
+ assert_equal(values, target.values)
78
+ end
79
+
80
+ def test_uint32
81
+ values = [
82
+ 0,
83
+ nil,
84
+ (2 ** 32) - 1,
85
+ ]
86
+ target = build(Arrow::UInt32Array.new(values))
87
+ assert_equal(values, target.values)
88
+ end
89
+
90
+ def test_int64
91
+ values = [
92
+ -(2 ** 63),
93
+ nil,
94
+ (2 ** 63) - 1,
95
+ ]
96
+ target = build(Arrow::Int64Array.new(values))
97
+ assert_equal(values, target.values)
98
+ end
99
+
100
+ def test_uint64
101
+ values = [
102
+ 0,
103
+ nil,
104
+ (2 ** 64) - 1,
105
+ ]
106
+ target = build(Arrow::UInt64Array.new(values))
107
+ assert_equal(values, target.values)
108
+ end
109
+
110
+ def test_float
111
+ values = [
112
+ -1.0,
113
+ nil,
114
+ 1.0,
115
+ ]
116
+ target = build(Arrow::FloatArray.new(values))
117
+ assert_equal(values, target.values)
118
+ end
119
+
120
+ def test_double
121
+ values = [
122
+ -1.0,
123
+ nil,
124
+ 1.0,
125
+ ]
126
+ target = build(Arrow::DoubleArray.new(values))
127
+ assert_equal(values, target.values)
128
+ end
129
+
130
+ def test_binary
131
+ values = [
132
+ "\x00".b,
133
+ nil,
134
+ "\xff".b,
135
+ ]
136
+ target = build(Arrow::BinaryArray.new(values))
137
+ assert_equal(values, target.values)
138
+ end
139
+
140
+ def test_string
141
+ values = [
142
+ "Ruby",
143
+ nil,
144
+ "\u3042", # U+3042 HIRAGANA LETTER A
145
+ ]
146
+ target = build(Arrow::StringArray.new(values))
147
+ assert_equal(values, target.values)
148
+ end
149
+
150
+ def test_date32
151
+ values = [
152
+ Date.new(1960, 1, 1),
153
+ nil,
154
+ Date.new(2017, 8, 23),
155
+ ]
156
+ target = build(Arrow::Date32Array.new(values))
157
+ assert_equal(values, target.values)
158
+ end
159
+
160
+ def test_date64
161
+ values = [
162
+ DateTime.new(1960, 1, 1, 2, 9, 30),
163
+ nil,
164
+ DateTime.new(2017, 8, 23, 14, 57, 2),
165
+ ]
166
+ target = build(Arrow::Date64Array.new(values))
167
+ assert_equal(values, target.values)
168
+ end
169
+
170
+ def test_timestamp_second
171
+ values = [
172
+ Time.parse("1960-01-01T02:09:30Z"),
173
+ nil,
174
+ Time.parse("2017-08-23T14:57:02Z"),
175
+ ]
176
+ target = build(Arrow::TimestampArray.new(:second, values))
177
+ assert_equal(values, target.values)
178
+ end
179
+
180
+ def test_timestamp_milli
181
+ values = [
182
+ Time.parse("1960-01-01T02:09:30.123Z"),
183
+ nil,
184
+ Time.parse("2017-08-23T14:57:02.987Z"),
185
+ ]
186
+ target = build(Arrow::TimestampArray.new(:milli, values))
187
+ assert_equal(values, target.values)
188
+ end
189
+
190
+ def test_timestamp_micro
191
+ values = [
192
+ Time.parse("1960-01-01T02:09:30.123456Z"),
193
+ nil,
194
+ Time.parse("2017-08-23T14:57:02.987654Z"),
195
+ ]
196
+ target = build(Arrow::TimestampArray.new(:micro, values))
197
+ assert_equal(values, target.values)
198
+ end
199
+
200
+ def test_timestamp_nano
201
+ values = [
202
+ Time.parse("1960-01-01T02:09:30.123456789Z"),
203
+ nil,
204
+ Time.parse("2017-08-23T14:57:02.987654321Z"),
205
+ ]
206
+ target = build(Arrow::TimestampArray.new(:nano, values))
207
+ assert_equal(values, target.values)
208
+ end
209
+
210
+ def test_time32_second
211
+ unit = Arrow::TimeUnit::SECOND
212
+ values = [
213
+ Arrow::Time.new(unit, 60 * 10), # 00:10:00
214
+ nil,
215
+ Arrow::Time.new(unit, 60 * 60 * 2 + 9), # 02:00:09
216
+ ]
217
+ target = build(Arrow::Time32Array.new(unit, values))
218
+ assert_equal(values, target.values)
219
+ end
220
+
221
+ def test_time32_milli
222
+ unit = Arrow::TimeUnit::MILLI
223
+ values = [
224
+ Arrow::Time.new(unit, (60 * 10) * 1000 + 123), # 00:10:00.123
225
+ nil,
226
+ Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987), # 02:00:09.987
227
+ ]
228
+ target = build(Arrow::Time32Array.new(unit, values))
229
+ assert_equal(values, target.values)
230
+ end
231
+
232
+ def test_time64_micro
233
+ unit = Arrow::TimeUnit::MICRO
234
+ values = [
235
+ # 00:10:00.123456
236
+ Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456),
237
+ nil,
238
+ # 02:00:09.987654
239
+ Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654),
240
+ ]
241
+ target = build(Arrow::Time64Array.new(unit, values))
242
+ assert_equal(values, target.values)
243
+ end
244
+
245
+ def test_time64_nano
246
+ unit = Arrow::TimeUnit::NANO
247
+ values = [
248
+ # 00:10:00.123456789
249
+ Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789),
250
+ nil,
251
+ # 02:00:09.987654321
252
+ Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321),
253
+ ]
254
+ target = build(Arrow::Time64Array.new(unit, values))
255
+ assert_equal(values, target.values)
256
+ end
257
+
258
+ def test_decimal128
259
+ values = [
260
+ BigDecimal("92.92"),
261
+ nil,
262
+ BigDecimal("29.29"),
263
+ ]
264
+ data_type = Arrow::Decimal128DataType.new(8, 2)
265
+ target = build(Arrow::Decimal128Array.new(data_type, values))
266
+ assert_equal(values, target.values)
267
+ end
268
+
269
+ def test_decimal256
270
+ values = [
271
+ BigDecimal("92.92"),
272
+ nil,
273
+ BigDecimal("29.29"),
274
+ ]
275
+ data_type = Arrow::Decimal256DataType.new(38, 2)
276
+ target = build(Arrow::Decimal256Array.new(data_type, values))
277
+ assert_equal(values, target.values)
278
+ end
279
+ end
280
+
281
+ class ValuesArrayDictionaryArrayTest < Test::Unit::TestCase
282
+ include ValuesDictionaryArrayTests
283
+
284
+ def build(values)
285
+ values.dictionary_encode
286
+ end
287
+ end
288
+
289
+ class ValuesChunkedArrayDictionaryArrayTest < Test::Unit::TestCase
290
+ include ValuesDictionaryArrayTests
291
+
292
+ def build(values)
293
+ Arrow::ChunkedArray.new([values.dictionary_encode])
294
+ end
295
+ end
@@ -372,6 +372,45 @@ module ValuesListArrayTests
372
372
  assert_equal(values, target.values)
373
373
  end
374
374
 
375
+ def test_month_interval
376
+ values = [
377
+ [
378
+ 1,
379
+ nil,
380
+ 12,
381
+ ],
382
+ nil,
383
+ ]
384
+ target = build(:month_interval, values)
385
+ assert_equal(values, target.values)
386
+ end
387
+
388
+ def test_day_time_interval
389
+ values = [
390
+ [
391
+ {day: 1, millisecond: 100},
392
+ nil,
393
+ {day: 2, millisecond: 300},
394
+ ],
395
+ nil,
396
+ ]
397
+ target = build(:day_time_interval, values)
398
+ assert_equal(values, target.values)
399
+ end
400
+
401
+ def test_month_day_nano_interval
402
+ values = [
403
+ [
404
+ {month: 1, day: 1, nanosecond: 100},
405
+ nil,
406
+ {month: 2, day: 3, nanosecond: 400},
407
+ ],
408
+ nil,
409
+ ]
410
+ target = build(:month_day_nano_interval, values)
411
+ assert_equal(values, target.values)
412
+ end
413
+
375
414
  def test_list
376
415
  values = [
377
416
  [
@@ -302,6 +302,39 @@ module ValuesMapArrayTests
302
302
  assert_equal(values, target.values)
303
303
  end
304
304
 
305
+ def test_month_interval
306
+ values = [
307
+ {"key1" => 1, "key2" => nil},
308
+ nil,
309
+ ]
310
+ target = build(:month_interval, values)
311
+ assert_equal(values, target.values)
312
+ end
313
+
314
+ def test_day_time_interval
315
+ values = [
316
+ {
317
+ "key1" => {day: 1, millisecond: 100},
318
+ "key2" => nil,
319
+ },
320
+ nil,
321
+ ]
322
+ target = build(:day_time_interval, values)
323
+ assert_equal(values, target.values)
324
+ end
325
+
326
+ def test_month_day_nano_interval
327
+ values = [
328
+ {
329
+ "key1" => {month: 1, day: 1, nanosecond: 100},
330
+ "key2" => nil,
331
+ },
332
+ nil,
333
+ ]
334
+ target = build(:month_day_nano_interval, values)
335
+ assert_equal(values, target.values)
336
+ end
337
+
305
338
  def test_list
306
339
  values = [
307
340
  {"key1" => [true, nil, false], "key2" => nil},
@@ -324,6 +324,33 @@ module ValuesSparseUnionArrayTests
324
324
  assert_equal(values, target.values)
325
325
  end
326
326
 
327
+ def test_month_interval
328
+ values = [
329
+ {"0" => 1},
330
+ {"1" => nil},
331
+ ]
332
+ target = build(:month_interval, values)
333
+ assert_equal(values, target.values)
334
+ end
335
+
336
+ def test_day_time_interval
337
+ values = [
338
+ {"0" => {day: 1, millisecond: 100}},
339
+ {"1" => nil},
340
+ ]
341
+ target = build(:day_time_interval, values)
342
+ assert_equal(values, target.values)
343
+ end
344
+
345
+ def test_month_day_nano_interval
346
+ values = [
347
+ {"0" => {month: 1, day: 1, nanosecond: 100}},
348
+ {"1" => nil},
349
+ ]
350
+ target = build(:month_day_nano_interval, values)
351
+ assert_equal(values, target.values)
352
+ end
353
+
327
354
  def test_decimal256
328
355
  values = [
329
356
  {"0" => BigDecimal("92.92")},
@@ -341,6 +341,36 @@ module ValuesStructArrayTests
341
341
  assert_equal(values, target.values)
342
342
  end
343
343
 
344
+ def test_month_interval
345
+ values = [
346
+ {"field" => 1},
347
+ nil,
348
+ {"field" => nil},
349
+ ]
350
+ target = build(:month_interval, values)
351
+ assert_equal(values, target.values)
352
+ end
353
+
354
+ def test_day_time_interval
355
+ values = [
356
+ {"field" => {day: 1, millisecond: 100}},
357
+ nil,
358
+ {"field" => nil},
359
+ ]
360
+ target = build(:day_time_interval, values)
361
+ assert_equal(values, target.values)
362
+ end
363
+
364
+ def test_month_day_nano_interval
365
+ values = [
366
+ {"field" => {month: 1, day: 1, nanosecond: 100}},
367
+ nil,
368
+ {"field" => nil},
369
+ ]
370
+ target = build(:month_day_nano_interval, values)
371
+ assert_equal(values, target.values)
372
+ end
373
+
344
374
  def test_list
345
375
  values = [
346
376
  {"field" => [true, nil, false]},