red-arrow 18.1.0 → 19.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
@@ -1,628 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module RawRecordsListArrayTests
19
- def build_schema(type)
20
- field_description = {
21
- name: :element,
22
- }
23
- if type.is_a?(Hash)
24
- field_description = field_description.merge(type)
25
- else
26
- field_description[:type] = type
27
- end
28
- {
29
- column: {
30
- type: :list,
31
- field: field_description,
32
- },
33
- }
34
- end
35
-
36
- def test_null
37
- records = [
38
- [[nil, nil, nil]],
39
- [nil],
40
- ]
41
- target = build(:null, records)
42
- assert_equal(records, target.raw_records)
43
- end
44
-
45
- def test_boolean
46
- records = [
47
- [[true, nil, false]],
48
- [nil],
49
- ]
50
- target = build(:boolean, records)
51
- assert_equal(records, target.raw_records)
52
- end
53
-
54
- def test_int8
55
- records = [
56
- [[-(2 ** 7), nil, (2 ** 7) - 1]],
57
- [nil],
58
- ]
59
- target = build(:int8, records)
60
- assert_equal(records, target.raw_records)
61
- end
62
-
63
- def test_uint8
64
- records = [
65
- [[0, nil, (2 ** 8) - 1]],
66
- [nil],
67
- ]
68
- target = build(:uint8, records)
69
- assert_equal(records, target.raw_records)
70
- end
71
-
72
- def test_int16
73
- records = [
74
- [[-(2 ** 15), nil, (2 ** 15) - 1]],
75
- [nil],
76
- ]
77
- target = build(:int16, records)
78
- assert_equal(records, target.raw_records)
79
- end
80
-
81
- def test_uint16
82
- records = [
83
- [[0, nil, (2 ** 16) - 1]],
84
- [nil],
85
- ]
86
- target = build(:uint16, records)
87
- assert_equal(records, target.raw_records)
88
- end
89
-
90
- def test_int32
91
- records = [
92
- [[-(2 ** 31), nil, (2 ** 31) - 1]],
93
- [nil],
94
- ]
95
- target = build(:int32, records)
96
- assert_equal(records, target.raw_records)
97
- end
98
-
99
- def test_uint32
100
- records = [
101
- [[0, nil, (2 ** 32) - 1]],
102
- [nil],
103
- ]
104
- target = build(:uint32, records)
105
- assert_equal(records, target.raw_records)
106
- end
107
-
108
- def test_int64
109
- records = [
110
- [[-(2 ** 63), nil, (2 ** 63) - 1]],
111
- [nil],
112
- ]
113
- target = build(:int64, records)
114
- assert_equal(records, target.raw_records)
115
- end
116
-
117
- def test_uint64
118
- records = [
119
- [[0, nil, (2 ** 64) - 1]],
120
- [nil],
121
- ]
122
- target = build(:uint64, records)
123
- assert_equal(records, target.raw_records)
124
- end
125
-
126
- def test_float
127
- records = [
128
- [[-1.0, nil, 1.0]],
129
- [nil],
130
- ]
131
- target = build(:float, records)
132
- assert_equal(records, target.raw_records)
133
- end
134
-
135
- def test_double
136
- records = [
137
- [[-1.0, nil, 1.0]],
138
- [nil],
139
- ]
140
- target = build(:double, records)
141
- assert_equal(records, target.raw_records)
142
- end
143
-
144
- def test_binary
145
- records = [
146
- [["\x00".b, nil, "\xff".b]],
147
- [nil],
148
- ]
149
- target = build(:binary, records)
150
- assert_equal(records, target.raw_records)
151
- end
152
-
153
- def test_string
154
- records = [
155
- [
156
- [
157
- "Ruby",
158
- nil,
159
- "\u3042", # U+3042 HIRAGANA LETTER A
160
- ],
161
- ],
162
- [nil],
163
- ]
164
- target = build(:string, records)
165
- assert_equal(records, target.raw_records)
166
- end
167
-
168
- def test_date32
169
- records = [
170
- [
171
- [
172
- Date.new(1960, 1, 1),
173
- nil,
174
- Date.new(2017, 8, 23),
175
- ],
176
- ],
177
- [nil],
178
- ]
179
- target = build(:date32, records)
180
- assert_equal(records, target.raw_records)
181
- end
182
-
183
- def test_date64
184
- records = [
185
- [
186
- [
187
- DateTime.new(1960, 1, 1, 2, 9, 30),
188
- nil,
189
- DateTime.new(2017, 8, 23, 14, 57, 2),
190
- ],
191
- ],
192
- [nil],
193
- ]
194
- target = build(:date64, records)
195
- assert_equal(records, target.raw_records)
196
- end
197
-
198
- def test_timestamp_second
199
- records = [
200
- [
201
- [
202
- Time.parse("1960-01-01T02:09:30Z"),
203
- nil,
204
- Time.parse("2017-08-23T14:57:02Z"),
205
- ],
206
- ],
207
- [nil],
208
- ]
209
- target = build({
210
- type: :timestamp,
211
- unit: :second,
212
- },
213
- records)
214
- assert_equal(records, target.raw_records)
215
- end
216
-
217
- def test_timestamp_milli
218
- records = [
219
- [
220
- [
221
- Time.parse("1960-01-01T02:09:30.123Z"),
222
- nil,
223
- Time.parse("2017-08-23T14:57:02.987Z"),
224
- ],
225
- ],
226
- [nil],
227
- ]
228
- target = build({
229
- type: :timestamp,
230
- unit: :milli,
231
- },
232
- records)
233
- assert_equal(records, target.raw_records)
234
- end
235
-
236
- def test_timestamp_micro
237
- records = [
238
- [
239
- [
240
- Time.parse("1960-01-01T02:09:30.123456Z"),
241
- nil,
242
- Time.parse("2017-08-23T14:57:02.987654Z"),
243
- ],
244
- ],
245
- [nil],
246
- ]
247
- target = build({
248
- type: :timestamp,
249
- unit: :micro,
250
- },
251
- records)
252
- assert_equal(records, target.raw_records)
253
- end
254
-
255
- def test_timestamp_nano
256
- records = [
257
- [
258
- [
259
- Time.parse("1960-01-01T02:09:30.123456789Z"),
260
- nil,
261
- Time.parse("2017-08-23T14:57:02.987654321Z"),
262
- ],
263
- ],
264
- [nil],
265
- ]
266
- target = build({
267
- type: :timestamp,
268
- unit: :nano,
269
- },
270
- records)
271
- assert_equal(records, target.raw_records)
272
- end
273
-
274
- def test_time32_second
275
- unit = Arrow::TimeUnit::SECOND
276
- records = [
277
- [
278
- [
279
- # 00:10:00
280
- Arrow::Time.new(unit, 60 * 10),
281
- nil,
282
- # 02:00:09
283
- Arrow::Time.new(unit, 60 * 60 * 2 + 9),
284
- ],
285
- ],
286
- [nil],
287
- ]
288
- target = build({
289
- type: :time32,
290
- unit: :second,
291
- },
292
- records)
293
- assert_equal(records, target.raw_records)
294
- end
295
-
296
- def test_time32_milli
297
- unit = Arrow::TimeUnit::MILLI
298
- records = [
299
- [
300
- [
301
- # 00:10:00.123
302
- Arrow::Time.new(unit, (60 * 10) * 1000 + 123),
303
- nil,
304
- # 02:00:09.987
305
- Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987),
306
- ],
307
- ],
308
- [nil],
309
- ]
310
- target = build({
311
- type: :time32,
312
- unit: :milli,
313
- },
314
- records)
315
- assert_equal(records, target.raw_records)
316
- end
317
-
318
- def test_time64_micro
319
- unit = Arrow::TimeUnit::MICRO
320
- records = [
321
- [
322
- [
323
- # 00:10:00.123456
324
- Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456),
325
- nil,
326
- # 02:00:09.987654
327
- Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654),
328
- ],
329
- ],
330
- [nil],
331
- ]
332
- target = build({
333
- type: :time64,
334
- unit: :micro,
335
- },
336
- records)
337
- assert_equal(records, target.raw_records)
338
- end
339
-
340
- def test_time64_nano
341
- unit = Arrow::TimeUnit::NANO
342
- records = [
343
- [
344
- [
345
- # 00:10:00.123456789
346
- Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789),
347
- nil,
348
- # 02:00:09.987654321
349
- Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321),
350
- ],
351
- ],
352
- [nil],
353
- ]
354
- target = build({
355
- type: :time64,
356
- unit: :nano,
357
- },
358
- records)
359
- assert_equal(records, target.raw_records)
360
- end
361
-
362
- def test_decimal128
363
- records = [
364
- [
365
- [
366
- BigDecimal("92.92"),
367
- nil,
368
- BigDecimal("29.29"),
369
- ],
370
- ],
371
- [nil],
372
- ]
373
- target = build({
374
- type: :decimal128,
375
- precision: 8,
376
- scale: 2,
377
- },
378
- records)
379
- assert_equal(records, target.raw_records)
380
- end
381
-
382
- def test_decimal256
383
- records = [
384
- [
385
- [
386
- BigDecimal("92.92"),
387
- nil,
388
- BigDecimal("29.29"),
389
- ],
390
- ],
391
- [nil],
392
- ]
393
- target = build({
394
- type: :decimal256,
395
- precision: 38,
396
- scale: 2,
397
- },
398
- records)
399
- assert_equal(records, target.raw_records)
400
- end
401
-
402
- def test_month_interval
403
- records = [
404
- [[1, nil, 12]],
405
- [nil],
406
- ]
407
- target = build(:month_interval, records)
408
- assert_equal(records, target.raw_records)
409
- end
410
-
411
- def test_day_time_interval
412
- records = [
413
- [
414
- [
415
- {day: 1, millisecond: 100},
416
- nil,
417
- {day: 2, millisecond: 300},
418
- ]
419
- ],
420
- [nil],
421
- ]
422
- target = build(:day_time_interval, records)
423
- assert_equal(records, target.raw_records)
424
- end
425
-
426
- def test_month_day_nano_interval
427
- records = [
428
- [
429
- [
430
- {month: 1, day: 1, nanosecond: 100},
431
- nil,
432
- {month: 2, day: 3, nanosecond: 400},
433
- ]
434
- ],
435
- [nil],
436
- ]
437
- target = build(:month_day_nano_interval, records)
438
- assert_equal(records, target.raw_records)
439
- end
440
-
441
- def test_list
442
- records = [
443
- [
444
- [
445
- [
446
- true,
447
- nil,
448
- ],
449
- nil,
450
- [
451
- nil,
452
- false,
453
- ],
454
- ],
455
- ],
456
- [nil],
457
- ]
458
- target = build({
459
- type: :list,
460
- field: {
461
- name: :sub_element,
462
- type: :boolean,
463
- },
464
- },
465
- records)
466
- assert_equal(records, target.raw_records)
467
- end
468
-
469
- def test_struct
470
- records = [
471
- [
472
- [
473
- {"field" => true},
474
- nil,
475
- {"field" => nil},
476
- ],
477
- ],
478
- [nil],
479
- ]
480
- target = build({
481
- type: :struct,
482
- fields: [
483
- {
484
- name: :field,
485
- type: :boolean,
486
- },
487
- ],
488
- },
489
- records)
490
- assert_equal(records, target.raw_records)
491
- end
492
-
493
- def test_map
494
- records = [
495
- [
496
- [
497
- {"key1" => true, "key2" => nil},
498
- nil,
499
- ],
500
- ],
501
- [nil],
502
- ]
503
- target = build({
504
- type: :map,
505
- key: :string,
506
- item: :boolean,
507
- },
508
- records)
509
- assert_equal(records, target.raw_records)
510
- end
511
-
512
- def remove_union_field_names(records)
513
- records.collect do |record|
514
- record.collect do |column|
515
- if column.nil?
516
- column
517
- else
518
- column.collect do |value|
519
- if value.nil?
520
- value
521
- else
522
- value.values[0]
523
- end
524
- end
525
- end
526
- end
527
- end
528
- end
529
-
530
- def test_sparse_union
531
- records = [
532
- [
533
- [
534
- {"field1" => true},
535
- nil,
536
- {"field2" => 29},
537
- {"field2" => nil},
538
- ],
539
- ],
540
- [nil],
541
- ]
542
- target = build({
543
- type: :sparse_union,
544
- fields: [
545
- {
546
- name: :field1,
547
- type: :boolean,
548
- },
549
- {
550
- name: :field2,
551
- type: :uint8,
552
- },
553
- ],
554
- type_codes: [0, 1],
555
- },
556
- records)
557
- assert_equal(remove_union_field_names(records),
558
- target.raw_records)
559
- end
560
-
561
- def test_dense_union
562
- records = [
563
- [
564
- [
565
- {"field1" => true},
566
- nil,
567
- {"field2" => 29},
568
- {"field2" => nil},
569
- ],
570
- ],
571
- [nil],
572
- ]
573
- target = build({
574
- type: :dense_union,
575
- fields: [
576
- {
577
- name: :field1,
578
- type: :boolean,
579
- },
580
- {
581
- name: :field2,
582
- type: :uint8,
583
- },
584
- ],
585
- type_codes: [0, 1],
586
- },
587
- records)
588
- assert_equal(remove_union_field_names(records),
589
- target.raw_records)
590
- end
591
-
592
- def test_dictionary
593
- records = [
594
- [
595
- [
596
- "Ruby",
597
- nil,
598
- "GLib",
599
- ],
600
- ],
601
- [nil],
602
- ]
603
- target = build({
604
- type: :dictionary,
605
- index_data_type: :int8,
606
- value_data_type: :string,
607
- ordered: false,
608
- },
609
- records)
610
- assert_equal(records, target.raw_records)
611
- end
612
- end
613
-
614
- class RawRecordsRecordBatchListArrayTest < Test::Unit::TestCase
615
- include RawRecordsListArrayTests
616
-
617
- def build(type, records)
618
- Arrow::RecordBatch.new(build_schema(type), records)
619
- end
620
- end
621
-
622
- class RawRecordsTableListArrayTest < Test::Unit::TestCase
623
- include RawRecordsListArrayTests
624
-
625
- def build(type, records)
626
- Arrow::Table.new(build_schema(type), records)
627
- end
628
- end