red-arrow 18.1.0 → 19.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
@@ -1,628 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module EachRawRecordListArrayTests
19
- def build_schema(type)
20
- field_description = {
21
- name: :element,
22
- }
23
- if type.is_a?(Hash)
24
- field_description = field_description.merge(type)
25
- else
26
- field_description[:type] = type
27
- end
28
- {
29
- column: {
30
- type: :list,
31
- field: field_description,
32
- },
33
- }
34
- end
35
-
36
- def test_null
37
- records = [
38
- [[nil, nil, nil]],
39
- [nil],
40
- ]
41
- target = build(:null, records)
42
- assert_equal(records, target.each_raw_record.to_a)
43
- end
44
-
45
- def test_boolean
46
- records = [
47
- [[true, nil, false]],
48
- [nil],
49
- ]
50
- target = build(:boolean, records)
51
- assert_equal(records, target.each_raw_record.to_a)
52
- end
53
-
54
- def test_int8
55
- records = [
56
- [[-(2 ** 7), nil, (2 ** 7) - 1]],
57
- [nil],
58
- ]
59
- target = build(:int8, records)
60
- assert_equal(records, target.each_raw_record.to_a)
61
- end
62
-
63
- def test_uint8
64
- records = [
65
- [[0, nil, (2 ** 8) - 1]],
66
- [nil],
67
- ]
68
- target = build(:uint8, records)
69
- assert_equal(records, target.each_raw_record.to_a)
70
- end
71
-
72
- def test_int16
73
- records = [
74
- [[-(2 ** 15), nil, (2 ** 15) - 1]],
75
- [nil],
76
- ]
77
- target = build(:int16, records)
78
- assert_equal(records, target.each_raw_record.to_a)
79
- end
80
-
81
- def test_uint16
82
- records = [
83
- [[0, nil, (2 ** 16) - 1]],
84
- [nil],
85
- ]
86
- target = build(:uint16, records)
87
- assert_equal(records, target.each_raw_record.to_a)
88
- end
89
-
90
- def test_int32
91
- records = [
92
- [[-(2 ** 31), nil, (2 ** 31) - 1]],
93
- [nil],
94
- ]
95
- target = build(:int32, records)
96
- assert_equal(records, target.each_raw_record.to_a)
97
- end
98
-
99
- def test_uint32
100
- records = [
101
- [[0, nil, (2 ** 32) - 1]],
102
- [nil],
103
- ]
104
- target = build(:uint32, records)
105
- assert_equal(records, target.each_raw_record.to_a)
106
- end
107
-
108
- def test_int64
109
- records = [
110
- [[-(2 ** 63), nil, (2 ** 63) - 1]],
111
- [nil],
112
- ]
113
- target = build(:int64, records)
114
- assert_equal(records, target.each_raw_record.to_a)
115
- end
116
-
117
- def test_uint64
118
- records = [
119
- [[0, nil, (2 ** 64) - 1]],
120
- [nil],
121
- ]
122
- target = build(:uint64, records)
123
- assert_equal(records, target.each_raw_record.to_a)
124
- end
125
-
126
- def test_float
127
- records = [
128
- [[-1.0, nil, 1.0]],
129
- [nil],
130
- ]
131
- target = build(:float, records)
132
- assert_equal(records, target.each_raw_record.to_a)
133
- end
134
-
135
- def test_double
136
- records = [
137
- [[-1.0, nil, 1.0]],
138
- [nil],
139
- ]
140
- target = build(:double, records)
141
- assert_equal(records, target.each_raw_record.to_a)
142
- end
143
-
144
- def test_binary
145
- records = [
146
- [["\x00".b, nil, "\xff".b]],
147
- [nil],
148
- ]
149
- target = build(:binary, records)
150
- assert_equal(records, target.each_raw_record.to_a)
151
- end
152
-
153
- def test_string
154
- records = [
155
- [
156
- [
157
- "Ruby",
158
- nil,
159
- "\u3042", # U+3042 HIRAGANA LETTER A
160
- ],
161
- ],
162
- [nil],
163
- ]
164
- target = build(:string, records)
165
- assert_equal(records, target.each_raw_record.to_a)
166
- end
167
-
168
- def test_date32
169
- records = [
170
- [
171
- [
172
- Date.new(1960, 1, 1),
173
- nil,
174
- Date.new(2017, 8, 23),
175
- ],
176
- ],
177
- [nil],
178
- ]
179
- target = build(:date32, records)
180
- assert_equal(records, target.each_raw_record.to_a)
181
- end
182
-
183
- def test_date64
184
- records = [
185
- [
186
- [
187
- DateTime.new(1960, 1, 1, 2, 9, 30),
188
- nil,
189
- DateTime.new(2017, 8, 23, 14, 57, 2),
190
- ],
191
- ],
192
- [nil],
193
- ]
194
- target = build(:date64, records)
195
- assert_equal(records, target.each_raw_record.to_a)
196
- end
197
-
198
- def test_timestamp_second
199
- records = [
200
- [
201
- [
202
- Time.parse("1960-01-01T02:09:30Z"),
203
- nil,
204
- Time.parse("2017-08-23T14:57:02Z"),
205
- ],
206
- ],
207
- [nil],
208
- ]
209
- target = build({
210
- type: :timestamp,
211
- unit: :second,
212
- },
213
- records)
214
- assert_equal(records, target.each_raw_record.to_a)
215
- end
216
-
217
- def test_timestamp_milli
218
- records = [
219
- [
220
- [
221
- Time.parse("1960-01-01T02:09:30.123Z"),
222
- nil,
223
- Time.parse("2017-08-23T14:57:02.987Z"),
224
- ],
225
- ],
226
- [nil],
227
- ]
228
- target = build({
229
- type: :timestamp,
230
- unit: :milli,
231
- },
232
- records)
233
- assert_equal(records, target.each_raw_record.to_a)
234
- end
235
-
236
- def test_timestamp_micro
237
- records = [
238
- [
239
- [
240
- Time.parse("1960-01-01T02:09:30.123456Z"),
241
- nil,
242
- Time.parse("2017-08-23T14:57:02.987654Z"),
243
- ],
244
- ],
245
- [nil],
246
- ]
247
- target = build({
248
- type: :timestamp,
249
- unit: :micro,
250
- },
251
- records)
252
- assert_equal(records, target.each_raw_record.to_a)
253
- end
254
-
255
- def test_timestamp_nano
256
- records = [
257
- [
258
- [
259
- Time.parse("1960-01-01T02:09:30.123456789Z"),
260
- nil,
261
- Time.parse("2017-08-23T14:57:02.987654321Z"),
262
- ],
263
- ],
264
- [nil],
265
- ]
266
- target = build({
267
- type: :timestamp,
268
- unit: :nano,
269
- },
270
- records)
271
- assert_equal(records, target.each_raw_record.to_a)
272
- end
273
-
274
- def test_time32_second
275
- unit = Arrow::TimeUnit::SECOND
276
- records = [
277
- [
278
- [
279
- # 00:10:00
280
- Arrow::Time.new(unit, 60 * 10),
281
- nil,
282
- # 02:00:09
283
- Arrow::Time.new(unit, 60 * 60 * 2 + 9),
284
- ],
285
- ],
286
- [nil],
287
- ]
288
- target = build({
289
- type: :time32,
290
- unit: :second,
291
- },
292
- records)
293
- assert_equal(records, target.each_raw_record.to_a)
294
- end
295
-
296
- def test_time32_milli
297
- unit = Arrow::TimeUnit::MILLI
298
- records = [
299
- [
300
- [
301
- # 00:10:00.123
302
- Arrow::Time.new(unit, (60 * 10) * 1000 + 123),
303
- nil,
304
- # 02:00:09.987
305
- Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987),
306
- ],
307
- ],
308
- [nil],
309
- ]
310
- target = build({
311
- type: :time32,
312
- unit: :milli,
313
- },
314
- records)
315
- assert_equal(records, target.each_raw_record.to_a)
316
- end
317
-
318
- def test_time64_micro
319
- unit = Arrow::TimeUnit::MICRO
320
- records = [
321
- [
322
- [
323
- # 00:10:00.123456
324
- Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456),
325
- nil,
326
- # 02:00:09.987654
327
- Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654),
328
- ],
329
- ],
330
- [nil],
331
- ]
332
- target = build({
333
- type: :time64,
334
- unit: :micro,
335
- },
336
- records)
337
- assert_equal(records, target.each_raw_record.to_a)
338
- end
339
-
340
- def test_time64_nano
341
- unit = Arrow::TimeUnit::NANO
342
- records = [
343
- [
344
- [
345
- # 00:10:00.123456789
346
- Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789),
347
- nil,
348
- # 02:00:09.987654321
349
- Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321),
350
- ],
351
- ],
352
- [nil],
353
- ]
354
- target = build({
355
- type: :time64,
356
- unit: :nano,
357
- },
358
- records)
359
- assert_equal(records, target.each_raw_record.to_a)
360
- end
361
-
362
- def test_decimal128
363
- records = [
364
- [
365
- [
366
- BigDecimal("92.92"),
367
- nil,
368
- BigDecimal("29.29"),
369
- ],
370
- ],
371
- [nil],
372
- ]
373
- target = build({
374
- type: :decimal128,
375
- precision: 8,
376
- scale: 2,
377
- },
378
- records)
379
- assert_equal(records, target.each_raw_record.to_a)
380
- end
381
-
382
- def test_decimal256
383
- records = [
384
- [
385
- [
386
- BigDecimal("92.92"),
387
- nil,
388
- BigDecimal("29.29"),
389
- ],
390
- ],
391
- [nil],
392
- ]
393
- target = build({
394
- type: :decimal256,
395
- precision: 38,
396
- scale: 2,
397
- },
398
- records)
399
- assert_equal(records, target.each_raw_record.to_a)
400
- end
401
-
402
- def test_month_interval
403
- records = [
404
- [[1, nil, 12]],
405
- [nil],
406
- ]
407
- target = build(:month_interval, records)
408
- assert_equal(records, target.each_raw_record.to_a)
409
- end
410
-
411
- def test_day_time_interval
412
- records = [
413
- [
414
- [
415
- {day: 1, millisecond: 100},
416
- nil,
417
- {day: 2, millisecond: 300},
418
- ]
419
- ],
420
- [nil],
421
- ]
422
- target = build(:day_time_interval, records)
423
- assert_equal(records, target.each_raw_record.to_a)
424
- end
425
-
426
- def test_month_day_nano_interval
427
- records = [
428
- [
429
- [
430
- {month: 1, day: 1, nanosecond: 100},
431
- nil,
432
- {month: 2, day: 3, nanosecond: 400},
433
- ]
434
- ],
435
- [nil],
436
- ]
437
- target = build(:month_day_nano_interval, records)
438
- assert_equal(records, target.each_raw_record.to_a)
439
- end
440
-
441
- def test_list
442
- records = [
443
- [
444
- [
445
- [
446
- true,
447
- nil,
448
- ],
449
- nil,
450
- [
451
- nil,
452
- false,
453
- ],
454
- ],
455
- ],
456
- [nil],
457
- ]
458
- target = build({
459
- type: :list,
460
- field: {
461
- name: :sub_element,
462
- type: :boolean,
463
- },
464
- },
465
- records)
466
- assert_equal(records, target.each_raw_record.to_a)
467
- end
468
-
469
- def test_struct
470
- records = [
471
- [
472
- [
473
- {"field" => true},
474
- nil,
475
- {"field" => nil},
476
- ],
477
- ],
478
- [nil],
479
- ]
480
- target = build({
481
- type: :struct,
482
- fields: [
483
- {
484
- name: :field,
485
- type: :boolean,
486
- },
487
- ],
488
- },
489
- records)
490
- assert_equal(records, target.each_raw_record.to_a)
491
- end
492
-
493
- def test_map
494
- records = [
495
- [
496
- [
497
- {"key1" => true, "key2" => nil},
498
- nil,
499
- ],
500
- ],
501
- [nil],
502
- ]
503
- target = build({
504
- type: :map,
505
- key: :string,
506
- item: :boolean,
507
- },
508
- records)
509
- assert_equal(records, target.each_raw_record.to_a)
510
- end
511
-
512
- def remove_union_field_names(records)
513
- records.collect do |record|
514
- record.collect do |column|
515
- if column.nil?
516
- column
517
- else
518
- column.collect do |value|
519
- if value.nil?
520
- value
521
- else
522
- value.values[0]
523
- end
524
- end
525
- end
526
- end
527
- end
528
- end
529
-
530
- def test_sparse_union
531
- records = [
532
- [
533
- [
534
- {"field1" => true},
535
- nil,
536
- {"field2" => 29},
537
- {"field2" => nil},
538
- ],
539
- ],
540
- [nil],
541
- ]
542
- target = build({
543
- type: :sparse_union,
544
- fields: [
545
- {
546
- name: :field1,
547
- type: :boolean,
548
- },
549
- {
550
- name: :field2,
551
- type: :uint8,
552
- },
553
- ],
554
- type_codes: [0, 1],
555
- },
556
- records)
557
- assert_equal(remove_union_field_names(records),
558
- target.each_raw_record.to_a)
559
- end
560
-
561
- def test_dense_union
562
- records = [
563
- [
564
- [
565
- {"field1" => true},
566
- nil,
567
- {"field2" => 29},
568
- {"field2" => nil},
569
- ],
570
- ],
571
- [nil],
572
- ]
573
- target = build({
574
- type: :dense_union,
575
- fields: [
576
- {
577
- name: :field1,
578
- type: :boolean,
579
- },
580
- {
581
- name: :field2,
582
- type: :uint8,
583
- },
584
- ],
585
- type_codes: [0, 1],
586
- },
587
- records)
588
- assert_equal(remove_union_field_names(records),
589
- target.each_raw_record.to_a)
590
- end
591
-
592
- def test_dictionary
593
- records = [
594
- [
595
- [
596
- "Ruby",
597
- nil,
598
- "GLib",
599
- ],
600
- ],
601
- [nil],
602
- ]
603
- target = build({
604
- type: :dictionary,
605
- index_data_type: :int8,
606
- value_data_type: :string,
607
- ordered: false,
608
- },
609
- records)
610
- assert_equal(records, target.each_raw_record.to_a)
611
- end
612
- end
613
-
614
- class EachRawRecordRecordBatchListArrayTest < Test::Unit::TestCase
615
- include EachRawRecordListArrayTests
616
-
617
- def build(type, records)
618
- Arrow::RecordBatch.new(build_schema(type), records)
619
- end
620
- end
621
-
622
- class EachRawRecordTableListArrayTest < Test::Unit::TestCase
623
- include EachRawRecordListArrayTests
624
-
625
- def build(type, records)
626
- Arrow::Table.new(build_schema(type), records)
627
- end
628
- end