red-arrow 18.1.0 → 19.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
@@ -1,543 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module ValuesSparseUnionArrayTests
19
- def build_data_type(type, type_codes)
20
- field_description = {}
21
- if type.is_a?(Hash)
22
- field_description = field_description.merge(type)
23
- else
24
- field_description[:type] = type
25
- end
26
- Arrow::SparseUnionDataType.new(fields: [
27
- field_description.merge(name: "0"),
28
- field_description.merge(name: "1"),
29
- ],
30
- type_codes: type_codes)
31
- end
32
-
33
- def build_array(type, values)
34
- type_codes = [0, 1]
35
- data_type = build_data_type(type, type_codes)
36
- type_ids = []
37
- arrays = data_type.fields.collect do |field|
38
- sub_schema = Arrow::Schema.new([field])
39
- sub_records = values.collect do |value|
40
- [value.nil? ? nil : value[field.name]]
41
- end
42
- sub_record_batch = Arrow::RecordBatch.new(sub_schema,
43
- sub_records)
44
- sub_record_batch.columns[0].data
45
- end
46
- values.each do |value|
47
- if value.key?("0")
48
- type_ids << type_codes[0]
49
- elsif value.key?("1")
50
- type_ids << type_codes[1]
51
- end
52
- end
53
- Arrow::SparseUnionArray.new(data_type,
54
- Arrow::Int8Array.new(type_ids),
55
- arrays)
56
- end
57
-
58
- def remove_field_names(values)
59
- values.collect do |value|
60
- if value.nil?
61
- value
62
- else
63
- value.values[0]
64
- end
65
- end
66
- end
67
-
68
- def test_null
69
- values = [
70
- {"0" => nil},
71
- ]
72
- target = build(:null, values)
73
- assert_equal(remove_field_names(values),
74
- target.values)
75
- end
76
-
77
- def test_boolean
78
- values = [
79
- {"0" => true},
80
- {"1" => nil},
81
- ]
82
- target = build(:boolean, values)
83
- assert_equal(remove_field_names(values),
84
- target.values)
85
- end
86
-
87
- def test_int8
88
- values = [
89
- {"0" => -(2 ** 7)},
90
- {"1" => nil},
91
- ]
92
- target = build(:int8, values)
93
- assert_equal(remove_field_names(values),
94
- target.values)
95
- end
96
-
97
- def test_uint8
98
- values = [
99
- {"0" => (2 ** 8) - 1},
100
- {"1" => nil},
101
- ]
102
- target = build(:uint8, values)
103
- assert_equal(remove_field_names(values),
104
- target.values)
105
- end
106
-
107
- def test_int16
108
- values = [
109
- {"0" => -(2 ** 15)},
110
- {"1" => nil},
111
- ]
112
- target = build(:int16, values)
113
- assert_equal(remove_field_names(values),
114
- target.values)
115
- end
116
-
117
- def test_uint16
118
- values = [
119
- {"0" => (2 ** 16) - 1},
120
- {"1" => nil},
121
- ]
122
- target = build(:uint16, values)
123
- assert_equal(remove_field_names(values),
124
- target.values)
125
- end
126
-
127
- def test_int32
128
- values = [
129
- {"0" => -(2 ** 31)},
130
- {"1" => nil},
131
- ]
132
- target = build(:int32, values)
133
- assert_equal(remove_field_names(values),
134
- target.values)
135
- end
136
-
137
- def test_uint32
138
- values = [
139
- {"0" => (2 ** 32) - 1},
140
- {"1" => nil},
141
- ]
142
- target = build(:uint32, values)
143
- assert_equal(remove_field_names(values),
144
- target.values)
145
- end
146
-
147
- def test_int64
148
- values = [
149
- {"0" => -(2 ** 63)},
150
- {"1" => nil},
151
- ]
152
- target = build(:int64, values)
153
- assert_equal(remove_field_names(values),
154
- target.values)
155
- end
156
-
157
- def test_uint64
158
- values = [
159
- {"0" => (2 ** 64) - 1},
160
- {"1" => nil},
161
- ]
162
- target = build(:uint64, values)
163
- assert_equal(remove_field_names(values),
164
- target.values)
165
- end
166
-
167
- def test_float
168
- values = [
169
- {"0" => -1.0},
170
- {"1" => nil},
171
- ]
172
- target = build(:float, values)
173
- assert_equal(remove_field_names(values),
174
- target.values)
175
- end
176
-
177
- def test_double
178
- values = [
179
- {"0" => -1.0},
180
- {"1" => nil},
181
- ]
182
- target = build(:double, values)
183
- assert_equal(remove_field_names(values),
184
- target.values)
185
- end
186
-
187
- def test_binary
188
- values = [
189
- {"0" => "\xff".b},
190
- {"1" => nil},
191
- ]
192
- target = build(:binary, values)
193
- assert_equal(remove_field_names(values),
194
- target.values)
195
- end
196
-
197
- def test_string
198
- values = [
199
- {"0" => "Ruby"},
200
- {"1" => nil},
201
- ]
202
- target = build(:string, values)
203
- assert_equal(remove_field_names(values),
204
- target.values)
205
- end
206
-
207
- def test_date32
208
- values = [
209
- {"0" => Date.new(1960, 1, 1)},
210
- {"1" => nil},
211
- ]
212
- target = build(:date32, values)
213
- assert_equal(remove_field_names(values),
214
- target.values)
215
- end
216
-
217
- def test_date64
218
- values = [
219
- {"0" => DateTime.new(1960, 1, 1, 2, 9, 30)},
220
- {"1" => nil},
221
- ]
222
- target = build(:date64, values)
223
- assert_equal(remove_field_names(values),
224
- target.values)
225
- end
226
-
227
- def test_timestamp_second
228
- values = [
229
- {"0" => Time.parse("1960-01-01T02:09:30Z")},
230
- {"1" => nil},
231
- ]
232
- target = build({
233
- type: :timestamp,
234
- unit: :second,
235
- },
236
- values)
237
- assert_equal(remove_field_names(values),
238
- target.values)
239
- end
240
-
241
- def test_timestamp_milli
242
- values = [
243
- {"0" => Time.parse("1960-01-01T02:09:30.123Z")},
244
- {"1" => nil},
245
- ]
246
- target = build({
247
- type: :timestamp,
248
- unit: :milli,
249
- },
250
- values)
251
- assert_equal(remove_field_names(values),
252
- target.values)
253
- end
254
-
255
- def test_timestamp_micro
256
- values = [
257
- {"0" => Time.parse("1960-01-01T02:09:30.123456Z")},
258
- {"1" => nil},
259
- ]
260
- target = build({
261
- type: :timestamp,
262
- unit: :micro,
263
- },
264
- values)
265
- assert_equal(remove_field_names(values),
266
- target.values)
267
- end
268
-
269
- def test_timestamp_nano
270
- values = [
271
- {"0" => Time.parse("1960-01-01T02:09:30.123456789Z")},
272
- {"1" => nil},
273
- ]
274
- target = build({
275
- type: :timestamp,
276
- unit: :nano,
277
- },
278
- values)
279
- assert_equal(remove_field_names(values),
280
- target.values)
281
- end
282
-
283
- def test_time32_second
284
- unit = Arrow::TimeUnit::SECOND
285
- values = [
286
- # 00:10:00
287
- {"0" => Arrow::Time.new(unit, 60 * 10)},
288
- {"1" => nil},
289
- ]
290
- target = build({
291
- type: :time32,
292
- unit: :second,
293
- },
294
- values)
295
- assert_equal(remove_field_names(values),
296
- target.values)
297
- end
298
-
299
- def test_time32_milli
300
- unit = Arrow::TimeUnit::MILLI
301
- values = [
302
- # 00:10:00.123
303
- {"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)},
304
- {"1" => nil},
305
- ]
306
- target = build({
307
- type: :time32,
308
- unit: :milli,
309
- },
310
- values)
311
- assert_equal(remove_field_names(values),
312
- target.values)
313
- end
314
-
315
- def test_time64_micro
316
- unit = Arrow::TimeUnit::MICRO
317
- values = [
318
- # 00:10:00.123456
319
- {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)},
320
- {"1" => nil},
321
- ]
322
- target = build({
323
- type: :time64,
324
- unit: :micro,
325
- },
326
- values)
327
- assert_equal(remove_field_names(values),
328
- target.values)
329
- end
330
-
331
- def test_time64_nano
332
- unit = Arrow::TimeUnit::NANO
333
- values = [
334
- # 00:10:00.123456789
335
- {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)},
336
- {"1" => nil},
337
- ]
338
- target = build({
339
- type: :time64,
340
- unit: :nano,
341
- },
342
- values)
343
- assert_equal(remove_field_names(values),
344
- target.values)
345
- end
346
-
347
- def test_decimal128
348
- values = [
349
- {"0" => BigDecimal("92.92")},
350
- {"1" => nil},
351
- ]
352
- target = build({
353
- type: :decimal128,
354
- precision: 8,
355
- scale: 2,
356
- },
357
- values)
358
- assert_equal(remove_field_names(values),
359
- target.values)
360
- end
361
-
362
- def test_month_interval
363
- values = [
364
- {"0" => 1},
365
- {"1" => nil},
366
- ]
367
- target = build(:month_interval, values)
368
- assert_equal(remove_field_names(values),
369
- target.values)
370
- end
371
-
372
- def test_day_time_interval
373
- values = [
374
- {"0" => {day: 1, millisecond: 100}},
375
- {"1" => nil},
376
- ]
377
- target = build(:day_time_interval, values)
378
- assert_equal(remove_field_names(values),
379
- target.values)
380
- end
381
-
382
- def test_month_day_nano_interval
383
- values = [
384
- {"0" => {month: 1, day: 1, nanosecond: 100}},
385
- {"1" => nil},
386
- ]
387
- target = build(:month_day_nano_interval, values)
388
- assert_equal(remove_field_names(values),
389
- target.values)
390
- end
391
-
392
- def test_decimal256
393
- values = [
394
- {"0" => BigDecimal("92.92")},
395
- {"1" => nil},
396
- ]
397
- target = build({
398
- type: :decimal256,
399
- precision: 38,
400
- scale: 2,
401
- },
402
- values)
403
- assert_equal(remove_field_names(values),
404
- target.values)
405
- end
406
-
407
- def test_list
408
- values = [
409
- {"0" => [true, nil, false]},
410
- {"1" => nil},
411
- ]
412
- target = build({
413
- type: :list,
414
- field: {
415
- name: :sub_element,
416
- type: :boolean,
417
- },
418
- },
419
- values)
420
- assert_equal(remove_field_names(values),
421
- target.values)
422
- end
423
-
424
- def test_struct
425
- values = [
426
- {"0" => {"sub_field" => true}},
427
- {"1" => nil},
428
- {"0" => {"sub_field" => nil}},
429
- ]
430
- target = build({
431
- type: :struct,
432
- fields: [
433
- {
434
- name: :sub_field,
435
- type: :boolean,
436
- },
437
- ],
438
- },
439
- values)
440
- assert_equal(remove_field_names(values),
441
- target.values)
442
- end
443
-
444
- def test_map
445
- values = [
446
- {"0" => {"key1" => true, "key2" => nil}},
447
- {"1" => nil},
448
- ]
449
- target = build({
450
- type: :map,
451
- key: :string,
452
- item: :boolean,
453
- },
454
- values)
455
- assert_equal(remove_field_names(values),
456
- target.values)
457
- end
458
-
459
- def test_sparse_union
460
- values = [
461
- {"0" => {"field1" => true}},
462
- {"1" => nil},
463
- {"0" => {"field2" => 29}},
464
- {"0" => {"field2" => nil}},
465
- ]
466
- target = build({
467
- type: :sparse_union,
468
- fields: [
469
- {
470
- name: :field1,
471
- type: :boolean,
472
- },
473
- {
474
- name: :field2,
475
- type: :uint8,
476
- },
477
- ],
478
- type_codes: [0, 1],
479
- },
480
- values)
481
- assert_equal(remove_field_names(remove_field_names(values)),
482
- target.values)
483
- end
484
-
485
- def test_dense_union
486
- values = [
487
- {"0" => {"field1" => true}},
488
- {"1" => nil},
489
- {"0" => {"field2" => 29}},
490
- {"0" => {"field2" => nil}},
491
- ]
492
- target = build({
493
- type: :dense_union,
494
- fields: [
495
- {
496
- name: :field1,
497
- type: :boolean,
498
- },
499
- {
500
- name: :field2,
501
- type: :uint8,
502
- },
503
- ],
504
- type_codes: [0, 1],
505
- },
506
- values)
507
- assert_equal(remove_field_names(remove_field_names(values)),
508
- target.values)
509
- end
510
-
511
- def test_dictionary
512
- values = [
513
- {"0" => "Ruby"},
514
- {"1" => nil},
515
- {"0" => "GLib"},
516
- ]
517
- target = build({
518
- type: :dictionary,
519
- index_data_type: :int8,
520
- value_data_type: :string,
521
- ordered: false,
522
- },
523
- values)
524
- assert_equal(remove_field_names(values),
525
- target.values)
526
- end
527
- end
528
-
529
- class ValuesArraySparseUnionArrayTest < Test::Unit::TestCase
530
- include ValuesSparseUnionArrayTests
531
-
532
- def build(type, values)
533
- build_array(type, values)
534
- end
535
- end
536
-
537
- class ValuesChunkedArraySparseUnionArrayTest < Test::Unit::TestCase
538
- include ValuesSparseUnionArrayTests
539
-
540
- def build(type, values)
541
- Arrow::ChunkedArray.new([build_array(type, values)])
542
- end
543
- end