red-arrow 18.1.0 → 19.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
@@ -1,552 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module ValuesDenseUnionArrayTests
19
- def build_data_type(type, type_codes)
20
- field_description = {}
21
- if type.is_a?(Hash)
22
- field_description = field_description.merge(type)
23
- else
24
- field_description[:type] = type
25
- end
26
- Arrow::DenseUnionDataType.new(fields: [
27
- field_description.merge(name: "0"),
28
- field_description.merge(name: "1"),
29
- ],
30
- type_codes: type_codes)
31
- end
32
-
33
- def build_array(type, values)
34
- type_codes = [0, 1]
35
- data_type = build_data_type(type, type_codes)
36
- type_ids = []
37
- offsets = []
38
- arrays = data_type.fields.collect do |field|
39
- sub_schema = Arrow::Schema.new([field])
40
- sub_records = []
41
- values.each do |value|
42
- next if value.nil?
43
- next unless value.key?(field.name)
44
- sub_records << [value[field.name]]
45
- end
46
- sub_record_batch = Arrow::RecordBatch.new(sub_schema,
47
- sub_records)
48
- sub_record_batch.columns[0].data
49
- end
50
- values.each do |value|
51
- if value.key?("0")
52
- type_id = type_codes[0]
53
- type_ids << type_id
54
- offsets << (type_ids.count(type_id) - 1)
55
- elsif value.key?("1")
56
- type_id = type_codes[1]
57
- type_ids << type_id
58
- offsets << (type_ids.count(type_id) - 1)
59
- end
60
- end
61
- Arrow::DenseUnionArray.new(data_type,
62
- Arrow::Int8Array.new(type_ids),
63
- Arrow::Int32Array.new(offsets),
64
- arrays)
65
- end
66
-
67
- def remove_field_names(values)
68
- values.collect do |value|
69
- if value.nil?
70
- value
71
- else
72
- value.values[0]
73
- end
74
- end
75
- end
76
-
77
- def test_null
78
- values = [
79
- {"0" => nil},
80
- ]
81
- target = build(:null, values)
82
- assert_equal(remove_field_names(values),
83
- target.values)
84
- end
85
-
86
- def test_boolean
87
- values = [
88
- {"0" => true},
89
- {"1" => nil},
90
- ]
91
- target = build(:boolean, values)
92
- assert_equal(remove_field_names(values),
93
- target.values)
94
- end
95
-
96
- def test_int8
97
- values = [
98
- {"0" => -(2 ** 7)},
99
- {"1" => nil},
100
- ]
101
- target = build(:int8, values)
102
- assert_equal(remove_field_names(values),
103
- target.values)
104
- end
105
-
106
- def test_uint8
107
- values = [
108
- {"0" => (2 ** 8) - 1},
109
- {"1" => nil},
110
- ]
111
- target = build(:uint8, values)
112
- assert_equal(remove_field_names(values),
113
- target.values)
114
- end
115
-
116
- def test_int16
117
- values = [
118
- {"0" => -(2 ** 15)},
119
- {"1" => nil},
120
- ]
121
- target = build(:int16, values)
122
- assert_equal(remove_field_names(values),
123
- target.values)
124
- end
125
-
126
- def test_uint16
127
- values = [
128
- {"0" => (2 ** 16) - 1},
129
- {"1" => nil},
130
- ]
131
- target = build(:uint16, values)
132
- assert_equal(remove_field_names(values),
133
- target.values)
134
- end
135
-
136
- def test_int32
137
- values = [
138
- {"0" => -(2 ** 31)},
139
- {"1" => nil},
140
- ]
141
- target = build(:int32, values)
142
- assert_equal(remove_field_names(values),
143
- target.values)
144
- end
145
-
146
- def test_uint32
147
- values = [
148
- {"0" => (2 ** 32) - 1},
149
- {"1" => nil},
150
- ]
151
- target = build(:uint32, values)
152
- assert_equal(remove_field_names(values),
153
- target.values)
154
- end
155
-
156
- def test_int64
157
- values = [
158
- {"0" => -(2 ** 63)},
159
- {"1" => nil},
160
- ]
161
- target = build(:int64, values)
162
- assert_equal(remove_field_names(values),
163
- target.values)
164
- end
165
-
166
- def test_uint64
167
- values = [
168
- {"0" => (2 ** 64) - 1},
169
- {"1" => nil},
170
- ]
171
- target = build(:uint64, values)
172
- assert_equal(remove_field_names(values),
173
- target.values)
174
- end
175
-
176
- def test_float
177
- values = [
178
- {"0" => -1.0},
179
- {"1" => nil},
180
- ]
181
- target = build(:float, values)
182
- assert_equal(remove_field_names(values),
183
- target.values)
184
- end
185
-
186
- def test_double
187
- values = [
188
- {"0" => -1.0},
189
- {"1" => nil},
190
- ]
191
- target = build(:double, values)
192
- assert_equal(remove_field_names(values),
193
- target.values)
194
- end
195
-
196
- def test_binary
197
- values = [
198
- {"0" => "\xff".b},
199
- {"1" => nil},
200
- ]
201
- target = build(:binary, values)
202
- assert_equal(remove_field_names(values),
203
- target.values)
204
- end
205
-
206
- def test_string
207
- values = [
208
- {"0" => "Ruby"},
209
- {"1" => nil},
210
- ]
211
- target = build(:string, values)
212
- assert_equal(remove_field_names(values),
213
- target.values)
214
- end
215
-
216
- def test_date32
217
- values = [
218
- {"0" => Date.new(1960, 1, 1)},
219
- {"1" => nil},
220
- ]
221
- target = build(:date32, values)
222
- assert_equal(remove_field_names(values),
223
- target.values)
224
- end
225
-
226
- def test_date64
227
- values = [
228
- {"0" => DateTime.new(1960, 1, 1, 2, 9, 30)},
229
- {"1" => nil},
230
- ]
231
- target = build(:date64, values)
232
- assert_equal(remove_field_names(values),
233
- target.values)
234
- end
235
-
236
- def test_timestamp_second
237
- values = [
238
- {"0" => Time.parse("1960-01-01T02:09:30Z")},
239
- {"1" => nil},
240
- ]
241
- target = build({
242
- type: :timestamp,
243
- unit: :second,
244
- },
245
- values)
246
- assert_equal(remove_field_names(values),
247
- target.values)
248
- end
249
-
250
- def test_timestamp_milli
251
- values = [
252
- {"0" => Time.parse("1960-01-01T02:09:30.123Z")},
253
- {"1" => nil},
254
- ]
255
- target = build({
256
- type: :timestamp,
257
- unit: :milli,
258
- },
259
- values)
260
- assert_equal(remove_field_names(values),
261
- target.values)
262
- end
263
-
264
- def test_timestamp_micro
265
- values = [
266
- {"0" => Time.parse("1960-01-01T02:09:30.123456Z")},
267
- {"1" => nil},
268
- ]
269
- target = build({
270
- type: :timestamp,
271
- unit: :micro,
272
- },
273
- values)
274
- assert_equal(remove_field_names(values),
275
- target.values)
276
- end
277
-
278
- def test_timestamp_nano
279
- values = [
280
- {"0" => Time.parse("1960-01-01T02:09:30.123456789Z")},
281
- {"1" => nil},
282
- ]
283
- target = build({
284
- type: :timestamp,
285
- unit: :nano,
286
- },
287
- values)
288
- assert_equal(remove_field_names(values),
289
- target.values)
290
- end
291
-
292
- def test_time32_second
293
- unit = Arrow::TimeUnit::SECOND
294
- values = [
295
- # 00:10:00
296
- {"0" => Arrow::Time.new(unit, 60 * 10)},
297
- {"1" => nil},
298
- ]
299
- target = build({
300
- type: :time32,
301
- unit: :second,
302
- },
303
- values)
304
- assert_equal(remove_field_names(values),
305
- target.values)
306
- end
307
-
308
- def test_time32_milli
309
- unit = Arrow::TimeUnit::MILLI
310
- values = [
311
- # 00:10:00.123
312
- {"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)},
313
- {"1" => nil},
314
- ]
315
- target = build({
316
- type: :time32,
317
- unit: :milli,
318
- },
319
- values)
320
- assert_equal(remove_field_names(values),
321
- target.values)
322
- end
323
-
324
- def test_time64_micro
325
- unit = Arrow::TimeUnit::MICRO
326
- values = [
327
- # 00:10:00.123456
328
- {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)},
329
- {"1" => nil},
330
- ]
331
- target = build({
332
- type: :time64,
333
- unit: :micro,
334
- },
335
- values)
336
- assert_equal(remove_field_names(values),
337
- target.values)
338
- end
339
-
340
- def test_time64_nano
341
- unit = Arrow::TimeUnit::NANO
342
- values = [
343
- # 00:10:00.123456789
344
- {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)},
345
- {"1" => nil},
346
- ]
347
- target = build({
348
- type: :time64,
349
- unit: :nano,
350
- },
351
- values)
352
- assert_equal(remove_field_names(values),
353
- target.values)
354
- end
355
-
356
- def test_decimal128
357
- values = [
358
- {"0" => BigDecimal("92.92")},
359
- {"1" => nil},
360
- ]
361
- target = build({
362
- type: :decimal128,
363
- precision: 8,
364
- scale: 2,
365
- },
366
- values)
367
- assert_equal(remove_field_names(values),
368
- target.values)
369
- end
370
-
371
- def test_decimal256
372
- values = [
373
- {"0" => BigDecimal("92.92")},
374
- {"1" => nil},
375
- ]
376
- target = build({
377
- type: :decimal256,
378
- precision: 38,
379
- scale: 2,
380
- },
381
- values)
382
- assert_equal(remove_field_names(values),
383
- target.values)
384
- end
385
-
386
- def test_month_interval
387
- values = [
388
- {"0" => 1},
389
- {"1" => nil},
390
- ]
391
- target = build(:month_interval, values)
392
- assert_equal(remove_field_names(values),
393
- target.values)
394
- end
395
-
396
- def test_day_time_interval
397
- values = [
398
- {"0" => {day: 1, millisecond: 100}},
399
- {"1" => nil},
400
- ]
401
- target = build(:day_time_interval, values)
402
- assert_equal(remove_field_names(values),
403
- target.values)
404
- end
405
-
406
- def test_month_day_nano_interval
407
- values = [
408
- {"0" => {month: 1, day: 1, nanosecond: 100}},
409
- {"1" => nil},
410
- ]
411
- target = build(:month_day_nano_interval, values)
412
- assert_equal(remove_field_names(values),
413
- target.values)
414
- end
415
-
416
- def test_list
417
- values = [
418
- {"0" => [true, nil, false]},
419
- {"1" => nil},
420
- ]
421
- target = build({
422
- type: :list,
423
- field: {
424
- name: :sub_element,
425
- type: :boolean,
426
- },
427
- },
428
- values)
429
- assert_equal(remove_field_names(values),
430
- target.values)
431
- end
432
-
433
- def test_struct
434
- values = [
435
- {"0" => {"sub_field" => true}},
436
- {"1" => nil},
437
- {"0" => {"sub_field" => nil}},
438
- ]
439
- target = build({
440
- type: :struct,
441
- fields: [
442
- {
443
- name: :sub_field,
444
- type: :boolean,
445
- },
446
- ],
447
- },
448
- values)
449
- assert_equal(remove_field_names(values),
450
- target.values)
451
- end
452
-
453
- def test_map
454
- values = [
455
- {"0" => {"key1" => true, "key2" => nil}},
456
- {"1" => nil},
457
- ]
458
- target = build({
459
- type: :map,
460
- key: :string,
461
- item: :boolean,
462
- },
463
- values)
464
- assert_equal(remove_field_names(values),
465
- target.values)
466
- end
467
-
468
- def test_sparse_union
469
- values = [
470
- {"0" => {"field1" => true}},
471
- {"1" => nil},
472
- {"0" => {"field2" => 29}},
473
- {"0" => {"field2" => nil}},
474
- ]
475
- target = build({
476
- type: :sparse_union,
477
- fields: [
478
- {
479
- name: :field1,
480
- type: :boolean,
481
- },
482
- {
483
- name: :field2,
484
- type: :uint8,
485
- },
486
- ],
487
- type_codes: [0, 1],
488
- },
489
- values)
490
- assert_equal(remove_field_names(remove_field_names(values)),
491
- target.values)
492
- end
493
-
494
- def test_dense_union
495
- values = [
496
- {"0" => {"field1" => true}},
497
- {"1" => nil},
498
- {"0" => {"field2" => 29}},
499
- {"0" => {"field2" => nil}},
500
- ]
501
- target = build({
502
- type: :dense_union,
503
- fields: [
504
- {
505
- name: :field1,
506
- type: :boolean,
507
- },
508
- {
509
- name: :field2,
510
- type: :uint8,
511
- },
512
- ],
513
- type_codes: [0, 1],
514
- },
515
- values)
516
- assert_equal(remove_field_names(remove_field_names(values)),
517
- target.values)
518
- end
519
-
520
- def test_dictionary
521
- values = [
522
- {"0" => "Ruby"},
523
- {"1" => nil},
524
- {"0" => "GLib"},
525
- ]
526
- target = build({
527
- type: :dictionary,
528
- index_data_type: :int8,
529
- value_data_type: :string,
530
- ordered: false,
531
- },
532
- values)
533
- assert_equal(remove_field_names(values),
534
- target.values)
535
- end
536
- end
537
-
538
- class ValuesArrayDenseUnionArrayTest < Test::Unit::TestCase
539
- include ValuesDenseUnionArrayTests
540
-
541
- def build(type, values)
542
- build_array(type, values)
543
- end
544
- end
545
-
546
- class ValuesChunkedArrayDenseUnionArrayTest < Test::Unit::TestCase
547
- include ValuesDenseUnionArrayTests
548
-
549
- def build(type, values)
550
- Arrow::ChunkedArray.new([build_array(type, values)])
551
- end
552
- end