red-arrow 18.1.0 → 19.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
@@ -1,543 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module ValuesSparseUnionArrayTests
19
- def build_data_type(type, type_codes)
20
- field_description = {}
21
- if type.is_a?(Hash)
22
- field_description = field_description.merge(type)
23
- else
24
- field_description[:type] = type
25
- end
26
- Arrow::SparseUnionDataType.new(fields: [
27
- field_description.merge(name: "0"),
28
- field_description.merge(name: "1"),
29
- ],
30
- type_codes: type_codes)
31
- end
32
-
33
- def build_array(type, values)
34
- type_codes = [0, 1]
35
- data_type = build_data_type(type, type_codes)
36
- type_ids = []
37
- arrays = data_type.fields.collect do |field|
38
- sub_schema = Arrow::Schema.new([field])
39
- sub_records = values.collect do |value|
40
- [value.nil? ? nil : value[field.name]]
41
- end
42
- sub_record_batch = Arrow::RecordBatch.new(sub_schema,
43
- sub_records)
44
- sub_record_batch.columns[0].data
45
- end
46
- values.each do |value|
47
- if value.key?("0")
48
- type_ids << type_codes[0]
49
- elsif value.key?("1")
50
- type_ids << type_codes[1]
51
- end
52
- end
53
- Arrow::SparseUnionArray.new(data_type,
54
- Arrow::Int8Array.new(type_ids),
55
- arrays)
56
- end
57
-
58
- def remove_field_names(values)
59
- values.collect do |value|
60
- if value.nil?
61
- value
62
- else
63
- value.values[0]
64
- end
65
- end
66
- end
67
-
68
- def test_null
69
- values = [
70
- {"0" => nil},
71
- ]
72
- target = build(:null, values)
73
- assert_equal(remove_field_names(values),
74
- target.values)
75
- end
76
-
77
- def test_boolean
78
- values = [
79
- {"0" => true},
80
- {"1" => nil},
81
- ]
82
- target = build(:boolean, values)
83
- assert_equal(remove_field_names(values),
84
- target.values)
85
- end
86
-
87
- def test_int8
88
- values = [
89
- {"0" => -(2 ** 7)},
90
- {"1" => nil},
91
- ]
92
- target = build(:int8, values)
93
- assert_equal(remove_field_names(values),
94
- target.values)
95
- end
96
-
97
- def test_uint8
98
- values = [
99
- {"0" => (2 ** 8) - 1},
100
- {"1" => nil},
101
- ]
102
- target = build(:uint8, values)
103
- assert_equal(remove_field_names(values),
104
- target.values)
105
- end
106
-
107
- def test_int16
108
- values = [
109
- {"0" => -(2 ** 15)},
110
- {"1" => nil},
111
- ]
112
- target = build(:int16, values)
113
- assert_equal(remove_field_names(values),
114
- target.values)
115
- end
116
-
117
- def test_uint16
118
- values = [
119
- {"0" => (2 ** 16) - 1},
120
- {"1" => nil},
121
- ]
122
- target = build(:uint16, values)
123
- assert_equal(remove_field_names(values),
124
- target.values)
125
- end
126
-
127
- def test_int32
128
- values = [
129
- {"0" => -(2 ** 31)},
130
- {"1" => nil},
131
- ]
132
- target = build(:int32, values)
133
- assert_equal(remove_field_names(values),
134
- target.values)
135
- end
136
-
137
- def test_uint32
138
- values = [
139
- {"0" => (2 ** 32) - 1},
140
- {"1" => nil},
141
- ]
142
- target = build(:uint32, values)
143
- assert_equal(remove_field_names(values),
144
- target.values)
145
- end
146
-
147
- def test_int64
148
- values = [
149
- {"0" => -(2 ** 63)},
150
- {"1" => nil},
151
- ]
152
- target = build(:int64, values)
153
- assert_equal(remove_field_names(values),
154
- target.values)
155
- end
156
-
157
- def test_uint64
158
- values = [
159
- {"0" => (2 ** 64) - 1},
160
- {"1" => nil},
161
- ]
162
- target = build(:uint64, values)
163
- assert_equal(remove_field_names(values),
164
- target.values)
165
- end
166
-
167
- def test_float
168
- values = [
169
- {"0" => -1.0},
170
- {"1" => nil},
171
- ]
172
- target = build(:float, values)
173
- assert_equal(remove_field_names(values),
174
- target.values)
175
- end
176
-
177
- def test_double
178
- values = [
179
- {"0" => -1.0},
180
- {"1" => nil},
181
- ]
182
- target = build(:double, values)
183
- assert_equal(remove_field_names(values),
184
- target.values)
185
- end
186
-
187
- def test_binary
188
- values = [
189
- {"0" => "\xff".b},
190
- {"1" => nil},
191
- ]
192
- target = build(:binary, values)
193
- assert_equal(remove_field_names(values),
194
- target.values)
195
- end
196
-
197
- def test_string
198
- values = [
199
- {"0" => "Ruby"},
200
- {"1" => nil},
201
- ]
202
- target = build(:string, values)
203
- assert_equal(remove_field_names(values),
204
- target.values)
205
- end
206
-
207
- def test_date32
208
- values = [
209
- {"0" => Date.new(1960, 1, 1)},
210
- {"1" => nil},
211
- ]
212
- target = build(:date32, values)
213
- assert_equal(remove_field_names(values),
214
- target.values)
215
- end
216
-
217
- def test_date64
218
- values = [
219
- {"0" => DateTime.new(1960, 1, 1, 2, 9, 30)},
220
- {"1" => nil},
221
- ]
222
- target = build(:date64, values)
223
- assert_equal(remove_field_names(values),
224
- target.values)
225
- end
226
-
227
- def test_timestamp_second
228
- values = [
229
- {"0" => Time.parse("1960-01-01T02:09:30Z")},
230
- {"1" => nil},
231
- ]
232
- target = build({
233
- type: :timestamp,
234
- unit: :second,
235
- },
236
- values)
237
- assert_equal(remove_field_names(values),
238
- target.values)
239
- end
240
-
241
- def test_timestamp_milli
242
- values = [
243
- {"0" => Time.parse("1960-01-01T02:09:30.123Z")},
244
- {"1" => nil},
245
- ]
246
- target = build({
247
- type: :timestamp,
248
- unit: :milli,
249
- },
250
- values)
251
- assert_equal(remove_field_names(values),
252
- target.values)
253
- end
254
-
255
- def test_timestamp_micro
256
- values = [
257
- {"0" => Time.parse("1960-01-01T02:09:30.123456Z")},
258
- {"1" => nil},
259
- ]
260
- target = build({
261
- type: :timestamp,
262
- unit: :micro,
263
- },
264
- values)
265
- assert_equal(remove_field_names(values),
266
- target.values)
267
- end
268
-
269
- def test_timestamp_nano
270
- values = [
271
- {"0" => Time.parse("1960-01-01T02:09:30.123456789Z")},
272
- {"1" => nil},
273
- ]
274
- target = build({
275
- type: :timestamp,
276
- unit: :nano,
277
- },
278
- values)
279
- assert_equal(remove_field_names(values),
280
- target.values)
281
- end
282
-
283
- def test_time32_second
284
- unit = Arrow::TimeUnit::SECOND
285
- values = [
286
- # 00:10:00
287
- {"0" => Arrow::Time.new(unit, 60 * 10)},
288
- {"1" => nil},
289
- ]
290
- target = build({
291
- type: :time32,
292
- unit: :second,
293
- },
294
- values)
295
- assert_equal(remove_field_names(values),
296
- target.values)
297
- end
298
-
299
- def test_time32_milli
300
- unit = Arrow::TimeUnit::MILLI
301
- values = [
302
- # 00:10:00.123
303
- {"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)},
304
- {"1" => nil},
305
- ]
306
- target = build({
307
- type: :time32,
308
- unit: :milli,
309
- },
310
- values)
311
- assert_equal(remove_field_names(values),
312
- target.values)
313
- end
314
-
315
- def test_time64_micro
316
- unit = Arrow::TimeUnit::MICRO
317
- values = [
318
- # 00:10:00.123456
319
- {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)},
320
- {"1" => nil},
321
- ]
322
- target = build({
323
- type: :time64,
324
- unit: :micro,
325
- },
326
- values)
327
- assert_equal(remove_field_names(values),
328
- target.values)
329
- end
330
-
331
- def test_time64_nano
332
- unit = Arrow::TimeUnit::NANO
333
- values = [
334
- # 00:10:00.123456789
335
- {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)},
336
- {"1" => nil},
337
- ]
338
- target = build({
339
- type: :time64,
340
- unit: :nano,
341
- },
342
- values)
343
- assert_equal(remove_field_names(values),
344
- target.values)
345
- end
346
-
347
- def test_decimal128
348
- values = [
349
- {"0" => BigDecimal("92.92")},
350
- {"1" => nil},
351
- ]
352
- target = build({
353
- type: :decimal128,
354
- precision: 8,
355
- scale: 2,
356
- },
357
- values)
358
- assert_equal(remove_field_names(values),
359
- target.values)
360
- end
361
-
362
- def test_month_interval
363
- values = [
364
- {"0" => 1},
365
- {"1" => nil},
366
- ]
367
- target = build(:month_interval, values)
368
- assert_equal(remove_field_names(values),
369
- target.values)
370
- end
371
-
372
- def test_day_time_interval
373
- values = [
374
- {"0" => {day: 1, millisecond: 100}},
375
- {"1" => nil},
376
- ]
377
- target = build(:day_time_interval, values)
378
- assert_equal(remove_field_names(values),
379
- target.values)
380
- end
381
-
382
- def test_month_day_nano_interval
383
- values = [
384
- {"0" => {month: 1, day: 1, nanosecond: 100}},
385
- {"1" => nil},
386
- ]
387
- target = build(:month_day_nano_interval, values)
388
- assert_equal(remove_field_names(values),
389
- target.values)
390
- end
391
-
392
- def test_decimal256
393
- values = [
394
- {"0" => BigDecimal("92.92")},
395
- {"1" => nil},
396
- ]
397
- target = build({
398
- type: :decimal256,
399
- precision: 38,
400
- scale: 2,
401
- },
402
- values)
403
- assert_equal(remove_field_names(values),
404
- target.values)
405
- end
406
-
407
- def test_list
408
- values = [
409
- {"0" => [true, nil, false]},
410
- {"1" => nil},
411
- ]
412
- target = build({
413
- type: :list,
414
- field: {
415
- name: :sub_element,
416
- type: :boolean,
417
- },
418
- },
419
- values)
420
- assert_equal(remove_field_names(values),
421
- target.values)
422
- end
423
-
424
- def test_struct
425
- values = [
426
- {"0" => {"sub_field" => true}},
427
- {"1" => nil},
428
- {"0" => {"sub_field" => nil}},
429
- ]
430
- target = build({
431
- type: :struct,
432
- fields: [
433
- {
434
- name: :sub_field,
435
- type: :boolean,
436
- },
437
- ],
438
- },
439
- values)
440
- assert_equal(remove_field_names(values),
441
- target.values)
442
- end
443
-
444
- def test_map
445
- values = [
446
- {"0" => {"key1" => true, "key2" => nil}},
447
- {"1" => nil},
448
- ]
449
- target = build({
450
- type: :map,
451
- key: :string,
452
- item: :boolean,
453
- },
454
- values)
455
- assert_equal(remove_field_names(values),
456
- target.values)
457
- end
458
-
459
- def test_sparse_union
460
- values = [
461
- {"0" => {"field1" => true}},
462
- {"1" => nil},
463
- {"0" => {"field2" => 29}},
464
- {"0" => {"field2" => nil}},
465
- ]
466
- target = build({
467
- type: :sparse_union,
468
- fields: [
469
- {
470
- name: :field1,
471
- type: :boolean,
472
- },
473
- {
474
- name: :field2,
475
- type: :uint8,
476
- },
477
- ],
478
- type_codes: [0, 1],
479
- },
480
- values)
481
- assert_equal(remove_field_names(remove_field_names(values)),
482
- target.values)
483
- end
484
-
485
- def test_dense_union
486
- values = [
487
- {"0" => {"field1" => true}},
488
- {"1" => nil},
489
- {"0" => {"field2" => 29}},
490
- {"0" => {"field2" => nil}},
491
- ]
492
- target = build({
493
- type: :dense_union,
494
- fields: [
495
- {
496
- name: :field1,
497
- type: :boolean,
498
- },
499
- {
500
- name: :field2,
501
- type: :uint8,
502
- },
503
- ],
504
- type_codes: [0, 1],
505
- },
506
- values)
507
- assert_equal(remove_field_names(remove_field_names(values)),
508
- target.values)
509
- end
510
-
511
- def test_dictionary
512
- values = [
513
- {"0" => "Ruby"},
514
- {"1" => nil},
515
- {"0" => "GLib"},
516
- ]
517
- target = build({
518
- type: :dictionary,
519
- index_data_type: :int8,
520
- value_data_type: :string,
521
- ordered: false,
522
- },
523
- values)
524
- assert_equal(remove_field_names(values),
525
- target.values)
526
- end
527
- end
528
-
529
- class ValuesArraySparseUnionArrayTest < Test::Unit::TestCase
530
- include ValuesSparseUnionArrayTests
531
-
532
- def build(type, values)
533
- build_array(type, values)
534
- end
535
- end
536
-
537
- class ValuesChunkedArraySparseUnionArrayTest < Test::Unit::TestCase
538
- include ValuesSparseUnionArrayTests
539
-
540
- def build(type, values)
541
- Arrow::ChunkedArray.new([build_array(type, values)])
542
- end
543
- end