red-arrow 18.1.0 → 19.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
@@ -1,552 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module ValuesDenseUnionArrayTests
19
- def build_data_type(type, type_codes)
20
- field_description = {}
21
- if type.is_a?(Hash)
22
- field_description = field_description.merge(type)
23
- else
24
- field_description[:type] = type
25
- end
26
- Arrow::DenseUnionDataType.new(fields: [
27
- field_description.merge(name: "0"),
28
- field_description.merge(name: "1"),
29
- ],
30
- type_codes: type_codes)
31
- end
32
-
33
- def build_array(type, values)
34
- type_codes = [0, 1]
35
- data_type = build_data_type(type, type_codes)
36
- type_ids = []
37
- offsets = []
38
- arrays = data_type.fields.collect do |field|
39
- sub_schema = Arrow::Schema.new([field])
40
- sub_records = []
41
- values.each do |value|
42
- next if value.nil?
43
- next unless value.key?(field.name)
44
- sub_records << [value[field.name]]
45
- end
46
- sub_record_batch = Arrow::RecordBatch.new(sub_schema,
47
- sub_records)
48
- sub_record_batch.columns[0].data
49
- end
50
- values.each do |value|
51
- if value.key?("0")
52
- type_id = type_codes[0]
53
- type_ids << type_id
54
- offsets << (type_ids.count(type_id) - 1)
55
- elsif value.key?("1")
56
- type_id = type_codes[1]
57
- type_ids << type_id
58
- offsets << (type_ids.count(type_id) - 1)
59
- end
60
- end
61
- Arrow::DenseUnionArray.new(data_type,
62
- Arrow::Int8Array.new(type_ids),
63
- Arrow::Int32Array.new(offsets),
64
- arrays)
65
- end
66
-
67
- def remove_field_names(values)
68
- values.collect do |value|
69
- if value.nil?
70
- value
71
- else
72
- value.values[0]
73
- end
74
- end
75
- end
76
-
77
- def test_null
78
- values = [
79
- {"0" => nil},
80
- ]
81
- target = build(:null, values)
82
- assert_equal(remove_field_names(values),
83
- target.values)
84
- end
85
-
86
- def test_boolean
87
- values = [
88
- {"0" => true},
89
- {"1" => nil},
90
- ]
91
- target = build(:boolean, values)
92
- assert_equal(remove_field_names(values),
93
- target.values)
94
- end
95
-
96
- def test_int8
97
- values = [
98
- {"0" => -(2 ** 7)},
99
- {"1" => nil},
100
- ]
101
- target = build(:int8, values)
102
- assert_equal(remove_field_names(values),
103
- target.values)
104
- end
105
-
106
- def test_uint8
107
- values = [
108
- {"0" => (2 ** 8) - 1},
109
- {"1" => nil},
110
- ]
111
- target = build(:uint8, values)
112
- assert_equal(remove_field_names(values),
113
- target.values)
114
- end
115
-
116
- def test_int16
117
- values = [
118
- {"0" => -(2 ** 15)},
119
- {"1" => nil},
120
- ]
121
- target = build(:int16, values)
122
- assert_equal(remove_field_names(values),
123
- target.values)
124
- end
125
-
126
- def test_uint16
127
- values = [
128
- {"0" => (2 ** 16) - 1},
129
- {"1" => nil},
130
- ]
131
- target = build(:uint16, values)
132
- assert_equal(remove_field_names(values),
133
- target.values)
134
- end
135
-
136
- def test_int32
137
- values = [
138
- {"0" => -(2 ** 31)},
139
- {"1" => nil},
140
- ]
141
- target = build(:int32, values)
142
- assert_equal(remove_field_names(values),
143
- target.values)
144
- end
145
-
146
- def test_uint32
147
- values = [
148
- {"0" => (2 ** 32) - 1},
149
- {"1" => nil},
150
- ]
151
- target = build(:uint32, values)
152
- assert_equal(remove_field_names(values),
153
- target.values)
154
- end
155
-
156
- def test_int64
157
- values = [
158
- {"0" => -(2 ** 63)},
159
- {"1" => nil},
160
- ]
161
- target = build(:int64, values)
162
- assert_equal(remove_field_names(values),
163
- target.values)
164
- end
165
-
166
- def test_uint64
167
- values = [
168
- {"0" => (2 ** 64) - 1},
169
- {"1" => nil},
170
- ]
171
- target = build(:uint64, values)
172
- assert_equal(remove_field_names(values),
173
- target.values)
174
- end
175
-
176
- def test_float
177
- values = [
178
- {"0" => -1.0},
179
- {"1" => nil},
180
- ]
181
- target = build(:float, values)
182
- assert_equal(remove_field_names(values),
183
- target.values)
184
- end
185
-
186
- def test_double
187
- values = [
188
- {"0" => -1.0},
189
- {"1" => nil},
190
- ]
191
- target = build(:double, values)
192
- assert_equal(remove_field_names(values),
193
- target.values)
194
- end
195
-
196
- def test_binary
197
- values = [
198
- {"0" => "\xff".b},
199
- {"1" => nil},
200
- ]
201
- target = build(:binary, values)
202
- assert_equal(remove_field_names(values),
203
- target.values)
204
- end
205
-
206
- def test_string
207
- values = [
208
- {"0" => "Ruby"},
209
- {"1" => nil},
210
- ]
211
- target = build(:string, values)
212
- assert_equal(remove_field_names(values),
213
- target.values)
214
- end
215
-
216
- def test_date32
217
- values = [
218
- {"0" => Date.new(1960, 1, 1)},
219
- {"1" => nil},
220
- ]
221
- target = build(:date32, values)
222
- assert_equal(remove_field_names(values),
223
- target.values)
224
- end
225
-
226
- def test_date64
227
- values = [
228
- {"0" => DateTime.new(1960, 1, 1, 2, 9, 30)},
229
- {"1" => nil},
230
- ]
231
- target = build(:date64, values)
232
- assert_equal(remove_field_names(values),
233
- target.values)
234
- end
235
-
236
- def test_timestamp_second
237
- values = [
238
- {"0" => Time.parse("1960-01-01T02:09:30Z")},
239
- {"1" => nil},
240
- ]
241
- target = build({
242
- type: :timestamp,
243
- unit: :second,
244
- },
245
- values)
246
- assert_equal(remove_field_names(values),
247
- target.values)
248
- end
249
-
250
- def test_timestamp_milli
251
- values = [
252
- {"0" => Time.parse("1960-01-01T02:09:30.123Z")},
253
- {"1" => nil},
254
- ]
255
- target = build({
256
- type: :timestamp,
257
- unit: :milli,
258
- },
259
- values)
260
- assert_equal(remove_field_names(values),
261
- target.values)
262
- end
263
-
264
- def test_timestamp_micro
265
- values = [
266
- {"0" => Time.parse("1960-01-01T02:09:30.123456Z")},
267
- {"1" => nil},
268
- ]
269
- target = build({
270
- type: :timestamp,
271
- unit: :micro,
272
- },
273
- values)
274
- assert_equal(remove_field_names(values),
275
- target.values)
276
- end
277
-
278
- def test_timestamp_nano
279
- values = [
280
- {"0" => Time.parse("1960-01-01T02:09:30.123456789Z")},
281
- {"1" => nil},
282
- ]
283
- target = build({
284
- type: :timestamp,
285
- unit: :nano,
286
- },
287
- values)
288
- assert_equal(remove_field_names(values),
289
- target.values)
290
- end
291
-
292
- def test_time32_second
293
- unit = Arrow::TimeUnit::SECOND
294
- values = [
295
- # 00:10:00
296
- {"0" => Arrow::Time.new(unit, 60 * 10)},
297
- {"1" => nil},
298
- ]
299
- target = build({
300
- type: :time32,
301
- unit: :second,
302
- },
303
- values)
304
- assert_equal(remove_field_names(values),
305
- target.values)
306
- end
307
-
308
- def test_time32_milli
309
- unit = Arrow::TimeUnit::MILLI
310
- values = [
311
- # 00:10:00.123
312
- {"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)},
313
- {"1" => nil},
314
- ]
315
- target = build({
316
- type: :time32,
317
- unit: :milli,
318
- },
319
- values)
320
- assert_equal(remove_field_names(values),
321
- target.values)
322
- end
323
-
324
- def test_time64_micro
325
- unit = Arrow::TimeUnit::MICRO
326
- values = [
327
- # 00:10:00.123456
328
- {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)},
329
- {"1" => nil},
330
- ]
331
- target = build({
332
- type: :time64,
333
- unit: :micro,
334
- },
335
- values)
336
- assert_equal(remove_field_names(values),
337
- target.values)
338
- end
339
-
340
- def test_time64_nano
341
- unit = Arrow::TimeUnit::NANO
342
- values = [
343
- # 00:10:00.123456789
344
- {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)},
345
- {"1" => nil},
346
- ]
347
- target = build({
348
- type: :time64,
349
- unit: :nano,
350
- },
351
- values)
352
- assert_equal(remove_field_names(values),
353
- target.values)
354
- end
355
-
356
- def test_decimal128
357
- values = [
358
- {"0" => BigDecimal("92.92")},
359
- {"1" => nil},
360
- ]
361
- target = build({
362
- type: :decimal128,
363
- precision: 8,
364
- scale: 2,
365
- },
366
- values)
367
- assert_equal(remove_field_names(values),
368
- target.values)
369
- end
370
-
371
- def test_decimal256
372
- values = [
373
- {"0" => BigDecimal("92.92")},
374
- {"1" => nil},
375
- ]
376
- target = build({
377
- type: :decimal256,
378
- precision: 38,
379
- scale: 2,
380
- },
381
- values)
382
- assert_equal(remove_field_names(values),
383
- target.values)
384
- end
385
-
386
- def test_month_interval
387
- values = [
388
- {"0" => 1},
389
- {"1" => nil},
390
- ]
391
- target = build(:month_interval, values)
392
- assert_equal(remove_field_names(values),
393
- target.values)
394
- end
395
-
396
- def test_day_time_interval
397
- values = [
398
- {"0" => {day: 1, millisecond: 100}},
399
- {"1" => nil},
400
- ]
401
- target = build(:day_time_interval, values)
402
- assert_equal(remove_field_names(values),
403
- target.values)
404
- end
405
-
406
- def test_month_day_nano_interval
407
- values = [
408
- {"0" => {month: 1, day: 1, nanosecond: 100}},
409
- {"1" => nil},
410
- ]
411
- target = build(:month_day_nano_interval, values)
412
- assert_equal(remove_field_names(values),
413
- target.values)
414
- end
415
-
416
- def test_list
417
- values = [
418
- {"0" => [true, nil, false]},
419
- {"1" => nil},
420
- ]
421
- target = build({
422
- type: :list,
423
- field: {
424
- name: :sub_element,
425
- type: :boolean,
426
- },
427
- },
428
- values)
429
- assert_equal(remove_field_names(values),
430
- target.values)
431
- end
432
-
433
- def test_struct
434
- values = [
435
- {"0" => {"sub_field" => true}},
436
- {"1" => nil},
437
- {"0" => {"sub_field" => nil}},
438
- ]
439
- target = build({
440
- type: :struct,
441
- fields: [
442
- {
443
- name: :sub_field,
444
- type: :boolean,
445
- },
446
- ],
447
- },
448
- values)
449
- assert_equal(remove_field_names(values),
450
- target.values)
451
- end
452
-
453
- def test_map
454
- values = [
455
- {"0" => {"key1" => true, "key2" => nil}},
456
- {"1" => nil},
457
- ]
458
- target = build({
459
- type: :map,
460
- key: :string,
461
- item: :boolean,
462
- },
463
- values)
464
- assert_equal(remove_field_names(values),
465
- target.values)
466
- end
467
-
468
- def test_sparse_union
469
- values = [
470
- {"0" => {"field1" => true}},
471
- {"1" => nil},
472
- {"0" => {"field2" => 29}},
473
- {"0" => {"field2" => nil}},
474
- ]
475
- target = build({
476
- type: :sparse_union,
477
- fields: [
478
- {
479
- name: :field1,
480
- type: :boolean,
481
- },
482
- {
483
- name: :field2,
484
- type: :uint8,
485
- },
486
- ],
487
- type_codes: [0, 1],
488
- },
489
- values)
490
- assert_equal(remove_field_names(remove_field_names(values)),
491
- target.values)
492
- end
493
-
494
- def test_dense_union
495
- values = [
496
- {"0" => {"field1" => true}},
497
- {"1" => nil},
498
- {"0" => {"field2" => 29}},
499
- {"0" => {"field2" => nil}},
500
- ]
501
- target = build({
502
- type: :dense_union,
503
- fields: [
504
- {
505
- name: :field1,
506
- type: :boolean,
507
- },
508
- {
509
- name: :field2,
510
- type: :uint8,
511
- },
512
- ],
513
- type_codes: [0, 1],
514
- },
515
- values)
516
- assert_equal(remove_field_names(remove_field_names(values)),
517
- target.values)
518
- end
519
-
520
- def test_dictionary
521
- values = [
522
- {"0" => "Ruby"},
523
- {"1" => nil},
524
- {"0" => "GLib"},
525
- ]
526
- target = build({
527
- type: :dictionary,
528
- index_data_type: :int8,
529
- value_data_type: :string,
530
- ordered: false,
531
- },
532
- values)
533
- assert_equal(remove_field_names(values),
534
- target.values)
535
- end
536
- end
537
-
538
- class ValuesArrayDenseUnionArrayTest < Test::Unit::TestCase
539
- include ValuesDenseUnionArrayTests
540
-
541
- def build(type, values)
542
- build_array(type, values)
543
- end
544
- end
545
-
546
- class ValuesChunkedArrayDenseUnionArrayTest < Test::Unit::TestCase
547
- include ValuesDenseUnionArrayTests
548
-
549
- def build(type, values)
550
- Arrow::ChunkedArray.new([build_array(type, values)])
551
- end
552
- end