red-arrow 18.1.0 → 19.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
@@ -1,556 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module RawRecordsSparseUnionArrayTests
19
- def build_schema(type, type_codes)
20
- field_description = {}
21
- if type.is_a?(Hash)
22
- field_description = field_description.merge(type)
23
- else
24
- field_description[:type] = type
25
- end
26
- {
27
- column: {
28
- type: :sparse_union,
29
- fields: [
30
- field_description.merge(name: "0"),
31
- field_description.merge(name: "1"),
32
- ],
33
- type_codes: type_codes,
34
- },
35
- }
36
- end
37
-
38
- # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
39
- def build_record_batch(type, records)
40
- type_codes = [0, 1]
41
- schema = Arrow::Schema.new(build_schema(type, type_codes))
42
- type_ids = []
43
- arrays = schema.fields[0].data_type.fields.collect do |field|
44
- sub_schema = Arrow::Schema.new([field])
45
- sub_records = records.collect do |record|
46
- [record[0].nil? ? nil : record[0][field.name]]
47
- end
48
- sub_record_batch = Arrow::RecordBatch.new(sub_schema,
49
- sub_records)
50
- sub_record_batch.columns[0].data
51
- end
52
- records.each do |record|
53
- column = record[0]
54
- if column.key?("0")
55
- type_ids << type_codes[0]
56
- elsif column.key?("1")
57
- type_ids << type_codes[1]
58
- end
59
- end
60
- union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type,
61
- Arrow::Int8Array.new(type_ids),
62
- arrays)
63
- schema = Arrow::Schema.new(column: union_array.value_data_type)
64
- Arrow::RecordBatch.new(schema,
65
- records.size,
66
- [union_array])
67
- end
68
-
69
- def remove_field_names(records)
70
- records.collect do |record|
71
- record.collect do |column|
72
- if column.nil?
73
- column
74
- else
75
- column.values[0]
76
- end
77
- end
78
- end
79
- end
80
-
81
- def test_null
82
- records = [
83
- [{"0" => nil}],
84
- ]
85
- target = build(:null, records)
86
- assert_equal(remove_field_names(records),
87
- target.raw_records)
88
- end
89
-
90
- def test_boolean
91
- records = [
92
- [{"0" => true}],
93
- [{"1" => nil}],
94
- ]
95
- target = build(:boolean, records)
96
- assert_equal(remove_field_names(records),
97
- target.raw_records)
98
- end
99
-
100
- def test_int8
101
- records = [
102
- [{"0" => -(2 ** 7)}],
103
- [{"1" => nil}],
104
- ]
105
- target = build(:int8, records)
106
- assert_equal(remove_field_names(records),
107
- target.raw_records)
108
- end
109
-
110
- def test_uint8
111
- records = [
112
- [{"0" => (2 ** 8) - 1}],
113
- [{"1" => nil}],
114
- ]
115
- target = build(:uint8, records)
116
- assert_equal(remove_field_names(records),
117
- target.raw_records)
118
- end
119
-
120
- def test_int16
121
- records = [
122
- [{"0" => -(2 ** 15)}],
123
- [{"1" => nil}],
124
- ]
125
- target = build(:int16, records)
126
- assert_equal(remove_field_names(records),
127
- target.raw_records)
128
- end
129
-
130
- def test_uint16
131
- records = [
132
- [{"0" => (2 ** 16) - 1}],
133
- [{"1" => nil}],
134
- ]
135
- target = build(:uint16, records)
136
- assert_equal(remove_field_names(records),
137
- target.raw_records)
138
- end
139
-
140
- def test_int32
141
- records = [
142
- [{"0" => -(2 ** 31)}],
143
- [{"1" => nil}],
144
- ]
145
- target = build(:int32, records)
146
- assert_equal(remove_field_names(records),
147
- target.raw_records)
148
- end
149
-
150
- def test_uint32
151
- records = [
152
- [{"0" => (2 ** 32) - 1}],
153
- [{"1" => nil}],
154
- ]
155
- target = build(:uint32, records)
156
- assert_equal(remove_field_names(records),
157
- target.raw_records)
158
- end
159
-
160
- def test_int64
161
- records = [
162
- [{"0" => -(2 ** 63)}],
163
- [{"1" => nil}],
164
- ]
165
- target = build(:int64, records)
166
- assert_equal(remove_field_names(records),
167
- target.raw_records)
168
- end
169
-
170
- def test_uint64
171
- records = [
172
- [{"0" => (2 ** 64) - 1}],
173
- [{"1" => nil}],
174
- ]
175
- target = build(:uint64, records)
176
- assert_equal(remove_field_names(records),
177
- target.raw_records)
178
- end
179
-
180
- def test_float
181
- records = [
182
- [{"0" => -1.0}],
183
- [{"1" => nil}],
184
- ]
185
- target = build(:float, records)
186
- assert_equal(remove_field_names(records),
187
- target.raw_records)
188
- end
189
-
190
- def test_double
191
- records = [
192
- [{"0" => -1.0}],
193
- [{"1" => nil}],
194
- ]
195
- target = build(:double, records)
196
- assert_equal(remove_field_names(records),
197
- target.raw_records)
198
- end
199
-
200
- def test_binary
201
- records = [
202
- [{"0" => "\xff".b}],
203
- [{"1" => nil}],
204
- ]
205
- target = build(:binary, records)
206
- assert_equal(remove_field_names(records),
207
- target.raw_records)
208
- end
209
-
210
- def test_string
211
- records = [
212
- [{"0" => "Ruby"}],
213
- [{"1" => nil}],
214
- ]
215
- target = build(:string, records)
216
- assert_equal(remove_field_names(records),
217
- target.raw_records)
218
- end
219
-
220
- def test_date32
221
- records = [
222
- [{"0" => Date.new(1960, 1, 1)}],
223
- [{"1" => nil}],
224
- ]
225
- target = build(:date32, records)
226
- assert_equal(remove_field_names(records),
227
- target.raw_records)
228
- end
229
-
230
- def test_date64
231
- records = [
232
- [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
233
- [{"1" => nil}],
234
- ]
235
- target = build(:date64, records)
236
- assert_equal(remove_field_names(records),
237
- target.raw_records)
238
- end
239
-
240
- def test_timestamp_second
241
- records = [
242
- [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
243
- [{"1" => nil}],
244
- ]
245
- target = build({
246
- type: :timestamp,
247
- unit: :second,
248
- },
249
- records)
250
- assert_equal(remove_field_names(records),
251
- target.raw_records)
252
- end
253
-
254
- def test_timestamp_milli
255
- records = [
256
- [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
257
- [{"1" => nil}],
258
- ]
259
- target = build({
260
- type: :timestamp,
261
- unit: :milli,
262
- },
263
- records)
264
- assert_equal(remove_field_names(records),
265
- target.raw_records)
266
- end
267
-
268
- def test_timestamp_micro
269
- records = [
270
- [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
271
- [{"1" => nil}],
272
- ]
273
- target = build({
274
- type: :timestamp,
275
- unit: :micro,
276
- },
277
- records)
278
- assert_equal(remove_field_names(records),
279
- target.raw_records)
280
- end
281
-
282
- def test_timestamp_nano
283
- records = [
284
- [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
285
- [{"1" => nil}],
286
- ]
287
- target = build({
288
- type: :timestamp,
289
- unit: :nano,
290
- },
291
- records)
292
- assert_equal(remove_field_names(records),
293
- target.raw_records)
294
- end
295
-
296
- def test_time32_second
297
- unit = Arrow::TimeUnit::SECOND
298
- records = [
299
- # 00:10:00
300
- [{"0" => Arrow::Time.new(unit, 60 * 10)}],
301
- [{"1" => nil}],
302
- ]
303
- target = build({
304
- type: :time32,
305
- unit: :second,
306
- },
307
- records)
308
- assert_equal(remove_field_names(records),
309
- target.raw_records)
310
- end
311
-
312
- def test_time32_milli
313
- unit = Arrow::TimeUnit::MILLI
314
- records = [
315
- # 00:10:00.123
316
- [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
317
- [{"1" => nil}],
318
- ]
319
- target = build({
320
- type: :time32,
321
- unit: :milli,
322
- },
323
- records)
324
- assert_equal(remove_field_names(records),
325
- target.raw_records)
326
- end
327
-
328
- def test_time64_micro
329
- unit = Arrow::TimeUnit::MICRO
330
- records = [
331
- # 00:10:00.123456
332
- [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
333
- [{"1" => nil}],
334
- ]
335
- target = build({
336
- type: :time64,
337
- unit: :micro,
338
- },
339
- records)
340
- assert_equal(remove_field_names(records),
341
- target.raw_records)
342
- end
343
-
344
- def test_time64_nano
345
- unit = Arrow::TimeUnit::NANO
346
- records = [
347
- # 00:10:00.123456789
348
- [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
349
- [{"1" => nil}],
350
- ]
351
- target = build({
352
- type: :time64,
353
- unit: :nano,
354
- },
355
- records)
356
- assert_equal(remove_field_names(records),
357
- target.raw_records)
358
- end
359
-
360
- def test_decimal128
361
- records = [
362
- [{"0" => BigDecimal("92.92")}],
363
- [{"1" => nil}],
364
- ]
365
- target = build({
366
- type: :decimal128,
367
- precision: 8,
368
- scale: 2,
369
- },
370
- records)
371
- assert_equal(remove_field_names(records),
372
- target.raw_records)
373
- end
374
-
375
- def test_decimal256
376
- records = [
377
- [{"0" => BigDecimal("92.92")}],
378
- [{"1" => nil}],
379
- ]
380
- target = build({
381
- type: :decimal256,
382
- precision: 38,
383
- scale: 2,
384
- },
385
- records)
386
- assert_equal(remove_field_names(records),
387
- target.raw_records)
388
- end
389
-
390
- def test_month_interval
391
- records = [
392
- [{"0" => 1}],
393
- [{"1" => nil}],
394
- ]
395
- target = build(:month_interval, records)
396
- assert_equal(remove_field_names(records),
397
- target.raw_records)
398
- end
399
-
400
- def test_day_time_interval
401
- records = [
402
- [{"0" => {day: 1, millisecond: 100}}],
403
- [{"1" => nil}],
404
- ]
405
- target = build(:day_time_interval, records)
406
- assert_equal(remove_field_names(records),
407
- target.raw_records)
408
- end
409
-
410
- def test_month_day_nano_interval
411
- records = [
412
- [{"0" => {month: 1, day: 1, nanosecond: 100}}],
413
- [{"1" => nil}],
414
- ]
415
- target = build(:month_day_nano_interval, records)
416
- assert_equal(remove_field_names(records),
417
- target.raw_records)
418
- end
419
-
420
- def test_list
421
- records = [
422
- [{"0" => [true, nil, false]}],
423
- [{"1" => nil}],
424
- ]
425
- target = build({
426
- type: :list,
427
- field: {
428
- name: :sub_element,
429
- type: :boolean,
430
- },
431
- },
432
- records)
433
- assert_equal(remove_field_names(records),
434
- target.raw_records)
435
- end
436
-
437
- def test_struct
438
- records = [
439
- [{"0" => {"sub_field" => true}}],
440
- [{"1" => nil}],
441
- [{"0" => {"sub_field" => nil}}],
442
- ]
443
- target = build({
444
- type: :struct,
445
- fields: [
446
- {
447
- name: :sub_field,
448
- type: :boolean,
449
- },
450
- ],
451
- },
452
- records)
453
- assert_equal(remove_field_names(records),
454
- target.raw_records)
455
- end
456
-
457
- def test_map
458
- records = [
459
- [{"0" => {"key1" => true, "key2" => nil}}],
460
- [{"1" => nil}],
461
- ]
462
- target = build({
463
- type: :map,
464
- key: :string,
465
- item: :boolean,
466
- },
467
- records)
468
- assert_equal(remove_field_names(records),
469
- target.raw_records)
470
- end
471
-
472
- def test_sparse_union
473
- records = [
474
- [{"0" => {"field1" => true}}],
475
- [{"1" => nil}],
476
- [{"0" => {"field2" => 29}}],
477
- [{"0" => {"field2" => nil}}],
478
- ]
479
- target = build({
480
- type: :sparse_union,
481
- fields: [
482
- {
483
- name: :field1,
484
- type: :boolean,
485
- },
486
- {
487
- name: :field2,
488
- type: :uint8,
489
- },
490
- ],
491
- type_codes: [0, 1],
492
- },
493
- records)
494
- assert_equal(remove_field_names(remove_field_names(records)),
495
- target.raw_records)
496
- end
497
-
498
- def test_dense_union
499
- records = [
500
- [{"0" => {"field1" => true}}],
501
- [{"1" => nil}],
502
- [{"0" => {"field2" => 29}}],
503
- [{"0" => {"field2" => nil}}],
504
- ]
505
- target = build({
506
- type: :dense_union,
507
- fields: [
508
- {
509
- name: :field1,
510
- type: :boolean,
511
- },
512
- {
513
- name: :field2,
514
- type: :uint8,
515
- },
516
- ],
517
- type_codes: [0, 1],
518
- },
519
- records)
520
- assert_equal(remove_field_names(remove_field_names(records)),
521
- target.raw_records)
522
- end
523
-
524
- def test_dictionary
525
- records = [
526
- [{"0" => "Ruby"}],
527
- [{"1" => nil}],
528
- [{"0" => "GLib"}],
529
- ]
530
- target = build({
531
- type: :dictionary,
532
- index_data_type: :int8,
533
- value_data_type: :string,
534
- ordered: false,
535
- },
536
- records)
537
- assert_equal(remove_field_names(records),
538
- target.raw_records)
539
- end
540
- end
541
-
542
- class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
543
- include RawRecordsSparseUnionArrayTests
544
-
545
- def build(type, records)
546
- build_record_batch(type, records)
547
- end
548
- end
549
-
550
- class RawRecordsTableSparseUnionArrayTest < Test::Unit::TestCase
551
- include RawRecordsSparseUnionArrayTests
552
-
553
- def build(type, records)
554
- build_record_batch(type, records).to_table
555
- end
556
- end