red-arrow 18.1.0 → 19.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
@@ -1,566 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module EachRawRecordDenseUnionArrayTests
19
- def build_schema(type, type_codes)
20
- field_description = {}
21
- if type.is_a?(Hash)
22
- field_description = field_description.merge(type)
23
- else
24
- field_description[:type] = type
25
- end
26
- {
27
- column: {
28
- type: :dense_union,
29
- fields: [
30
- field_description.merge(name: "0"),
31
- field_description.merge(name: "1"),
32
- ],
33
- type_codes: type_codes,
34
- },
35
- }
36
- end
37
-
38
- # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
39
- def build_record_batch(type, records)
40
- type_codes = [0, 1]
41
- schema = Arrow::Schema.new(build_schema(type, type_codes))
42
- type_ids = []
43
- offsets = []
44
- arrays = schema.fields[0].data_type.fields.collect do |field|
45
- sub_schema = Arrow::Schema.new([field])
46
- sub_records = []
47
- records.each do |record|
48
- column = record[0]
49
- next if column.nil?
50
- next unless column.key?(field.name)
51
- sub_records << [column[field.name]]
52
- end
53
- sub_record_batch = Arrow::RecordBatch.new(sub_schema,
54
- sub_records)
55
- sub_record_batch.columns[0].data
56
- end
57
- records.each do |record|
58
- column = record[0]
59
- if column.key?("0")
60
- type_id = type_codes[0]
61
- type_ids << type_id
62
- offsets << (type_ids.count(type_id) - 1)
63
- elsif column.key?("1")
64
- type_id = type_codes[1]
65
- type_ids << type_id
66
- offsets << (type_ids.count(type_id) - 1)
67
- end
68
- end
69
- union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
70
- Arrow::Int8Array.new(type_ids),
71
- Arrow::Int32Array.new(offsets),
72
- arrays)
73
- schema = Arrow::Schema.new(column: union_array.value_data_type)
74
- Arrow::RecordBatch.new(schema,
75
- records.size,
76
- [union_array])
77
- end
78
-
79
- def remove_field_names(records)
80
- records.collect do |record|
81
- record.collect do |column|
82
- if column.nil?
83
- column
84
- else
85
- column.values[0]
86
- end
87
- end
88
- end
89
- end
90
-
91
- def test_null
92
- records = [
93
- [{"0" => nil}],
94
- ]
95
- target = build(:null, records)
96
- assert_equal(remove_field_names(records),
97
- target.each_raw_record.to_a)
98
- end
99
-
100
- def test_boolean
101
- records = [
102
- [{"0" => true}],
103
- [{"1" => nil}],
104
- ]
105
- target = build(:boolean, records)
106
- assert_equal(remove_field_names(records),
107
- target.each_raw_record.to_a)
108
- end
109
-
110
- def test_int8
111
- records = [
112
- [{"0" => -(2 ** 7)}],
113
- [{"1" => nil}],
114
- ]
115
- target = build(:int8, records)
116
- assert_equal(remove_field_names(records),
117
- target.each_raw_record.to_a)
118
- end
119
-
120
- def test_uint8
121
- records = [
122
- [{"0" => (2 ** 8) - 1}],
123
- [{"1" => nil}],
124
- ]
125
- target = build(:uint8, records)
126
- assert_equal(remove_field_names(records),
127
- target.each_raw_record.to_a)
128
- end
129
-
130
- def test_int16
131
- records = [
132
- [{"0" => -(2 ** 15)}],
133
- [{"1" => nil}],
134
- ]
135
- target = build(:int16, records)
136
- assert_equal(remove_field_names(records),
137
- target.each_raw_record.to_a)
138
- end
139
-
140
- def test_uint16
141
- records = [
142
- [{"0" => (2 ** 16) - 1}],
143
- [{"1" => nil}],
144
- ]
145
- target = build(:uint16, records)
146
- assert_equal(remove_field_names(records),
147
- target.each_raw_record.to_a)
148
- end
149
-
150
- def test_int32
151
- records = [
152
- [{"0" => -(2 ** 31)}],
153
- [{"1" => nil}],
154
- ]
155
- target = build(:int32, records)
156
- assert_equal(remove_field_names(records),
157
- target.each_raw_record.to_a)
158
- end
159
-
160
- def test_uint32
161
- records = [
162
- [{"0" => (2 ** 32) - 1}],
163
- [{"1" => nil}],
164
- ]
165
- target = build(:uint32, records)
166
- assert_equal(remove_field_names(records),
167
- target.each_raw_record.to_a)
168
- end
169
-
170
- def test_int64
171
- records = [
172
- [{"0" => -(2 ** 63)}],
173
- [{"1" => nil}],
174
- ]
175
- target = build(:int64, records)
176
- assert_equal(remove_field_names(records),
177
- target.each_raw_record.to_a)
178
- end
179
-
180
- def test_uint64
181
- records = [
182
- [{"0" => (2 ** 64) - 1}],
183
- [{"1" => nil}],
184
- ]
185
- target = build(:uint64, records)
186
- assert_equal(remove_field_names(records),
187
- target.each_raw_record.to_a)
188
- end
189
-
190
- def test_float
191
- records = [
192
- [{"0" => -1.0}],
193
- [{"1" => nil}],
194
- ]
195
- target = build(:float, records)
196
- assert_equal(remove_field_names(records),
197
- target.each_raw_record.to_a)
198
- end
199
-
200
- def test_double
201
- records = [
202
- [{"0" => -1.0}],
203
- [{"1" => nil}],
204
- ]
205
- target = build(:double, records)
206
- assert_equal(remove_field_names(records),
207
- target.each_raw_record.to_a)
208
- end
209
-
210
- def test_binary
211
- records = [
212
- [{"0" => "\xff".b}],
213
- [{"1" => nil}],
214
- ]
215
- target = build(:binary, records)
216
- assert_equal(remove_field_names(records),
217
- target.each_raw_record.to_a)
218
- end
219
-
220
- def test_string
221
- records = [
222
- [{"0" => "Ruby"}],
223
- [{"1" => nil}],
224
- ]
225
- target = build(:string, records)
226
- assert_equal(remove_field_names(records),
227
- target.each_raw_record.to_a)
228
- end
229
-
230
- def test_date32
231
- records = [
232
- [{"0" => Date.new(1960, 1, 1)}],
233
- [{"1" => nil}],
234
- ]
235
- target = build(:date32, records)
236
- assert_equal(remove_field_names(records),
237
- target.each_raw_record.to_a)
238
- end
239
-
240
- def test_date64
241
- records = [
242
- [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
243
- [{"1" => nil}],
244
- ]
245
- target = build(:date64, records)
246
- assert_equal(remove_field_names(records),
247
- target.each_raw_record.to_a)
248
- end
249
-
250
- def test_timestamp_second
251
- records = [
252
- [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
253
- [{"1" => nil}],
254
- ]
255
- target = build({
256
- type: :timestamp,
257
- unit: :second,
258
- },
259
- records)
260
- assert_equal(remove_field_names(records),
261
- target.each_raw_record.to_a)
262
- end
263
-
264
- def test_timestamp_milli
265
- records = [
266
- [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
267
- [{"1" => nil}],
268
- ]
269
- target = build({
270
- type: :timestamp,
271
- unit: :milli,
272
- },
273
- records)
274
- assert_equal(remove_field_names(records),
275
- target.each_raw_record.to_a)
276
- end
277
-
278
- def test_timestamp_micro
279
- records = [
280
- [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
281
- [{"1" => nil}],
282
- ]
283
- target = build({
284
- type: :timestamp,
285
- unit: :micro,
286
- },
287
- records)
288
- assert_equal(remove_field_names(records),
289
- target.each_raw_record.to_a)
290
- end
291
-
292
- def test_timestamp_nano
293
- records = [
294
- [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
295
- [{"1" => nil}],
296
- ]
297
- target = build({
298
- type: :timestamp,
299
- unit: :nano,
300
- },
301
- records)
302
- assert_equal(remove_field_names(records),
303
- target.each_raw_record.to_a)
304
- end
305
-
306
- def test_time32_second
307
- unit = Arrow::TimeUnit::SECOND
308
- records = [
309
- # 00:10:00
310
- [{"0" => Arrow::Time.new(unit, 60 * 10)}],
311
- [{"1" => nil}],
312
- ]
313
- target = build({
314
- type: :time32,
315
- unit: :second,
316
- },
317
- records)
318
- assert_equal(remove_field_names(records),
319
- target.each_raw_record.to_a)
320
- end
321
-
322
- def test_time32_milli
323
- unit = Arrow::TimeUnit::MILLI
324
- records = [
325
- # 00:10:00.123
326
- [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
327
- [{"1" => nil}],
328
- ]
329
- target = build({
330
- type: :time32,
331
- unit: :milli,
332
- },
333
- records)
334
- assert_equal(remove_field_names(records),
335
- target.each_raw_record.to_a)
336
- end
337
-
338
- def test_time64_micro
339
- unit = Arrow::TimeUnit::MICRO
340
- records = [
341
- # 00:10:00.123456
342
- [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
343
- [{"1" => nil}],
344
- ]
345
- target = build({
346
- type: :time64,
347
- unit: :micro,
348
- },
349
- records)
350
- assert_equal(remove_field_names(records),
351
- target.each_raw_record.to_a)
352
- end
353
-
354
- def test_time64_nano
355
- unit = Arrow::TimeUnit::NANO
356
- records = [
357
- # 00:10:00.123456789
358
- [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
359
- [{"1" => nil}],
360
- ]
361
- target = build({
362
- type: :time64,
363
- unit: :nano,
364
- },
365
- records)
366
- assert_equal(remove_field_names(records),
367
- target.each_raw_record.to_a)
368
- end
369
-
370
- def test_decimal128
371
- records = [
372
- [{"0" => BigDecimal("92.92")}],
373
- [{"1" => nil}],
374
- ]
375
- target = build({
376
- type: :decimal128,
377
- precision: 8,
378
- scale: 2,
379
- },
380
- records)
381
- assert_equal(remove_field_names(records),
382
- target.each_raw_record.to_a)
383
- end
384
-
385
- def test_decimal256
386
- records = [
387
- [{"0" => BigDecimal("92.92")}],
388
- [{"1" => nil}],
389
- ]
390
- target = build({
391
- type: :decimal256,
392
- precision: 38,
393
- scale: 2,
394
- },
395
- records)
396
- assert_equal(remove_field_names(records),
397
- target.each_raw_record.to_a)
398
- end
399
-
400
- def test_month_interval
401
- records = [
402
- [{"0" => 1}],
403
- [{"1" => nil}],
404
- ]
405
- target = build(:month_interval, records)
406
- assert_equal(remove_field_names(records),
407
- target.each_raw_record.to_a)
408
- end
409
-
410
- def test_day_time_interval
411
- records = [
412
- [{"0" => {day: 1, millisecond: 100}}],
413
- [{"1" => nil}],
414
- ]
415
- target = build(:day_time_interval, records)
416
- assert_equal(remove_field_names(records),
417
- target.each_raw_record.to_a)
418
- end
419
-
420
- def test_month_day_nano_interval
421
- records = [
422
- [{"0" => {month: 1, day: 1, nanosecond: 100}}],
423
- [{"1" => nil}],
424
- ]
425
- target = build(:month_day_nano_interval, records)
426
- assert_equal(remove_field_names(records),
427
- target.each_raw_record.to_a)
428
- end
429
-
430
- def test_list
431
- records = [
432
- [{"0" => [true, nil, false]}],
433
- [{"1" => nil}],
434
- ]
435
- target = build({
436
- type: :list,
437
- field: {
438
- name: :sub_element,
439
- type: :boolean,
440
- },
441
- },
442
- records)
443
- assert_equal(remove_field_names(records),
444
- target.each_raw_record.to_a)
445
- end
446
-
447
- def test_struct
448
- records = [
449
- [{"0" => {"sub_field" => true}}],
450
- [{"1" => nil}],
451
- [{"0" => {"sub_field" => nil}}],
452
- ]
453
- target = build({
454
- type: :struct,
455
- fields: [
456
- {
457
- name: :sub_field,
458
- type: :boolean,
459
- },
460
- ],
461
- },
462
- records)
463
- assert_equal(remove_field_names(records),
464
- target.each_raw_record.to_a)
465
- end
466
-
467
- def test_map
468
- records = [
469
- [{"0" => {"key1" => true, "key2" => nil}}],
470
- [{"1" => nil}],
471
- ]
472
- target = build({
473
- type: :map,
474
- key: :string,
475
- item: :boolean,
476
- },
477
- records)
478
- assert_equal(remove_field_names(records),
479
- target.each_raw_record.to_a)
480
- end
481
-
482
- def test_sparse_union
483
- records = [
484
- [{"0" => {"field1" => true}}],
485
- [{"1" => nil}],
486
- [{"0" => {"field2" => 29}}],
487
- [{"0" => {"field2" => nil}}],
488
- ]
489
- target = build({
490
- type: :sparse_union,
491
- fields: [
492
- {
493
- name: :field1,
494
- type: :boolean,
495
- },
496
- {
497
- name: :field2,
498
- type: :uint8,
499
- },
500
- ],
501
- type_codes: [0, 1],
502
- },
503
- records)
504
- assert_equal(remove_field_names(remove_field_names(records)),
505
- target.each_raw_record.to_a)
506
- end
507
-
508
- def test_dense_union
509
- records = [
510
- [{"0" => {"field1" => true}}],
511
- [{"1" => nil}],
512
- [{"0" => {"field2" => 29}}],
513
- [{"0" => {"field2" => nil}}],
514
- ]
515
- target = build({
516
- type: :dense_union,
517
- fields: [
518
- {
519
- name: :field1,
520
- type: :boolean,
521
- },
522
- {
523
- name: :field2,
524
- type: :uint8,
525
- },
526
- ],
527
- type_codes: [0, 1],
528
- },
529
- records)
530
- assert_equal(remove_field_names(remove_field_names(records)),
531
- target.each_raw_record.to_a)
532
- end
533
-
534
- def test_dictionary
535
- records = [
536
- [{"0" => "Ruby"}],
537
- [{"1" => nil}],
538
- [{"0" => "GLib"}],
539
- ]
540
- target = build({
541
- type: :dictionary,
542
- index_data_type: :int8,
543
- value_data_type: :string,
544
- ordered: false,
545
- },
546
- records)
547
- assert_equal(remove_field_names(records),
548
- target.each_raw_record.to_a)
549
- end
550
- end
551
-
552
- class EachRawRecordRecordBatchDenseUnionArrayTest < Test::Unit::TestCase
553
- include EachRawRecordDenseUnionArrayTests
554
-
555
- def build(type, records)
556
- build_record_batch(type, records)
557
- end
558
- end
559
-
560
- class EachRawRecordTableDenseUnionArrayTest < Test::Unit::TestCase
561
- include EachRawRecordDenseUnionArrayTests
562
-
563
- def build(type, records)
564
- build_record_batch(type, records).to_table
565
- end
566
- end