red-arrow 18.1.0 → 19.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
@@ -1,507 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module RawRecordsMapArrayTests
19
- def build_schema(type)
20
- {
21
- column: {
22
- type: :map,
23
- key: :string,
24
- item: type
25
- },
26
- }
27
- end
28
-
29
- def test_null
30
- records = [
31
- [{"key1" => nil}],
32
- [nil],
33
- ]
34
- target = build(:null, records)
35
- assert_equal(records, target.raw_records)
36
- end
37
-
38
- def test_boolean
39
- records = [
40
- [{"key1" => true, "key2" => nil}],
41
- [nil],
42
- ]
43
- target = build(:boolean, records)
44
- assert_equal(records, target.raw_records)
45
- end
46
-
47
- def test_int8
48
- records = [
49
- [{"key1" => -(2 ** 7), "key2" => nil}],
50
- [nil],
51
- ]
52
- target = build(:int8, records)
53
- assert_equal(records, target.raw_records)
54
- end
55
-
56
- def test_uint8
57
- records = [
58
- [{"key1" => (2 ** 8) - 1, "key2" => nil}],
59
- [nil],
60
- ]
61
- target = build(:uint8, records)
62
- assert_equal(records, target.raw_records)
63
- end
64
-
65
- def test_int16
66
- records = [
67
- [{"key1" => -(2 ** 15), "key2" => nil}],
68
- [nil],
69
- ]
70
- target = build(:int16, records)
71
- assert_equal(records, target.raw_records)
72
- end
73
-
74
- def test_uint16
75
- records = [
76
- [{"key1" => (2 ** 16) - 1, "key2" => nil}],
77
- [nil],
78
- ]
79
- target = build(:uint16, records)
80
- assert_equal(records, target.raw_records)
81
- end
82
-
83
- def test_int32
84
- records = [
85
- [{"key1" => -(2 ** 31), "key2" => nil}],
86
- [nil],
87
- ]
88
- target = build(:int32, records)
89
- assert_equal(records, target.raw_records)
90
- end
91
-
92
- def test_uint32
93
- records = [
94
- [{"key1" => (2 ** 32) - 1, "key2" => nil}],
95
- [nil],
96
- ]
97
- target = build(:uint32, records)
98
- assert_equal(records, target.raw_records)
99
- end
100
-
101
- def test_int64
102
- records = [
103
- [{"key1" => -(2 ** 63), "key2" => nil}],
104
- [nil],
105
- ]
106
- target = build(:int64, records)
107
- assert_equal(records, target.raw_records)
108
- end
109
-
110
- def test_uint64
111
- records = [
112
- [{"key1" => (2 ** 64) - 1, "key2" => nil}],
113
- [nil],
114
- ]
115
- target = build(:uint64, records)
116
- assert_equal(records, target.raw_records)
117
- end
118
-
119
- def test_float
120
- records = [
121
- [{"key1" => -1.0, "key2" => nil}],
122
- [nil],
123
- ]
124
- target = build(:float, records)
125
- assert_equal(records, target.raw_records)
126
- end
127
-
128
- def test_double
129
- records = [
130
- [{"key1" => -1.0, "key2" => nil}],
131
- [nil],
132
- ]
133
- target = build(:double, records)
134
- assert_equal(records, target.raw_records)
135
- end
136
-
137
- def test_binary
138
- records = [
139
- [{"key1" => "\xff".b, "key2" => nil}],
140
- [nil],
141
- ]
142
- target = build(:binary, records)
143
- assert_equal(records, target.raw_records)
144
- end
145
-
146
- def test_string
147
- records = [
148
- [{"key1" => "Ruby", "key2" => nil}],
149
- [nil],
150
- ]
151
- target = build(:string, records)
152
- assert_equal(records, target.raw_records)
153
- end
154
-
155
- def test_date32
156
- records = [
157
- [{"key1" => Date.new(1960, 1, 1), "key2" => nil}],
158
- [nil],
159
- ]
160
- target = build(:date32, records)
161
- assert_equal(records, target.raw_records)
162
- end
163
-
164
- def test_date64
165
- records = [
166
- [{"key1" => DateTime.new(1960, 1, 1, 2, 9, 30), "key2" => nil}],
167
- [nil],
168
- ]
169
- target = build(:date64, records)
170
- assert_equal(records, target.raw_records)
171
- end
172
-
173
- def test_timestamp_second
174
- records = [
175
- [{"key1" => Time.parse("1960-01-01T02:09:30Z"), "key2" => nil}],
176
- [nil],
177
- ]
178
- target = build({
179
- type: :timestamp,
180
- unit: :second,
181
- },
182
- records)
183
- assert_equal(records, target.raw_records)
184
- end
185
-
186
- def test_timestamp_milli
187
- records = [
188
- [{"key1" => Time.parse("1960-01-01T02:09:30.123Z"), "key2" => nil}],
189
- [nil],
190
- ]
191
- target = build({
192
- type: :timestamp,
193
- unit: :milli,
194
- },
195
- records)
196
- assert_equal(records, target.raw_records)
197
- end
198
-
199
- def test_timestamp_micro
200
- records = [
201
- [{"key1" => Time.parse("1960-01-01T02:09:30.123456Z"), "key2" => nil}],
202
- [nil],
203
- ]
204
- target = build({
205
- type: :timestamp,
206
- unit: :micro,
207
- },
208
- records)
209
- assert_equal(records, target.raw_records)
210
- end
211
-
212
- def test_timestamp_nano
213
- records = [
214
- [{"key1" => Time.parse("1960-01-01T02:09:30.123456789Z"), "key2" => nil}],
215
- [nil],
216
- ]
217
- target = build({
218
- type: :timestamp,
219
- unit: :nano,
220
- },
221
- records)
222
- assert_equal(records, target.raw_records)
223
- end
224
-
225
- def test_time32_second
226
- unit = Arrow::TimeUnit::SECOND
227
- records = [
228
- # 00:10:00
229
- [{"key1" => Arrow::Time.new(unit, 60 * 10), "key2" => nil}],
230
- [nil],
231
- ]
232
- target = build({
233
- type: :time32,
234
- unit: :second,
235
- },
236
- records)
237
- assert_equal(records, target.raw_records)
238
- end
239
-
240
- def test_time32_milli
241
- unit = Arrow::TimeUnit::MILLI
242
- records = [
243
- # 00:10:00.123
244
- [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123), "key2" => nil}],
245
- [nil],
246
- ]
247
- target = build({
248
- type: :time32,
249
- unit: :milli,
250
- },
251
- records)
252
- assert_equal(records, target.raw_records)
253
- end
254
-
255
- def test_time64_micro
256
- unit = Arrow::TimeUnit::MICRO
257
- records = [
258
- # 00:10:00.123456
259
- [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), "key2" => nil}],
260
- [nil],
261
- ]
262
- target = build({
263
- type: :time64,
264
- unit: :micro,
265
- },
266
- records)
267
- assert_equal(records, target.raw_records)
268
- end
269
-
270
- def test_time64_nano
271
- unit = Arrow::TimeUnit::NANO
272
- records = [
273
- # 00:10:00.123456789
274
- [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), "key2" => nil}],
275
- [nil],
276
- ]
277
- target = build({
278
- type: :time64,
279
- unit: :nano,
280
- },
281
- records)
282
- assert_equal(records, target.raw_records)
283
- end
284
-
285
- def test_decimal128
286
- records = [
287
- [{"key1" => BigDecimal("92.92"), "key2" => nil}],
288
- [nil],
289
- ]
290
- target = build({
291
- type: :decimal128,
292
- precision: 8,
293
- scale: 2,
294
- },
295
- records)
296
- assert_equal(records, target.raw_records)
297
- end
298
-
299
- def test_decimal256
300
- records = [
301
- [{"key1" => BigDecimal("92.92"), "key2" => nil}],
302
- [nil],
303
- ]
304
- target = build({
305
- type: :decimal256,
306
- precision: 38,
307
- scale: 2,
308
- },
309
- records)
310
- assert_equal(records, target.raw_records)
311
- end
312
-
313
- def test_month_interval
314
- records = [
315
- [{"key1" => 1, "key2" => nil}],
316
- [nil],
317
- ]
318
- target = build(:month_interval, records)
319
- assert_equal(records, target.raw_records)
320
- end
321
-
322
- def test_day_time_interval
323
- records = [
324
- [
325
- {
326
- "key1" => {day: 1, millisecond: 100},
327
- "key2" => nil,
328
- },
329
- ],
330
- [nil],
331
- ]
332
- target = build(:day_time_interval, records)
333
- assert_equal(records, target.raw_records)
334
- end
335
-
336
- def test_month_day_nano_interval
337
- records = [
338
- [
339
- {
340
- "key1" => {month: 1, day: 1, nanosecond: 100},
341
- "key2" => nil,
342
- },
343
- ],
344
- [nil],
345
- ]
346
- target = build(:month_day_nano_interval, records)
347
- assert_equal(records, target.raw_records)
348
- end
349
-
350
- def test_list
351
- records = [
352
- [{"key1" => [true, nil, false], "key2" => nil}],
353
- [nil],
354
- ]
355
- target = build({
356
- type: :list,
357
- field: {
358
- name: :element,
359
- type: :boolean,
360
- },
361
- },
362
- records)
363
- assert_equal(records, target.raw_records)
364
- end
365
-
366
- def test_struct
367
- records = [
368
- [{"key1" => {"field" => true}, "key2" => nil, "key3" => {"field" => nil}}],
369
- [nil],
370
- ]
371
- target = build({
372
- type: :struct,
373
- fields: [
374
- {
375
- name: :field,
376
- type: :boolean,
377
- },
378
- ],
379
- },
380
- records)
381
- assert_equal(records, target.raw_records)
382
- end
383
-
384
- def test_map
385
- records = [
386
- [{"key1" => {"sub_key1" => true, "sub_key2" => nil}, "key2" => nil}],
387
- [nil],
388
- ]
389
- target = build({
390
- type: :map,
391
- key: :string,
392
- item: :boolean,
393
- },
394
- records)
395
- assert_equal(records, target.raw_records)
396
- end
397
-
398
- def remove_union_field_names(records)
399
- records.collect do |record|
400
- record.collect do |column|
401
- if column.nil?
402
- column
403
- else
404
- value = {}
405
- column.each do |k, v|
406
- v = v.values[0] unless v.nil?
407
- value[k] = v
408
- end
409
- value
410
- end
411
- end
412
- end
413
- end
414
-
415
- def test_sparse_union
416
- records = [
417
- [
418
- {
419
- "key1" => {"field1" => true},
420
- "key2" => nil,
421
- "key3" => {"field2" => 29},
422
- "key4" => {"field2" => nil},
423
- },
424
- ],
425
- [nil],
426
- ]
427
- target = build({
428
- type: :sparse_union,
429
- fields: [
430
- {
431
- name: :field1,
432
- type: :boolean,
433
- },
434
- {
435
- name: :field2,
436
- type: :uint8,
437
- },
438
- ],
439
- type_codes: [0, 1],
440
- },
441
- records)
442
- assert_equal(remove_union_field_names(records),
443
- target.raw_records)
444
- end
445
-
446
- def test_dense_union
447
- records = [
448
- [
449
- {
450
- "key1" => {"field1" => true},
451
- "key2" => nil,
452
- "key3" => {"field2" => 29},
453
- "key4" => {"field2" => nil},
454
- },
455
- ],
456
- [nil],
457
- ]
458
- target = build({
459
- type: :dense_union,
460
- fields: [
461
- {
462
- name: :field1,
463
- type: :boolean,
464
- },
465
- {
466
- name: :field2,
467
- type: :uint8,
468
- },
469
- ],
470
- type_codes: [0, 1],
471
- },
472
- records)
473
- assert_equal(remove_union_field_names(records),
474
- target.raw_records)
475
- end
476
-
477
- def test_dictionary
478
- records = [
479
- [{"key1" => "Ruby", "key2" => nil, "key3" => "GLib"}],
480
- [nil],
481
- ]
482
- target = build({
483
- type: :dictionary,
484
- index_data_type: :int8,
485
- value_data_type: :string,
486
- ordered: false,
487
- },
488
- records)
489
- assert_equal(records, target.raw_records)
490
- end
491
- end
492
-
493
- class RawRecordsRecordBatchMapArrayTest < Test::Unit::TestCase
494
- include RawRecordsMapArrayTests
495
-
496
- def build(type, records)
497
- Arrow::RecordBatch.new(build_schema(type), records)
498
- end
499
- end
500
-
501
- class RawRecordsTableMapArrayTest < Test::Unit::TestCase
502
- include RawRecordsMapArrayTests
503
-
504
- def build(type, records)
505
- Arrow::Table.new(build_schema(type), records)
506
- end
507
- end
@@ -1,65 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module RawRecordsMultipleColumnsTests
19
- def test_3_elements
20
- records = [
21
- [true, nil, "Ruby"],
22
- [nil, 0, "GLib"],
23
- [false, 2 ** 8 - 1, nil],
24
- ]
25
- target = build([
26
- {name: :column0, type: :boolean},
27
- {name: :column1, type: :uint8},
28
- {name: :column2, type: :string},
29
- ],
30
- records)
31
- assert_equal(records, target.raw_records)
32
- end
33
-
34
- def test_4_elements
35
- records = [
36
- [true, nil, "Ruby", -(2 ** 63)],
37
- [nil, 0, "GLib", nil],
38
- [false, 2 ** 8 - 1, nil, (2 ** 63) - 1],
39
- ]
40
- target = build([
41
- {name: :column0, type: :boolean},
42
- {name: :column1, type: :uint8},
43
- {name: :column2, type: :string},
44
- {name: :column3, type: :int64},
45
- ],
46
- records)
47
- assert_equal(records, target.raw_records)
48
- end
49
- end
50
-
51
- class RawRecordsRecordBatchMultipleColumnsTest < Test::Unit::TestCase
52
- include RawRecordsMultipleColumnsTests
53
-
54
- def build(schema, records)
55
- Arrow::RecordBatch.new(schema, records)
56
- end
57
- end
58
-
59
- class RawRecordsTableMultipleColumnsTest < Test::Unit::TestCase
60
- include RawRecordsMultipleColumnsTests
61
-
62
- def build(schema, records)
63
- Arrow::Table.new(schema, records)
64
- end
65
- end