red-arrow 8.0.0 → 24.0.0

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (178)
  1. checksums.yaml +4 -4
  2. data/README.md +15 -7
  3. data/ext/arrow/arrow.cpp +67 -0
  4. data/ext/arrow/converters.cpp +10 -0
  5. data/ext/arrow/converters.hpp +310 -46
  6. data/ext/arrow/extconf.rb +41 -22
  7. data/ext/arrow/raw-records.cpp +165 -2
  8. data/ext/arrow/red-arrow.hpp +2 -0
  9. data/ext/arrow/values.cpp +6 -2
  10. data/lib/arrow/array-builder.rb +89 -14
  11. data/{test/test-time32-data-type.rb → lib/arrow/array-computable.rb} +24 -16
  12. data/{test/test-buffer.rb → lib/arrow/array-statistics.rb} +19 -24
  13. data/lib/arrow/array.rb +40 -4
  14. data/lib/arrow/chunked-array.rb +56 -1
  15. data/lib/arrow/column-containable.rb +9 -0
  16. data/lib/arrow/column.rb +49 -4
  17. data/{test/test-tensor.rb → lib/arrow/csv-write-options.rb} +28 -31
  18. data/lib/arrow/data-type.rb +17 -3
  19. data/lib/arrow/decimal128-array-builder.rb +16 -6
  20. data/lib/arrow/decimal128.rb +14 -0
  21. data/lib/arrow/decimal256-array-builder.rb +16 -6
  22. data/lib/arrow/decimal256.rb +14 -0
  23. data/{test/test-float-scalar.rb → lib/arrow/dense-union-array-builder.rb} +27 -24
  24. data/{test/test-boolean-scalar.rb → lib/arrow/dense-union-array.rb} +7 -7
  25. data/lib/arrow/duration-array-builder.rb +27 -0
  26. data/lib/arrow/duration-array.rb +24 -0
  27. data/lib/arrow/duration-data-type.rb +32 -0
  28. data/lib/arrow/expression.rb +6 -2
  29. data/lib/arrow/field-containable.rb +1 -1
  30. data/lib/arrow/field.rb +44 -3
  31. data/lib/arrow/fixed-size-list-array-builder.rb +29 -0
  32. data/lib/arrow/fixed-size-list-data-type.rb +118 -0
  33. data/lib/arrow/function.rb +0 -1
  34. data/lib/arrow/half-float-array-builder.rb +32 -0
  35. data/lib/arrow/half-float-array.rb +24 -0
  36. data/lib/arrow/half-float.rb +118 -0
  37. data/{test/helper/fixture.rb → lib/arrow/input-referable.rb} +7 -6
  38. data/lib/arrow/jruby/array-builder.rb +114 -0
  39. data/lib/arrow/jruby/array.rb +109 -0
  40. data/lib/arrow/jruby/chunked-array.rb +36 -0
  41. data/lib/arrow/jruby/compression-type.rb +26 -0
  42. data/lib/arrow/jruby/csv-read-options.rb +32 -0
  43. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  44. data/lib/arrow/jruby/decimal128.rb +28 -0
  45. data/lib/arrow/jruby/decimal256.rb +28 -0
  46. data/{test/fixture/float-integer.csv → lib/arrow/jruby/error.rb} +7 -4
  47. data/lib/arrow/jruby/file-system.rb +24 -0
  48. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  49. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  50. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  51. data/{test/fixture/integer-float.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  52. data/lib/arrow/jruby/sort-options.rb +24 -0
  53. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  54. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  55. data/lib/arrow/jruby/writable.rb +24 -0
  56. data/lib/arrow/jruby.rb +52 -0
  57. data/{test/test-date32-array.rb → lib/arrow/large-list-array-builder.rb} +10 -5
  58. data/lib/arrow/large-list-data-type.rb +83 -0
  59. data/lib/arrow/libraries.rb +140 -0
  60. data/lib/arrow/list-array-builder.rb +1 -68
  61. data/lib/arrow/list-data-type.rb +3 -38
  62. data/{test/test-dictionary-array.rb → lib/arrow/list-field-resolvable.rb} +26 -17
  63. data/lib/arrow/list-slice-options.rb +76 -0
  64. data/lib/arrow/list-values-appendable.rb +88 -0
  65. data/lib/arrow/loader.rb +15 -96
  66. data/{test/test-decimal128-array.rb → lib/arrow/make-struct-options.rb} +18 -18
  67. data/lib/arrow/raw-table-converter.rb +10 -3
  68. data/lib/arrow/raw-tensor-converter.rb +89 -0
  69. data/lib/arrow/record-batch-file-reader.rb +2 -0
  70. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  71. data/lib/arrow/record-batch.rb +6 -2
  72. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +5 -4
  73. data/lib/arrow/scalar.rb +67 -0
  74. data/lib/arrow/slicer.rb +61 -0
  75. data/lib/arrow/sort-key.rb +3 -3
  76. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  77. data/lib/arrow/sparse-union-array.rb +26 -0
  78. data/lib/arrow/stream-decoder.rb +29 -0
  79. data/{test/test-decimal256-data-type.rb → lib/arrow/stream-listener.rb} +25 -9
  80. data/lib/arrow/string-array-builder.rb +30 -0
  81. data/lib/arrow/struct-array-builder.rb +0 -5
  82. data/lib/arrow/table-formatter.rb +38 -8
  83. data/lib/arrow/table-list-formatter.rb +3 -3
  84. data/lib/arrow/table-loader.rb +11 -5
  85. data/lib/arrow/table-saver.rb +4 -3
  86. data/lib/arrow/table-table-formatter.rb +7 -0
  87. data/lib/arrow/table.rb +180 -33
  88. data/lib/arrow/tensor.rb +144 -0
  89. data/lib/arrow/time-unit.rb +31 -0
  90. data/lib/arrow/time32-array-builder.rb +2 -14
  91. data/lib/arrow/time32-data-type.rb +9 -38
  92. data/lib/arrow/time64-array-builder.rb +2 -14
  93. data/lib/arrow/time64-data-type.rb +9 -38
  94. data/lib/arrow/timestamp-array-builder.rb +3 -15
  95. data/lib/arrow/timestamp-data-type.rb +9 -34
  96. data/{test/test-date64-array.rb → lib/arrow/timestamp-parser.rb} +14 -6
  97. data/lib/arrow/union-array-builder.rb +59 -0
  98. data/lib/arrow/union-array.rb +26 -0
  99. data/lib/arrow/version.rb +1 -1
  100. data/lib/arrow.rb +2 -7
  101. data/red-arrow.gemspec +74 -11
  102. metadata +85 -210
  103. data/test/fixture/TestOrcFile.test1.orc +0 -0
  104. data/test/fixture/with-header-float.csv +0 -20
  105. data/test/fixture/with-header.csv +0 -20
  106. data/test/fixture/without-header-float.csv +0 -19
  107. data/test/fixture/without-header.csv +0 -19
  108. data/test/helper/omittable.rb +0 -36
  109. data/test/helper.rb +0 -30
  110. data/test/raw-records/test-basic-arrays.rb +0 -395
  111. data/test/raw-records/test-dense-union-array.rb +0 -521
  112. data/test/raw-records/test-list-array.rb +0 -610
  113. data/test/raw-records/test-map-array.rb +0 -478
  114. data/test/raw-records/test-multiple-columns.rb +0 -65
  115. data/test/raw-records/test-sparse-union-array.rb +0 -511
  116. data/test/raw-records/test-struct-array.rb +0 -515
  117. data/test/raw-records/test-table.rb +0 -47
  118. data/test/run-test.rb +0 -71
  119. data/test/test-array-builder.rb +0 -136
  120. data/test/test-array.rb +0 -325
  121. data/test/test-bigdecimal.rb +0 -40
  122. data/test/test-binary-dictionary-array-builder.rb +0 -103
  123. data/test/test-chunked-array.rb +0 -183
  124. data/test/test-column.rb +0 -92
  125. data/test/test-csv-loader.rb +0 -250
  126. data/test/test-data-type.rb +0 -83
  127. data/test/test-decimal128-array-builder.rb +0 -112
  128. data/test/test-decimal128-data-type.rb +0 -31
  129. data/test/test-decimal128.rb +0 -102
  130. data/test/test-decimal256-array-builder.rb +0 -112
  131. data/test/test-decimal256-array.rb +0 -38
  132. data/test/test-decimal256.rb +0 -102
  133. data/test/test-dense-union-data-type.rb +0 -41
  134. data/test/test-dictionary-data-type.rb +0 -40
  135. data/test/test-expression.rb +0 -40
  136. data/test/test-feather.rb +0 -49
  137. data/test/test-field.rb +0 -91
  138. data/test/test-file-output-stream.rb +0 -54
  139. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  140. data/test/test-fixed-size-binary-array.rb +0 -36
  141. data/test/test-function.rb +0 -210
  142. data/test/test-group.rb +0 -180
  143. data/test/test-list-array-builder.rb +0 -79
  144. data/test/test-list-array.rb +0 -32
  145. data/test/test-list-data-type.rb +0 -69
  146. data/test/test-map-array-builder.rb +0 -110
  147. data/test/test-map-array.rb +0 -33
  148. data/test/test-memory-view.rb +0 -434
  149. data/test/test-orc.rb +0 -173
  150. data/test/test-record-batch-builder.rb +0 -125
  151. data/test/test-record-batch-file-reader.rb +0 -115
  152. data/test/test-record-batch-iterator.rb +0 -37
  153. data/test/test-record-batch-reader.rb +0 -46
  154. data/test/test-record-batch.rb +0 -182
  155. data/test/test-schema.rb +0 -134
  156. data/test/test-slicer.rb +0 -487
  157. data/test/test-sort-indices.rb +0 -40
  158. data/test/test-sort-key.rb +0 -81
  159. data/test/test-sort-options.rb +0 -58
  160. data/test/test-sparse-union-data-type.rb +0 -41
  161. data/test/test-string-dictionary-array-builder.rb +0 -103
  162. data/test/test-struct-array-builder.rb +0 -184
  163. data/test/test-struct-array.rb +0 -94
  164. data/test/test-struct-data-type.rb +0 -112
  165. data/test/test-table.rb +0 -1123
  166. data/test/test-time.rb +0 -288
  167. data/test/test-time32-array.rb +0 -81
  168. data/test/test-time64-array.rb +0 -81
  169. data/test/test-time64-data-type.rb +0 -42
  170. data/test/test-timestamp-array.rb +0 -45
  171. data/test/test-timestamp-data-type.rb +0 -42
  172. data/test/values/test-basic-arrays.rb +0 -325
  173. data/test/values/test-dense-union-array.rb +0 -509
  174. data/test/values/test-dictionary-array.rb +0 -295
  175. data/test/values/test-list-array.rb +0 -571
  176. data/test/values/test-map-array.rb +0 -466
  177. data/test/values/test-sparse-union-array.rb +0 -500
  178. data/test/values/test-struct-array.rb +0 -512
@@ -1,466 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module ValuesMapArrayTests
19
- def build_data_type(item_type)
20
- Arrow::MapDataType.new(
21
- key: :string,
22
- item: item_type
23
- )
24
- end
25
-
26
- def build_array(item_type, values)
27
- Arrow::MapArray.new(build_data_type(item_type), values)
28
- end
29
-
30
- def test_null
31
- values = [
32
- {"key1" => nil},
33
- nil,
34
- ]
35
- target = build(:null, values)
36
- assert_equal(values, target.values)
37
- end
38
-
39
- def test_boolean
40
- values = [
41
- {"key1" => false, "key2" => nil},
42
- nil,
43
- ]
44
- target = build(:boolean, values)
45
- assert_equal(values, target.values)
46
- end
47
-
48
- def test_int8
49
- values = [
50
- {"key1" => (2 ** 7) - 1, "key2" => nil},
51
- nil,
52
- ]
53
- target = build(:int8, values)
54
- assert_equal(values, target.values)
55
- end
56
-
57
- def test_uint8
58
- values = [
59
- {"key1" => (2 ** 8) - 1, "key2" => nil},
60
- nil,
61
- ]
62
- target = build(:uint8, values)
63
- assert_equal(values, target.values)
64
- end
65
-
66
- def test_uint16
67
- values = [
68
- {"key1" => (2 ** 16) - 1, "key2" => nil},
69
- nil,
70
- ]
71
- target = build(:uint16, values)
72
- assert_equal(values, target.values)
73
- end
74
-
75
- def test_int32
76
- values = [
77
- {"key1" => -(2 ** 31), "key2" => nil},
78
- nil,
79
- ]
80
- target = build(:int32, values)
81
- assert_equal(values, target.values)
82
- end
83
-
84
- def test_uint32
85
- values = [
86
- {"key1" => (2 ** 32) - 1, "key2" => nil},
87
- nil,
88
- ]
89
- target = build(:uint32, values)
90
- assert_equal(values, target.values)
91
- end
92
-
93
- def test_int64
94
- values = [
95
- {"key1" => -(2 ** 63), "key2" => nil},
96
- nil,
97
- ]
98
- target = build(:int64, values)
99
- assert_equal(values, target.values)
100
- end
101
-
102
- def test_uint64
103
- values = [
104
- {"key1" => (2 ** 64) - 1, "key2" => nil},
105
- nil,
106
- ]
107
- target = build(:uint64, values)
108
- assert_equal(values, target.values)
109
- end
110
-
111
- def test_float
112
- values = [
113
- {"key1" => -1.0, "key2" => nil},
114
- nil,
115
- ]
116
- target = build(:float, values)
117
- assert_equal(values, target.values)
118
- end
119
-
120
- def test_double
121
- values = [
122
- {"key1" => -1.0, "key2" => nil},
123
- nil,
124
- ]
125
- target = build(:double, values)
126
- assert_equal(values, target.values)
127
- end
128
-
129
- def test_binary
130
- values = [
131
- {"key1" => "\xff".b, "key2" => nil},
132
- nil,
133
- ]
134
- target = build(:binary, values)
135
- assert_equal(values, target.values)
136
- end
137
-
138
- def test_string
139
- values = [
140
- {"key1" => "Ruby", "key2" => nil},
141
- nil,
142
- ]
143
- target = build(:string, values)
144
- assert_equal(values, target.values)
145
- end
146
-
147
- def test_date32
148
- values = [
149
- {"key1" => Date.new(1960, 1, 1), "key2" => nil},
150
- nil,
151
- ]
152
- target = build(:date32, values)
153
- assert_equal(values, target.values)
154
- end
155
-
156
- def test_date64
157
- values = [
158
- {"key1" => DateTime.new(1960, 1, 1, 2, 9, 30), "key2" => nil},
159
- nil,
160
- ]
161
- target = build(:date64, values)
162
- assert_equal(values, target.values)
163
- end
164
-
165
- def test_timestamp_second
166
- values = [
167
- {"key1" => Time.parse("1960-01-01T02:09:30Z"), "key2" => nil},
168
- nil,
169
- ]
170
- target = build({
171
- type: :timestamp,
172
- unit: :second,
173
- },
174
- values)
175
- assert_equal(values, target.values)
176
- end
177
-
178
- def test_timestamp_milli
179
- values = [
180
- {"key1" => Time.parse("1960-01-01T02:09:30.123Z"), "key2" => nil},
181
- nil,
182
- ]
183
- target = build({
184
- type: :timestamp,
185
- unit: :milli,
186
- },
187
- values)
188
- assert_equal(values, target.values)
189
- end
190
-
191
- def test_timestamp_micro
192
- values = [
193
- {"key1" => Time.parse("1960-01-01T02:09:30.123456Z"), "key2" => nil},
194
- nil,
195
- ]
196
- target = build({
197
- type: :timestamp,
198
- unit: :micro,
199
- },
200
- values)
201
- assert_equal(values, target.values)
202
- end
203
-
204
- def test_timestamp_nano
205
- values = [
206
- {"key1" => Time.parse("1960-01-01T02:09:30.123456789Z"), "key2" => nil},
207
- nil,
208
- ]
209
- target = build({
210
- type: :timestamp,
211
- unit: :nano,
212
- },
213
- values)
214
- assert_equal(values, target.values)
215
- end
216
-
217
- def test_time32_second
218
- unit = Arrow::TimeUnit::SECOND
219
- values = [
220
- # 00:10:00
221
- {"key1" => Arrow::Time.new(unit, 60 * 10), "key2" => nil},
222
- nil,
223
- ]
224
- target = build({
225
- type: :time32,
226
- unit: :second,
227
- },
228
- values)
229
- assert_equal(values, target.values)
230
- end
231
-
232
- def test_time32_milli
233
- unit = Arrow::TimeUnit::MILLI
234
- values = [
235
- # 00:10:00.123
236
- {"key1" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123), "key2" => nil},
237
- nil,
238
- ]
239
- target = build({
240
- type: :time32,
241
- unit: :milli,
242
- },
243
- values)
244
- assert_equal(values, target.values)
245
- end
246
-
247
- def test_time64_micro
248
- unit = Arrow::TimeUnit::MICRO
249
- values = [
250
- # 00:10:00.123456
251
- {"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), "key2" => nil},
252
- nil,
253
- ]
254
- target = build({
255
- type: :time64,
256
- unit: :micro,
257
- },
258
- values)
259
- assert_equal(values, target.values)
260
- end
261
-
262
- def test_time64_nano
263
- unit = Arrow::TimeUnit::NANO
264
- values = [
265
- # 00:10:00.123456789
266
- {"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), "key2" => nil},
267
- nil,
268
- ]
269
- target = build({
270
- type: :time64,
271
- unit: :nano,
272
- },
273
- values)
274
- assert_equal(values, target.values)
275
- end
276
-
277
- def test_decimal128
278
- values = [
279
- {"key1" => BigDecimal("92.92"), "key2" => nil},
280
- nil,
281
- ]
282
- target = build({
283
- type: :decimal128,
284
- precision: 8,
285
- scale: 2,
286
- },
287
- values)
288
- assert_equal(values, target.values)
289
- end
290
-
291
- def test_decimal256
292
- values = [
293
- {"key1" => BigDecimal("92.92"), "key2" => nil},
294
- nil,
295
- ]
296
- target = build({
297
- type: :decimal256,
298
- precision: 38,
299
- scale: 2,
300
- },
301
- values)
302
- assert_equal(values, target.values)
303
- end
304
-
305
- def test_month_interval
306
- values = [
307
- {"key1" => 1, "key2" => nil},
308
- nil,
309
- ]
310
- target = build(:month_interval, values)
311
- assert_equal(values, target.values)
312
- end
313
-
314
- def test_day_time_interval
315
- values = [
316
- {
317
- "key1" => {day: 1, millisecond: 100},
318
- "key2" => nil,
319
- },
320
- nil,
321
- ]
322
- target = build(:day_time_interval, values)
323
- assert_equal(values, target.values)
324
- end
325
-
326
- def test_month_day_nano_interval
327
- values = [
328
- {
329
- "key1" => {month: 1, day: 1, nanosecond: 100},
330
- "key2" => nil,
331
- },
332
- nil,
333
- ]
334
- target = build(:month_day_nano_interval, values)
335
- assert_equal(values, target.values)
336
- end
337
-
338
- def test_list
339
- values = [
340
- {"key1" => [true, nil, false], "key2" => nil},
341
- nil,
342
- ]
343
- target = build({
344
- type: :list,
345
- field: {
346
- name: :sub_element,
347
- type: :boolean,
348
- },
349
- },
350
- values)
351
- assert_equal(values, target.values)
352
- end
353
-
354
- def test_struct
355
- values = [
356
- {"key1" => {"field" => true}, "key2" => nil, "key3" => {"field" => nil}},
357
- nil,
358
- ]
359
- target = build({
360
- type: :struct,
361
- fields: [
362
- {
363
- name: :field,
364
- type: :boolean,
365
- },
366
- ],
367
- },
368
- values)
369
- assert_equal(values, target.values)
370
- end
371
-
372
- def test_map
373
- values = [
374
- {"key1" => {"sub_key1" => true, "sub_key2" => nil}, "key2" => nil},
375
- nil,
376
- ]
377
- target = build({
378
- type: :map,
379
- key: :string,
380
- item: :boolean,
381
- },
382
- values)
383
- assert_equal(values, target.values)
384
- end
385
-
386
- def test_sparse_union
387
- omit("Need to add support for SparseUnionArrayBuilder")
388
- values = [
389
- {"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}},
390
- nil,
391
- ]
392
- target = build({
393
- type: :sparse_union,
394
- fields: [
395
- {
396
- name: :field1,
397
- type: :boolean,
398
- },
399
- {
400
- name: :field2,
401
- type: :uint8,
402
- },
403
- ],
404
- type_codes: [0, 1],
405
- },
406
- values)
407
- assert_equal(values, target.values)
408
- end
409
-
410
- def test_dense_union
411
- omit("Need to add support for DenseUnionArrayBuilder")
412
- values = [
413
- {"key1" => {"field1" => true}, "key2" => nil, "key3" => {"field2" => nil}},
414
- nil,
415
- ]
416
- target = build({
417
- type: :dense_union,
418
- fields: [
419
- {
420
- name: :field1,
421
- type: :boolean,
422
- },
423
- {
424
- name: :field2,
425
- type: :uint8,
426
- },
427
- ],
428
- type_codes: [0, 1],
429
- },
430
- values)
431
- assert_equal(values, target.values)
432
- end
433
-
434
- def test_dictionary
435
- omit("Need to add support for DictionaryArrayBuilder")
436
- values = [
437
- {"key1" => "Ruby", "key2" => nil, "key3" => "GLib"},
438
- nil,
439
- ]
440
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
441
- target = build({
442
- type: :dictionary,
443
- index_data_type: :int8,
444
- dictionary: dictionary,
445
- ordered: true,
446
- },
447
- values)
448
- assert_equal(values, target.values)
449
- end
450
- end
451
-
452
- class ValuesArrayMapArrayTest < Test::Unit::TestCase
453
- include ValuesMapArrayTests
454
-
455
- def build(item_type, values)
456
- build_array(item_type, values)
457
- end
458
- end
459
-
460
- class ValuesChunkedArrayMapArrayTest < Test::Unit::TestCase
461
- include ValuesMapArrayTests
462
-
463
- def build(item_type, values)
464
- Arrow::ChunkedArray.new([build_array(item_type, values)])
465
- end
466
- end