red-arrow 8.0.0 → 24.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178):
  1. checksums.yaml +4 -4
  2. data/README.md +15 -7
  3. data/ext/arrow/arrow.cpp +67 -0
  4. data/ext/arrow/converters.cpp +10 -0
  5. data/ext/arrow/converters.hpp +310 -46
  6. data/ext/arrow/extconf.rb +41 -22
  7. data/ext/arrow/raw-records.cpp +165 -2
  8. data/ext/arrow/red-arrow.hpp +2 -0
  9. data/ext/arrow/values.cpp +6 -2
  10. data/lib/arrow/array-builder.rb +89 -14
  11. data/{test/test-time32-data-type.rb → lib/arrow/array-computable.rb} +24 -16
  12. data/{test/test-buffer.rb → lib/arrow/array-statistics.rb} +19 -24
  13. data/lib/arrow/array.rb +40 -4
  14. data/lib/arrow/chunked-array.rb +56 -1
  15. data/lib/arrow/column-containable.rb +9 -0
  16. data/lib/arrow/column.rb +49 -4
  17. data/{test/test-tensor.rb → lib/arrow/csv-write-options.rb} +28 -31
  18. data/lib/arrow/data-type.rb +17 -3
  19. data/lib/arrow/decimal128-array-builder.rb +16 -6
  20. data/lib/arrow/decimal128.rb +14 -0
  21. data/lib/arrow/decimal256-array-builder.rb +16 -6
  22. data/lib/arrow/decimal256.rb +14 -0
  23. data/{test/test-float-scalar.rb → lib/arrow/dense-union-array-builder.rb} +27 -24
  24. data/{test/test-boolean-scalar.rb → lib/arrow/dense-union-array.rb} +7 -7
  25. data/lib/arrow/duration-array-builder.rb +27 -0
  26. data/lib/arrow/duration-array.rb +24 -0
  27. data/lib/arrow/duration-data-type.rb +32 -0
  28. data/lib/arrow/expression.rb +6 -2
  29. data/lib/arrow/field-containable.rb +1 -1
  30. data/lib/arrow/field.rb +44 -3
  31. data/lib/arrow/fixed-size-list-array-builder.rb +29 -0
  32. data/lib/arrow/fixed-size-list-data-type.rb +118 -0
  33. data/lib/arrow/function.rb +0 -1
  34. data/lib/arrow/half-float-array-builder.rb +32 -0
  35. data/lib/arrow/half-float-array.rb +24 -0
  36. data/lib/arrow/half-float.rb +118 -0
  37. data/{test/helper/fixture.rb → lib/arrow/input-referable.rb} +7 -6
  38. data/lib/arrow/jruby/array-builder.rb +114 -0
  39. data/lib/arrow/jruby/array.rb +109 -0
  40. data/lib/arrow/jruby/chunked-array.rb +36 -0
  41. data/lib/arrow/jruby/compression-type.rb +26 -0
  42. data/lib/arrow/jruby/csv-read-options.rb +32 -0
  43. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  44. data/lib/arrow/jruby/decimal128.rb +28 -0
  45. data/lib/arrow/jruby/decimal256.rb +28 -0
  46. data/{test/fixture/float-integer.csv → lib/arrow/jruby/error.rb} +7 -4
  47. data/lib/arrow/jruby/file-system.rb +24 -0
  48. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  49. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  50. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  51. data/{test/fixture/integer-float.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  52. data/lib/arrow/jruby/sort-options.rb +24 -0
  53. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  54. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  55. data/lib/arrow/jruby/writable.rb +24 -0
  56. data/lib/arrow/jruby.rb +52 -0
  57. data/{test/test-date32-array.rb → lib/arrow/large-list-array-builder.rb} +10 -5
  58. data/lib/arrow/large-list-data-type.rb +83 -0
  59. data/lib/arrow/libraries.rb +140 -0
  60. data/lib/arrow/list-array-builder.rb +1 -68
  61. data/lib/arrow/list-data-type.rb +3 -38
  62. data/{test/test-dictionary-array.rb → lib/arrow/list-field-resolvable.rb} +26 -17
  63. data/lib/arrow/list-slice-options.rb +76 -0
  64. data/lib/arrow/list-values-appendable.rb +88 -0
  65. data/lib/arrow/loader.rb +15 -96
  66. data/{test/test-decimal128-array.rb → lib/arrow/make-struct-options.rb} +18 -18
  67. data/lib/arrow/raw-table-converter.rb +10 -3
  68. data/lib/arrow/raw-tensor-converter.rb +89 -0
  69. data/lib/arrow/record-batch-file-reader.rb +2 -0
  70. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  71. data/lib/arrow/record-batch.rb +6 -2
  72. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +5 -4
  73. data/lib/arrow/scalar.rb +67 -0
  74. data/lib/arrow/slicer.rb +61 -0
  75. data/lib/arrow/sort-key.rb +3 -3
  76. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  77. data/lib/arrow/sparse-union-array.rb +26 -0
  78. data/lib/arrow/stream-decoder.rb +29 -0
  79. data/{test/test-decimal256-data-type.rb → lib/arrow/stream-listener.rb} +25 -9
  80. data/lib/arrow/string-array-builder.rb +30 -0
  81. data/lib/arrow/struct-array-builder.rb +0 -5
  82. data/lib/arrow/table-formatter.rb +38 -8
  83. data/lib/arrow/table-list-formatter.rb +3 -3
  84. data/lib/arrow/table-loader.rb +11 -5
  85. data/lib/arrow/table-saver.rb +4 -3
  86. data/lib/arrow/table-table-formatter.rb +7 -0
  87. data/lib/arrow/table.rb +180 -33
  88. data/lib/arrow/tensor.rb +144 -0
  89. data/lib/arrow/time-unit.rb +31 -0
  90. data/lib/arrow/time32-array-builder.rb +2 -14
  91. data/lib/arrow/time32-data-type.rb +9 -38
  92. data/lib/arrow/time64-array-builder.rb +2 -14
  93. data/lib/arrow/time64-data-type.rb +9 -38
  94. data/lib/arrow/timestamp-array-builder.rb +3 -15
  95. data/lib/arrow/timestamp-data-type.rb +9 -34
  96. data/{test/test-date64-array.rb → lib/arrow/timestamp-parser.rb} +14 -6
  97. data/lib/arrow/union-array-builder.rb +59 -0
  98. data/lib/arrow/union-array.rb +26 -0
  99. data/lib/arrow/version.rb +1 -1
  100. data/lib/arrow.rb +2 -7
  101. data/red-arrow.gemspec +74 -11
  102. metadata +85 -210
  103. data/test/fixture/TestOrcFile.test1.orc +0 -0
  104. data/test/fixture/with-header-float.csv +0 -20
  105. data/test/fixture/with-header.csv +0 -20
  106. data/test/fixture/without-header-float.csv +0 -19
  107. data/test/fixture/without-header.csv +0 -19
  108. data/test/helper/omittable.rb +0 -36
  109. data/test/helper.rb +0 -30
  110. data/test/raw-records/test-basic-arrays.rb +0 -395
  111. data/test/raw-records/test-dense-union-array.rb +0 -521
  112. data/test/raw-records/test-list-array.rb +0 -610
  113. data/test/raw-records/test-map-array.rb +0 -478
  114. data/test/raw-records/test-multiple-columns.rb +0 -65
  115. data/test/raw-records/test-sparse-union-array.rb +0 -511
  116. data/test/raw-records/test-struct-array.rb +0 -515
  117. data/test/raw-records/test-table.rb +0 -47
  118. data/test/run-test.rb +0 -71
  119. data/test/test-array-builder.rb +0 -136
  120. data/test/test-array.rb +0 -325
  121. data/test/test-bigdecimal.rb +0 -40
  122. data/test/test-binary-dictionary-array-builder.rb +0 -103
  123. data/test/test-chunked-array.rb +0 -183
  124. data/test/test-column.rb +0 -92
  125. data/test/test-csv-loader.rb +0 -250
  126. data/test/test-data-type.rb +0 -83
  127. data/test/test-decimal128-array-builder.rb +0 -112
  128. data/test/test-decimal128-data-type.rb +0 -31
  129. data/test/test-decimal128.rb +0 -102
  130. data/test/test-decimal256-array-builder.rb +0 -112
  131. data/test/test-decimal256-array.rb +0 -38
  132. data/test/test-decimal256.rb +0 -102
  133. data/test/test-dense-union-data-type.rb +0 -41
  134. data/test/test-dictionary-data-type.rb +0 -40
  135. data/test/test-expression.rb +0 -40
  136. data/test/test-feather.rb +0 -49
  137. data/test/test-field.rb +0 -91
  138. data/test/test-file-output-stream.rb +0 -54
  139. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  140. data/test/test-fixed-size-binary-array.rb +0 -36
  141. data/test/test-function.rb +0 -210
  142. data/test/test-group.rb +0 -180
  143. data/test/test-list-array-builder.rb +0 -79
  144. data/test/test-list-array.rb +0 -32
  145. data/test/test-list-data-type.rb +0 -69
  146. data/test/test-map-array-builder.rb +0 -110
  147. data/test/test-map-array.rb +0 -33
  148. data/test/test-memory-view.rb +0 -434
  149. data/test/test-orc.rb +0 -173
  150. data/test/test-record-batch-builder.rb +0 -125
  151. data/test/test-record-batch-file-reader.rb +0 -115
  152. data/test/test-record-batch-iterator.rb +0 -37
  153. data/test/test-record-batch-reader.rb +0 -46
  154. data/test/test-record-batch.rb +0 -182
  155. data/test/test-schema.rb +0 -134
  156. data/test/test-slicer.rb +0 -487
  157. data/test/test-sort-indices.rb +0 -40
  158. data/test/test-sort-key.rb +0 -81
  159. data/test/test-sort-options.rb +0 -58
  160. data/test/test-sparse-union-data-type.rb +0 -41
  161. data/test/test-string-dictionary-array-builder.rb +0 -103
  162. data/test/test-struct-array-builder.rb +0 -184
  163. data/test/test-struct-array.rb +0 -94
  164. data/test/test-struct-data-type.rb +0 -112
  165. data/test/test-table.rb +0 -1123
  166. data/test/test-time.rb +0 -288
  167. data/test/test-time32-array.rb +0 -81
  168. data/test/test-time64-array.rb +0 -81
  169. data/test/test-time64-data-type.rb +0 -42
  170. data/test/test-timestamp-array.rb +0 -45
  171. data/test/test-timestamp-data-type.rb +0 -42
  172. data/test/values/test-basic-arrays.rb +0 -325
  173. data/test/values/test-dense-union-array.rb +0 -509
  174. data/test/values/test-dictionary-array.rb +0 -295
  175. data/test/values/test-list-array.rb +0 -571
  176. data/test/values/test-map-array.rb +0 -466
  177. data/test/values/test-sparse-union-array.rb +0 -500
  178. data/test/values/test-struct-array.rb +0 -512
@@ -1,395 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module RawRecordsBasicArraysTests
19
- def test_null
20
- records = [
21
- [nil],
22
- [nil],
23
- [nil],
24
- [nil],
25
- ]
26
- target = build({column: :null}, records)
27
- assert_equal(records, target.raw_records)
28
- end
29
-
30
- def test_boolean
31
- records = [
32
- [true],
33
- [nil],
34
- [false],
35
- ]
36
- target = build({column: :boolean}, records)
37
- assert_equal(records, target.raw_records)
38
- end
39
-
40
- def test_int8
41
- records = [
42
- [-(2 ** 7)],
43
- [nil],
44
- [(2 ** 7) - 1],
45
- ]
46
- target = build({column: :int8}, records)
47
- assert_equal(records, target.raw_records)
48
- end
49
-
50
- def test_uint8
51
- records = [
52
- [0],
53
- [nil],
54
- [(2 ** 8) - 1],
55
- ]
56
- target = build({column: :uint8}, records)
57
- assert_equal(records, target.raw_records)
58
- end
59
-
60
- def test_int16
61
- records = [
62
- [-(2 ** 15)],
63
- [nil],
64
- [(2 ** 15) - 1],
65
- ]
66
- target = build({column: :int16}, records)
67
- assert_equal(records, target.raw_records)
68
- end
69
-
70
- def test_uint16
71
- records = [
72
- [0],
73
- [nil],
74
- [(2 ** 16) - 1],
75
- ]
76
- target = build({column: :uint16}, records)
77
- assert_equal(records, target.raw_records)
78
- end
79
-
80
- def test_int32
81
- records = [
82
- [-(2 ** 31)],
83
- [nil],
84
- [(2 ** 31) - 1],
85
- ]
86
- target = build({column: :int32}, records)
87
- assert_equal(records, target.raw_records)
88
- end
89
-
90
- def test_uint32
91
- records = [
92
- [0],
93
- [nil],
94
- [(2 ** 32) - 1],
95
- ]
96
- target = build({column: :uint32}, records)
97
- assert_equal(records, target.raw_records)
98
- end
99
-
100
- def test_int64
101
- records = [
102
- [-(2 ** 63)],
103
- [nil],
104
- [(2 ** 63) - 1],
105
- ]
106
- target = build({column: :int64}, records)
107
- assert_equal(records, target.raw_records)
108
- end
109
-
110
- def test_uint64
111
- records = [
112
- [0],
113
- [nil],
114
- [(2 ** 64) - 1],
115
- ]
116
- target = build({column: :uint64}, records)
117
- assert_equal(records, target.raw_records)
118
- end
119
-
120
- def test_float
121
- records = [
122
- [-1.0],
123
- [nil],
124
- [1.0],
125
- ]
126
- target = build({column: :float}, records)
127
- assert_equal(records, target.raw_records)
128
- end
129
-
130
- def test_double
131
- records = [
132
- [-1.0],
133
- [nil],
134
- [1.0],
135
- ]
136
- target = build({column: :double}, records)
137
- assert_equal(records, target.raw_records)
138
- end
139
-
140
- def test_binary
141
- records = [
142
- ["\x00".b],
143
- [nil],
144
- ["\xff".b],
145
- ]
146
- target = build({column: :binary}, records)
147
- assert_equal(records, target.raw_records)
148
- end
149
-
150
- def test_tring
151
- records = [
152
- ["Ruby"],
153
- [nil],
154
- ["\u3042"], # U+3042 HIRAGANA LETTER A
155
- ]
156
- target = build({column: :string}, records)
157
- assert_equal(records, target.raw_records)
158
- end
159
-
160
- def test_date32
161
- records = [
162
- [Date.new(1960, 1, 1)],
163
- [nil],
164
- [Date.new(2017, 8, 23)],
165
- ]
166
- target = build({column: :date32}, records)
167
- assert_equal(records, target.raw_records)
168
- end
169
-
170
- def test_date64
171
- records = [
172
- [DateTime.new(1960, 1, 1, 2, 9, 30)],
173
- [nil],
174
- [DateTime.new(2017, 8, 23, 14, 57, 2)],
175
- ]
176
- target = build({column: :date64}, records)
177
- assert_equal(records, target.raw_records)
178
- end
179
-
180
- def test_timestamp_second
181
- records = [
182
- [Time.parse("1960-01-01T02:09:30Z")],
183
- [nil],
184
- [Time.parse("2017-08-23T14:57:02Z")],
185
- ]
186
- target = build({
187
- column: {
188
- type: :timestamp,
189
- unit: :second,
190
- }
191
- },
192
- records)
193
- assert_equal(records, target.raw_records)
194
- end
195
-
196
- def test_timestamp_milli
197
- records = [
198
- [Time.parse("1960-01-01T02:09:30.123Z")],
199
- [nil],
200
- [Time.parse("2017-08-23T14:57:02.987Z")],
201
- ]
202
- target = build({
203
- column: {
204
- type: :timestamp,
205
- unit: :milli,
206
- }
207
- },
208
- records)
209
- assert_equal(records, target.raw_records)
210
- end
211
-
212
- def test_timestamp_micro
213
- records = [
214
- [Time.parse("1960-01-01T02:09:30.123456Z")],
215
- [nil],
216
- [Time.parse("2017-08-23T14:57:02.987654Z")],
217
- ]
218
- target = build({
219
- column: {
220
- type: :timestamp,
221
- unit: :micro,
222
- }
223
- },
224
- records)
225
- assert_equal(records, target.raw_records)
226
- end
227
-
228
- def test_timestamp_nano
229
- records = [
230
- [Time.parse("1960-01-01T02:09:30.123456789Z")],
231
- [nil],
232
- [Time.parse("2017-08-23T14:57:02.987654321Z")],
233
- ]
234
- target = build({
235
- column: {
236
- type: :timestamp,
237
- unit: :nano,
238
- }
239
- },
240
- records)
241
- assert_equal(records, target.raw_records)
242
- end
243
-
244
- def test_time32_second
245
- unit = Arrow::TimeUnit::SECOND
246
- records = [
247
- [Arrow::Time.new(unit, 60 * 10)], # 00:10:00
248
- [nil],
249
- [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09
250
- ]
251
- target = build({
252
- column: {
253
- type: :time32,
254
- unit: :second,
255
- }
256
- },
257
- records)
258
- assert_equal(records, target.raw_records)
259
- end
260
-
261
- def test_time32_milli
262
- unit = Arrow::TimeUnit::MILLI
263
- records = [
264
- [Arrow::Time.new(unit, (60 * 10) * 1000 + 123)], # 00:10:00.123
265
- [nil],
266
- [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987
267
- ]
268
- target = build({
269
- column: {
270
- type: :time32,
271
- unit: :milli,
272
- }
273
- },
274
- records)
275
- assert_equal(records, target.raw_records)
276
- end
277
-
278
- def test_time64_micro
279
- unit = Arrow::TimeUnit::MICRO
280
- records = [
281
- # 00:10:00.123456
282
- [Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)],
283
- [nil],
284
- # 02:00:09.987654
285
- [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)],
286
- ]
287
- target = build({
288
- column: {
289
- type: :time64,
290
- unit: :micro,
291
- }
292
- },
293
- records)
294
- assert_equal(records, target.raw_records)
295
- end
296
-
297
- def test_time64_nano
298
- unit = Arrow::TimeUnit::NANO
299
- records = [
300
- # 00:10:00.123456789
301
- [Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)],
302
- [nil],
303
- # 02:00:09.987654321
304
- [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)],
305
- ]
306
- target = build({
307
- column: {
308
- type: :time64,
309
- unit: :nano,
310
- }
311
- },
312
- records)
313
- assert_equal(records, target.raw_records)
314
- end
315
-
316
- def test_decimal128
317
- records = [
318
- [BigDecimal("92.92")],
319
- [nil],
320
- [BigDecimal("29.29")],
321
- ]
322
- target = build({
323
- column: {
324
- type: :decimal128,
325
- precision: 8,
326
- scale: 2,
327
- }
328
- },
329
- records)
330
- assert_equal(records, target.raw_records)
331
- end
332
-
333
- def test_decimal256
334
- records = [
335
- [BigDecimal("92.92")],
336
- [nil],
337
- [BigDecimal("29.29")],
338
- ]
339
- target = build({
340
- column: {
341
- type: :decimal256,
342
- precision: 38,
343
- scale: 2,
344
- }
345
- },
346
- records)
347
- assert_equal(records, target.raw_records)
348
- end
349
-
350
- def test_month_interval
351
- records = [
352
- [1],
353
- [nil],
354
- [12],
355
- ]
356
- target = build({column: :month_interval}, records)
357
- assert_equal(records, target.raw_records)
358
- end
359
-
360
- def test_day_time_interval
361
- records = [
362
- [{day: 1, millisecond: 100}],
363
- [nil],
364
- [{day: 2, millisecond: 300}],
365
- ]
366
- target = build({column: :day_time_interval}, records)
367
- assert_equal(records, target.raw_records)
368
- end
369
-
370
- def test_month_day_nano_interval
371
- records = [
372
- [{month: 1, day: 1, nanosecond: 100}],
373
- [nil],
374
- [{month: 2, day: 3, nanosecond: 400}],
375
- ]
376
- target = build({column: :month_day_nano_interval}, records)
377
- assert_equal(records, target.raw_records)
378
- end
379
- end
380
-
381
- class RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase
382
- include RawRecordsBasicArraysTests
383
-
384
- def build(schema, records)
385
- Arrow::RecordBatch.new(schema, records)
386
- end
387
- end
388
-
389
- class RawRecordsTableBasicArraysTest < Test::Unit::TestCase
390
- include RawRecordsBasicArraysTests
391
-
392
- def build(schema, records)
393
- Arrow::Table.new(schema, records)
394
- end
395
- end