red-arrow 3.0.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +4 -4
  2. data/README.md +23 -0
  3. data/ext/arrow/arrow.cpp +3 -0
  4. data/ext/arrow/converters.cpp +5 -0
  5. data/ext/arrow/converters.hpp +126 -0
  6. data/ext/arrow/extconf.rb +13 -0
  7. data/ext/arrow/memory-view.cpp +311 -0
  8. data/ext/arrow/memory-view.hpp +26 -0
  9. data/ext/arrow/raw-records.cpp +1 -0
  10. data/ext/arrow/values.cpp +1 -0
  11. data/lib/arrow/aggregate-node-options.rb +35 -0
  12. data/lib/arrow/aggregation.rb +46 -0
  13. data/lib/arrow/array-builder.rb +5 -0
  14. data/lib/arrow/array.rb +130 -0
  15. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  16. data/lib/arrow/buffer.rb +10 -6
  17. data/lib/arrow/column-containable.rb +100 -1
  18. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  19. data/lib/arrow/data-type.rb +14 -5
  20. data/lib/arrow/datum.rb +100 -0
  21. data/lib/arrow/dense-union-data-type.rb +2 -2
  22. data/lib/arrow/dictionary-data-type.rb +2 -2
  23. data/lib/arrow/equal-options.rb +38 -0
  24. data/lib/arrow/expression.rb +48 -0
  25. data/lib/arrow/file-system.rb +34 -0
  26. data/lib/arrow/group.rb +116 -124
  27. data/lib/arrow/loader.rb +46 -0
  28. data/lib/arrow/map-array-builder.rb +109 -0
  29. data/lib/arrow/map-array.rb +26 -0
  30. data/lib/arrow/map-data-type.rb +89 -0
  31. data/lib/arrow/path-extension.rb +1 -1
  32. data/lib/arrow/record-batch-reader.rb +41 -0
  33. data/lib/arrow/record-batch.rb +0 -2
  34. data/lib/arrow/scalar.rb +32 -0
  35. data/lib/arrow/slicer.rb +44 -143
  36. data/lib/arrow/sort-key.rb +193 -0
  37. data/lib/arrow/sort-options.rb +109 -0
  38. data/lib/arrow/source-node-options.rb +32 -0
  39. data/lib/arrow/sparse-union-data-type.rb +2 -2
  40. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  41. data/lib/arrow/symbol-values-appendable.rb +34 -0
  42. data/lib/arrow/table-concatenate-options.rb +36 -0
  43. data/lib/arrow/table-formatter.rb +141 -17
  44. data/lib/arrow/table-list-formatter.rb +5 -3
  45. data/lib/arrow/table-loader.rb +41 -3
  46. data/lib/arrow/table-saver.rb +29 -3
  47. data/lib/arrow/table-table-formatter.rb +7 -31
  48. data/lib/arrow/table.rb +34 -40
  49. data/lib/arrow/time32-data-type.rb +2 -2
  50. data/lib/arrow/time64-data-type.rb +2 -2
  51. data/lib/arrow/timestamp-data-type.rb +2 -2
  52. data/lib/arrow/version.rb +1 -1
  53. data/red-arrow.gemspec +2 -1
  54. data/test/helper.rb +1 -0
  55. data/test/raw-records/test-dense-union-array.rb +14 -0
  56. data/test/raw-records/test-list-array.rb +19 -0
  57. data/test/raw-records/test-map-array.rb +441 -0
  58. data/test/raw-records/test-sparse-union-array.rb +14 -0
  59. data/test/raw-records/test-struct-array.rb +15 -0
  60. data/test/test-array-builder.rb +7 -0
  61. data/test/test-array.rb +154 -0
  62. data/test/test-binary-dictionary-array-builder.rb +103 -0
  63. data/test/test-boolean-scalar.rb +26 -0
  64. data/test/test-csv-loader.rb +8 -8
  65. data/test/test-decimal128-data-type.rb +2 -2
  66. data/test/test-expression.rb +40 -0
  67. data/test/test-float-scalar.rb +46 -0
  68. data/test/test-function.rb +176 -0
  69. data/test/test-group.rb +75 -51
  70. data/test/test-map-array-builder.rb +110 -0
  71. data/test/test-map-array.rb +33 -0
  72. data/test/test-map-data-type.rb +36 -0
  73. data/test/test-memory-view.rb +434 -0
  74. data/test/test-orc.rb +19 -23
  75. data/test/test-record-batch-reader.rb +46 -0
  76. data/test/test-record-batch.rb +42 -0
  77. data/test/test-slicer.rb +166 -167
  78. data/test/test-sort-indices.rb +40 -0
  79. data/test/test-sort-key.rb +81 -0
  80. data/test/test-sort-options.rb +58 -0
  81. data/test/test-string-dictionary-array-builder.rb +103 -0
  82. data/test/test-table.rb +190 -53
  83. data/test/values/test-dense-union-array.rb +14 -0
  84. data/test/values/test-list-array.rb +17 -0
  85. data/test/values/test-map-array.rb +433 -0
  86. data/test/values/test-sparse-union-array.rb +14 -0
  87. data/test/values/test-struct-array.rb +15 -0
  88. metadata +73 -6
data/test/test-memory-view.rb ADDED
@@ -0,0 +1,434 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class MemoryViewTest < Test::Unit::TestCase
19
+ def setup
20
+ unless Fiddle.const_defined?(:MemoryView)
21
+ omit("Fiddle::MemoryView is needed")
22
+ end
23
+ unless Fiddle::MemoryView.respond_to?(:export)
24
+ omit("Fiddle::MemoryView.export is needed")
25
+ end
26
+ end
27
+
28
+ def little_endian?
29
+ [1].pack("s") == [1].pack("s<")
30
+ end
31
+
32
+ test("BooleanArray") do
33
+ array = Arrow::BooleanArray.new([true] * 9)
34
+ Fiddle::MemoryView.export(array) do |memory_view|
35
+ if little_endian?
36
+ template = "b"
37
+ else
38
+ template = "B"
39
+ end
40
+ assert_equal([
41
+ "#{template}8",
42
+ 1,
43
+ 2,
44
+ [(("1" * 9) + ("0" * 7))].pack("#{template}*"),
45
+ ],
46
+ [
47
+ memory_view.format,
48
+ memory_view.item_size,
49
+ memory_view.byte_size,
50
+ memory_view.to_s,
51
+ ])
52
+ end
53
+ end
54
+
55
+ test("Int8Array") do
56
+ values = [-(2 ** 7), 0, (2 ** 7) - 1]
57
+ array = Arrow::Int8Array.new(values)
58
+ Fiddle::MemoryView.export(array) do |memory_view|
59
+ assert_equal([
60
+ "c",
61
+ 1,
62
+ values.size,
63
+ values.pack("c*"),
64
+ ],
65
+ [
66
+ memory_view.format,
67
+ memory_view.item_size,
68
+ memory_view.byte_size,
69
+ memory_view.to_s,
70
+ ])
71
+ end
72
+ end
73
+
74
+ test("Int16Array") do
75
+ values = [-(2 ** 15), 0, (2 ** 15) - 1]
76
+ array = Arrow::Int16Array.new(values)
77
+ Fiddle::MemoryView.export(array) do |memory_view|
78
+ assert_equal([
79
+ "s",
80
+ 2,
81
+ 2 * values.size,
82
+ values.pack("s*"),
83
+ ],
84
+ [
85
+ memory_view.format,
86
+ memory_view.item_size,
87
+ memory_view.byte_size,
88
+ memory_view.to_s,
89
+ ])
90
+ end
91
+ end
92
+
93
+ test("Int32Array") do
94
+ values = [-(2 ** 31), 0, (2 ** 31) - 1]
95
+ array = Arrow::Int32Array.new(values)
96
+ Fiddle::MemoryView.export(array) do |memory_view|
97
+ assert_equal([
98
+ "l",
99
+ 4,
100
+ 4 * values.size,
101
+ values.pack("l*"),
102
+ ],
103
+ [
104
+ memory_view.format,
105
+ memory_view.item_size,
106
+ memory_view.byte_size,
107
+ memory_view.to_s,
108
+ ])
109
+ end
110
+ end
111
+
112
+ test("Int64Array") do
113
+ values = [-(2 ** 63), 0, (2 ** 63) - 1]
114
+ array = Arrow::Int64Array.new(values)
115
+ Fiddle::MemoryView.export(array) do |memory_view|
116
+ assert_equal([
117
+ "q",
118
+ 8,
119
+ 8 * values.size,
120
+ values.pack("q*"),
121
+ ],
122
+ [
123
+ memory_view.format,
124
+ memory_view.item_size,
125
+ memory_view.byte_size,
126
+ memory_view.to_s,
127
+ ])
128
+ end
129
+ end
130
+
131
+ test("UInt8Array") do
132
+ values = [0, (2 ** 8) - 1]
133
+ array = Arrow::UInt8Array.new(values)
134
+ Fiddle::MemoryView.export(array) do |memory_view|
135
+ assert_equal([
136
+ "C",
137
+ 1,
138
+ values.size,
139
+ values.pack("C*"),
140
+ ],
141
+ [
142
+ memory_view.format,
143
+ memory_view.item_size,
144
+ memory_view.byte_size,
145
+ memory_view.to_s,
146
+ ])
147
+ end
148
+ end
149
+
150
+ test("UInt16Array") do
151
+ values = [0, (2 ** 16) - 1]
152
+ array = Arrow::UInt16Array.new(values)
153
+ Fiddle::MemoryView.export(array) do |memory_view|
154
+ assert_equal([
155
+ "S",
156
+ 2,
157
+ 2 * values.size,
158
+ values.pack("S*"),
159
+ ],
160
+ [
161
+ memory_view.format,
162
+ memory_view.item_size,
163
+ memory_view.byte_size,
164
+ memory_view.to_s,
165
+ ])
166
+ end
167
+ end
168
+
169
+ test("UInt32Array") do
170
+ values = [0, (2 ** 32) - 1]
171
+ array = Arrow::UInt32Array.new(values)
172
+ Fiddle::MemoryView.export(array) do |memory_view|
173
+ assert_equal([
174
+ "L",
175
+ 4,
176
+ 4 * values.size,
177
+ values.pack("L*"),
178
+ ],
179
+ [
180
+ memory_view.format,
181
+ memory_view.item_size,
182
+ memory_view.byte_size,
183
+ memory_view.to_s,
184
+ ])
185
+ end
186
+ end
187
+
188
+ test("UInt64Array") do
189
+ values = [(2 ** 64) - 1]
190
+ array = Arrow::UInt64Array.new(values)
191
+ Fiddle::MemoryView.export(array) do |memory_view|
192
+ assert_equal([
193
+ "Q",
194
+ 8,
195
+ 8 * values.size,
196
+ values.pack("Q*"),
197
+ ],
198
+ [
199
+ memory_view.format,
200
+ memory_view.item_size,
201
+ memory_view.byte_size,
202
+ memory_view.to_s,
203
+ ])
204
+ end
205
+ end
206
+
207
+ test("FloatArray") do
208
+ values = [-1.1, 0.0, 1.1]
209
+ array = Arrow::FloatArray.new(values)
210
+ Fiddle::MemoryView.export(array) do |memory_view|
211
+ assert_equal([
212
+ "f",
213
+ 4,
214
+ 4 * values.size,
215
+ values.pack("f*"),
216
+ ],
217
+ [
218
+ memory_view.format,
219
+ memory_view.item_size,
220
+ memory_view.byte_size,
221
+ memory_view.to_s,
222
+ ])
223
+ end
224
+ end
225
+
226
+ test("DoubleArray") do
227
+ values = [-1.1, 0.0, 1.1]
228
+ array = Arrow::DoubleArray.new(values)
229
+ Fiddle::MemoryView.export(array) do |memory_view|
230
+ assert_equal([
231
+ "d",
232
+ 8,
233
+ 8 * values.size,
234
+ values.pack("d*"),
235
+ ],
236
+ [
237
+ memory_view.format,
238
+ memory_view.item_size,
239
+ memory_view.byte_size,
240
+ memory_view.to_s,
241
+ ])
242
+ end
243
+ end
244
+
245
+ test("FixedSizeBinaryArray") do
246
+ values = ["\x01\x02", "\x03\x04", "\x05\x06"]
247
+ data_type = Arrow::FixedSizeBinaryDataType.new(2)
248
+ array = Arrow::FixedSizeBinaryArray.new(data_type, values)
249
+ Fiddle::MemoryView.export(array) do |memory_view|
250
+ assert_equal([
251
+ "C2",
252
+ 2,
253
+ 2 * values.size,
254
+ values.join("").b,
255
+ ],
256
+ [
257
+ memory_view.format,
258
+ memory_view.item_size,
259
+ memory_view.byte_size,
260
+ memory_view.to_s,
261
+ ])
262
+ end
263
+ end
264
+
265
+ test("Date32Array") do
266
+ n_days_since_epoch = 17406 # 2017-08-28
267
+ values = [n_days_since_epoch]
268
+ array = Arrow::Date32Array.new(values)
269
+ Fiddle::MemoryView.export(array) do |memory_view|
270
+ assert_equal([
271
+ "l",
272
+ 4,
273
+ 4 * values.size,
274
+ values.pack("l*"),
275
+ ],
276
+ [
277
+ memory_view.format,
278
+ memory_view.item_size,
279
+ memory_view.byte_size,
280
+ memory_view.to_s,
281
+ ])
282
+ end
283
+ end
284
+
285
+ test("Date64Array") do
286
+ n_msecs_since_epoch = 1503878400000 # 2017-08-28T00:00:00Z
287
+ values = [n_msecs_since_epoch]
288
+ array = Arrow::Date64Array.new(values)
289
+ Fiddle::MemoryView.export(array) do |memory_view|
290
+ assert_equal([
291
+ "q",
292
+ 8,
293
+ 8 * values.size,
294
+ values.pack("q*"),
295
+ ],
296
+ [
297
+ memory_view.format,
298
+ memory_view.item_size,
299
+ memory_view.byte_size,
300
+ memory_view.to_s,
301
+ ])
302
+ end
303
+ end
304
+
305
+ test("Time32Array") do
306
+ values = [1, 2, 3]
307
+ array = Arrow::Time32Array.new(:milli, values)
308
+ Fiddle::MemoryView.export(array) do |memory_view|
309
+ assert_equal([
310
+ "l",
311
+ 4,
312
+ 4 * values.size,
313
+ values.pack("l*"),
314
+ ],
315
+ [
316
+ memory_view.format,
317
+ memory_view.item_size,
318
+ memory_view.byte_size,
319
+ memory_view.to_s,
320
+ ])
321
+ end
322
+ end
323
+
324
+ test("Time64Array") do
325
+ values = [1, 2, 3]
326
+ array = Arrow::Time64Array.new(:nano, values)
327
+ Fiddle::MemoryView.export(array) do |memory_view|
328
+ assert_equal([
329
+ "q",
330
+ 8,
331
+ 8 * values.size,
332
+ values.pack("q*"),
333
+ ],
334
+ [
335
+ memory_view.format,
336
+ memory_view.item_size,
337
+ memory_view.byte_size,
338
+ memory_view.to_s,
339
+ ])
340
+ end
341
+ end
342
+
343
+ test("TimestampArray") do
344
+ values = [1, 2, 3]
345
+ array = Arrow::TimestampArray.new(:micro, values)
346
+ Fiddle::MemoryView.export(array) do |memory_view|
347
+ assert_equal([
348
+ "q",
349
+ 8,
350
+ 8 * values.size,
351
+ values.pack("q*"),
352
+ ],
353
+ [
354
+ memory_view.format,
355
+ memory_view.item_size,
356
+ memory_view.byte_size,
357
+ memory_view.to_s,
358
+ ])
359
+ end
360
+ end
361
+
362
+ test("Decimal128Array") do
363
+ values = [
364
+ Arrow::Decimal128.new("10.1"),
365
+ Arrow::Decimal128.new("11.1"),
366
+ Arrow::Decimal128.new("10.2"),
367
+ ]
368
+ data_type = Arrow::Decimal128DataType.new(3, 1)
369
+ array = Arrow::Decimal128Array.new(data_type, values)
370
+ Fiddle::MemoryView.export(array) do |memory_view|
371
+ assert_equal([
372
+ "q2",
373
+ 16,
374
+ 16 * values.size,
375
+ values.collect {|value| value.to_bytes.to_s}.join(""),
376
+ ],
377
+ [
378
+ memory_view.format,
379
+ memory_view.item_size,
380
+ memory_view.byte_size,
381
+ memory_view.to_s,
382
+ ])
383
+ end
384
+ end
385
+
386
+ test("Decimal256Array") do
387
+ values = [
388
+ Arrow::Decimal256.new("10.1"),
389
+ Arrow::Decimal256.new("11.1"),
390
+ Arrow::Decimal256.new("10.2"),
391
+ ]
392
+ data_type = Arrow::Decimal256DataType.new(3, 1)
393
+ array = Arrow::Decimal256Array.new(data_type, values)
394
+ Fiddle::MemoryView.export(array) do |memory_view|
395
+ assert_equal([
396
+ "q4",
397
+ 32,
398
+ 32 * values.size,
399
+ values.collect {|value| value.to_bytes.to_s}.join(""),
400
+ ],
401
+ [
402
+ memory_view.format,
403
+ memory_view.item_size,
404
+ memory_view.byte_size,
405
+ memory_view.to_s,
406
+ ])
407
+ end
408
+ end
409
+
410
+ test("Buffer") do
411
+ values = [0, nil, nil] * 3
412
+ array = Arrow::Int8Array.new(values)
413
+ buffer = array.null_bitmap
414
+ Fiddle::MemoryView.export(buffer) do |memory_view|
415
+ if little_endian?
416
+ template = "b"
417
+ else
418
+ template = "B"
419
+ end
420
+ assert_equal([
421
+ "#{template}8",
422
+ 1,
423
+ 2,
424
+ ["100" * 3].pack("#{template}*"),
425
+ ],
426
+ [
427
+ memory_view.format,
428
+ memory_view.item_size,
429
+ memory_view.byte_size,
430
+ memory_view.to_s,
431
+ ])
432
+ end
433
+ end
434
+ end
data/test/test-orc.rb CHANGED
@@ -118,39 +118,35 @@ class ORCTest < Test::Unit::TestCase
118
118
  ]
119
119
  ],
120
120
  [
121
- "map: list<item: " +
122
- "struct<key: string, value: " +
123
- "struct<int1: int32, string1: string>>>",
121
+ "map: map<string, struct<int1: int32, string1: string>>",
124
122
  [
125
123
  <<-MAP.chomp
126
124
  [
125
+ keys:
126
+ []
127
+ values:
127
128
  -- is_valid: all not null
128
- -- child 0 type: string
129
+ -- child 0 type: int32
129
130
  []
130
- -- child 1 type: struct<int1: int32, string1: string>
131
- -- is_valid: all not null
132
- -- child 0 type: int32
133
- []
134
- -- child 1 type: string
135
- [],
131
+ -- child 1 type: string
132
+ [],
133
+ keys:
134
+ [
135
+ "chani",
136
+ "mauddib"
137
+ ]
138
+ values:
136
139
  -- is_valid: all not null
137
- -- child 0 type: string
140
+ -- child 0 type: int32
141
+ [
142
+ 5,
143
+ 1
144
+ ]
145
+ -- child 1 type: string
138
146
  [
139
147
  "chani",
140
148
  "mauddib"
141
149
  ]
142
- -- child 1 type: struct<int1: int32, string1: string>
143
- -- is_valid: all not null
144
- -- child 0 type: int32
145
- [
146
- 5,
147
- 1
148
- ]
149
- -- child 1 type: string
150
- [
151
- "chani",
152
- "mauddib"
153
- ]
154
150
  ]
155
151
  MAP
156
152
  ],
data/test/test-record-batch-reader.rb ADDED
@@ -0,0 +1,46 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class TestRecordBatchReader < Test::Unit::TestCase
19
+ sub_test_case(".try_convert") do
20
+ test("Arrow::RecordBatch") do
21
+ record_batch =
22
+ Arrow::RecordBatch.new("count" => [1, 2, 3],
23
+ "private" => [true, false, true])
24
+ reader = Arrow::RecordBatchReader.try_convert(record_batch)
25
+ assert_equal(record_batch,
26
+ reader.read_next)
27
+ end
28
+
29
+ test("[Arrow::RecordBatch]") do
30
+ record_batch =
31
+ Arrow::RecordBatch.new("count" => [1, 2, 3],
32
+ "private" => [true, false, true])
33
+ reader = Arrow::RecordBatchReader.try_convert([record_batch])
34
+ assert_equal(record_batch,
35
+ reader.read_next)
36
+ end
37
+
38
+ test("Arrow::Table") do
39
+ table = Arrow::Table.new("count" => [1, 2, 3],
40
+ "private" => [true, false, true])
41
+ reader = Arrow::RecordBatchReader.try_convert(table)
42
+ assert_equal(table,
43
+ reader.read_all)
44
+ end
45
+ end
46
+ end
data/test/test-record-batch.rb CHANGED
@@ -136,5 +136,47 @@ class RecordBatchTest < Test::Unit::TestCase
136
136
  end
137
137
  end
138
138
  end
139
+
140
+ sub_test_case("#[]") do
141
+ def setup
142
+ @record_batch = Arrow::RecordBatch.new(a: [true],
143
+ b: [true],
144
+ c: [true],
145
+ d: [true],
146
+ e: [true],
147
+ f: [true],
148
+ g: [true])
149
+ end
150
+
151
+ test("[String]") do
152
+ assert_equal(Arrow::Column.new(@record_batch, 0),
153
+ @record_batch["a"])
154
+ end
155
+
156
+ test("[Symbol]") do
157
+ assert_equal(Arrow::Column.new(@record_batch, 1),
158
+ @record_batch[:b])
159
+ end
160
+
161
+ test("[Integer]") do
162
+ assert_equal(Arrow::Column.new(@record_batch, 6),
163
+ @record_batch[-1])
164
+ end
165
+
166
+ test("[Range]") do
167
+ assert_equal(Arrow::RecordBatch.new(d: [true],
168
+ e: [true]),
169
+ @record_batch[3..4])
170
+ end
171
+
172
+ test("[[Symbol, String, Integer, Range]]") do
173
+ assert_equal(Arrow::RecordBatch.new(c: [true],
174
+ a: [true],
175
+ g: [true],
176
+ d: [true],
177
+ e: [true]),
178
+ @record_batch[[:c, "a", -1, 3..4]])
179
+ end
180
+ end
139
181
  end
140
182
  end