red-arrow 4.0.1 → 7.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79)
  1. checksums.yaml +4 -4
  2. data/Gemfile +10 -0
  3. data/README.md +23 -0
  4. data/ext/arrow/arrow.cpp +3 -0
  5. data/ext/arrow/converters.cpp +5 -0
  6. data/ext/arrow/converters.hpp +126 -0
  7. data/ext/arrow/extconf.rb +13 -0
  8. data/ext/arrow/memory-view.cpp +311 -0
  9. data/ext/arrow/memory-view.hpp +26 -0
  10. data/ext/arrow/raw-records.cpp +1 -0
  11. data/ext/arrow/values.cpp +1 -0
  12. data/lib/arrow/aggregate-node-options.rb +35 -0
  13. data/lib/arrow/aggregation.rb +46 -0
  14. data/lib/arrow/array-builder.rb +5 -0
  15. data/lib/arrow/array.rb +12 -0
  16. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  17. data/lib/arrow/buffer.rb +10 -6
  18. data/lib/arrow/column-containable.rb +100 -1
  19. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  20. data/lib/arrow/datum.rb +102 -0
  21. data/lib/arrow/equal-options.rb +38 -0
  22. data/lib/arrow/expression.rb +48 -0
  23. data/lib/arrow/file-system.rb +34 -0
  24. data/lib/arrow/function.rb +52 -0
  25. data/lib/arrow/group.rb +116 -124
  26. data/lib/arrow/loader.rb +58 -0
  27. data/lib/arrow/map-array-builder.rb +109 -0
  28. data/lib/arrow/map-array.rb +26 -0
  29. data/lib/arrow/map-data-type.rb +89 -0
  30. data/lib/arrow/path-extension.rb +1 -1
  31. data/lib/arrow/record-batch-reader.rb +41 -0
  32. data/lib/arrow/record-batch.rb +0 -2
  33. data/lib/arrow/s3-global-options.rb +38 -0
  34. data/lib/arrow/scalar.rb +32 -0
  35. data/lib/arrow/slicer.rb +44 -143
  36. data/lib/arrow/sort-key.rb +61 -55
  37. data/lib/arrow/sort-options.rb +8 -8
  38. data/lib/arrow/source-node-options.rb +32 -0
  39. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  40. data/lib/arrow/symbol-values-appendable.rb +34 -0
  41. data/lib/arrow/table-concatenate-options.rb +36 -0
  42. data/lib/arrow/table-formatter.rb +141 -17
  43. data/lib/arrow/table-list-formatter.rb +5 -3
  44. data/lib/arrow/table-loader.rb +119 -44
  45. data/lib/arrow/table-saver.rb +36 -5
  46. data/lib/arrow/table-table-formatter.rb +7 -31
  47. data/lib/arrow/table.rb +112 -40
  48. data/lib/arrow/version.rb +1 -1
  49. data/red-arrow.gemspec +1 -9
  50. data/test/helper.rb +3 -0
  51. data/test/raw-records/test-dense-union-array.rb +14 -0
  52. data/test/raw-records/test-list-array.rb +19 -0
  53. data/test/raw-records/test-map-array.rb +441 -0
  54. data/test/raw-records/test-sparse-union-array.rb +14 -0
  55. data/test/raw-records/test-struct-array.rb +15 -0
  56. data/test/test-array-builder.rb +7 -0
  57. data/test/test-array.rb +34 -0
  58. data/test/test-binary-dictionary-array-builder.rb +103 -0
  59. data/test/test-boolean-scalar.rb +26 -0
  60. data/test/test-csv-loader.rb +8 -8
  61. data/test/test-expression.rb +40 -0
  62. data/test/test-float-scalar.rb +46 -0
  63. data/test/test-function.rb +210 -0
  64. data/test/test-group.rb +75 -51
  65. data/test/test-map-array-builder.rb +110 -0
  66. data/test/test-map-array.rb +33 -0
  67. data/test/test-map-data-type.rb +36 -0
  68. data/test/test-memory-view.rb +434 -0
  69. data/test/test-record-batch-reader.rb +46 -0
  70. data/test/test-record-batch.rb +42 -0
  71. data/test/test-slicer.rb +166 -167
  72. data/test/test-string-dictionary-array-builder.rb +103 -0
  73. data/test/test-table.rb +376 -56
  74. data/test/values/test-dense-union-array.rb +14 -0
  75. data/test/values/test-list-array.rb +17 -0
  76. data/test/values/test-map-array.rb +433 -0
  77. data/test/values/test-sparse-union-array.rb +14 -0
  78. data/test/values/test-struct-array.rb +15 -0
  79. metadata +117 -168
data/test/test-memory-view.rb
@@ -0,0 +1,434 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class MemoryViewTest < Test::Unit::TestCase
19
+ def setup
20
+ unless Fiddle.const_defined?(:MemoryView)
21
+ omit("Fiddle::MemoryView is needed")
22
+ end
23
+ unless Fiddle::MemoryView.respond_to?(:export)
24
+ omit("Fiddle::MemoryView.export is needed")
25
+ end
26
+ end
27
+
28
+ def little_endian?
29
+ [1].pack("s") == [1].pack("s<")
30
+ end
31
+
32
+ test("BooleanArray") do
33
+ array = Arrow::BooleanArray.new([true] * 9)
34
+ Fiddle::MemoryView.export(array) do |memory_view|
35
+ if little_endian?
36
+ template = "b"
37
+ else
38
+ template = "B"
39
+ end
40
+ assert_equal([
41
+ "#{template}8",
42
+ 1,
43
+ 2,
44
+ [(("1" * 9) + ("0" * 7))].pack("#{template}*"),
45
+ ],
46
+ [
47
+ memory_view.format,
48
+ memory_view.item_size,
49
+ memory_view.byte_size,
50
+ memory_view.to_s,
51
+ ])
52
+ end
53
+ end
54
+
55
+ test("Int8Array") do
56
+ values = [-(2 ** 7), 0, (2 ** 7) - 1]
57
+ array = Arrow::Int8Array.new(values)
58
+ Fiddle::MemoryView.export(array) do |memory_view|
59
+ assert_equal([
60
+ "c",
61
+ 1,
62
+ values.size,
63
+ values.pack("c*"),
64
+ ],
65
+ [
66
+ memory_view.format,
67
+ memory_view.item_size,
68
+ memory_view.byte_size,
69
+ memory_view.to_s,
70
+ ])
71
+ end
72
+ end
73
+
74
+ test("Int16Array") do
75
+ values = [-(2 ** 15), 0, (2 ** 15) - 1]
76
+ array = Arrow::Int16Array.new(values)
77
+ Fiddle::MemoryView.export(array) do |memory_view|
78
+ assert_equal([
79
+ "s",
80
+ 2,
81
+ 2 * values.size,
82
+ values.pack("s*"),
83
+ ],
84
+ [
85
+ memory_view.format,
86
+ memory_view.item_size,
87
+ memory_view.byte_size,
88
+ memory_view.to_s,
89
+ ])
90
+ end
91
+ end
92
+
93
+ test("Int32Array") do
94
+ values = [-(2 ** 31), 0, (2 ** 31) - 1]
95
+ array = Arrow::Int32Array.new(values)
96
+ Fiddle::MemoryView.export(array) do |memory_view|
97
+ assert_equal([
98
+ "l",
99
+ 4,
100
+ 4 * values.size,
101
+ values.pack("l*"),
102
+ ],
103
+ [
104
+ memory_view.format,
105
+ memory_view.item_size,
106
+ memory_view.byte_size,
107
+ memory_view.to_s,
108
+ ])
109
+ end
110
+ end
111
+
112
+ test("Int64Array") do
113
+ values = [-(2 ** 63), 0, (2 ** 63) - 1]
114
+ array = Arrow::Int64Array.new(values)
115
+ Fiddle::MemoryView.export(array) do |memory_view|
116
+ assert_equal([
117
+ "q",
118
+ 8,
119
+ 8 * values.size,
120
+ values.pack("q*"),
121
+ ],
122
+ [
123
+ memory_view.format,
124
+ memory_view.item_size,
125
+ memory_view.byte_size,
126
+ memory_view.to_s,
127
+ ])
128
+ end
129
+ end
130
+
131
+ test("UInt8Array") do
132
+ values = [0, (2 ** 8) - 1]
133
+ array = Arrow::UInt8Array.new(values)
134
+ Fiddle::MemoryView.export(array) do |memory_view|
135
+ assert_equal([
136
+ "C",
137
+ 1,
138
+ values.size,
139
+ values.pack("C*"),
140
+ ],
141
+ [
142
+ memory_view.format,
143
+ memory_view.item_size,
144
+ memory_view.byte_size,
145
+ memory_view.to_s,
146
+ ])
147
+ end
148
+ end
149
+
150
+ test("UInt16Array") do
151
+ values = [0, (2 ** 16) - 1]
152
+ array = Arrow::UInt16Array.new(values)
153
+ Fiddle::MemoryView.export(array) do |memory_view|
154
+ assert_equal([
155
+ "S",
156
+ 2,
157
+ 2 * values.size,
158
+ values.pack("S*"),
159
+ ],
160
+ [
161
+ memory_view.format,
162
+ memory_view.item_size,
163
+ memory_view.byte_size,
164
+ memory_view.to_s,
165
+ ])
166
+ end
167
+ end
168
+
169
+ test("UInt32Array") do
170
+ values = [0, (2 ** 32) - 1]
171
+ array = Arrow::UInt32Array.new(values)
172
+ Fiddle::MemoryView.export(array) do |memory_view|
173
+ assert_equal([
174
+ "L",
175
+ 4,
176
+ 4 * values.size,
177
+ values.pack("L*"),
178
+ ],
179
+ [
180
+ memory_view.format,
181
+ memory_view.item_size,
182
+ memory_view.byte_size,
183
+ memory_view.to_s,
184
+ ])
185
+ end
186
+ end
187
+
188
+ test("UInt64Array") do
189
+ values = [(2 ** 64) - 1]
190
+ array = Arrow::UInt64Array.new(values)
191
+ Fiddle::MemoryView.export(array) do |memory_view|
192
+ assert_equal([
193
+ "Q",
194
+ 8,
195
+ 8 * values.size,
196
+ values.pack("Q*"),
197
+ ],
198
+ [
199
+ memory_view.format,
200
+ memory_view.item_size,
201
+ memory_view.byte_size,
202
+ memory_view.to_s,
203
+ ])
204
+ end
205
+ end
206
+
207
+ test("FloatArray") do
208
+ values = [-1.1, 0.0, 1.1]
209
+ array = Arrow::FloatArray.new(values)
210
+ Fiddle::MemoryView.export(array) do |memory_view|
211
+ assert_equal([
212
+ "f",
213
+ 4,
214
+ 4 * values.size,
215
+ values.pack("f*"),
216
+ ],
217
+ [
218
+ memory_view.format,
219
+ memory_view.item_size,
220
+ memory_view.byte_size,
221
+ memory_view.to_s,
222
+ ])
223
+ end
224
+ end
225
+
226
+ test("DoubleArray") do
227
+ values = [-1.1, 0.0, 1.1]
228
+ array = Arrow::DoubleArray.new(values)
229
+ Fiddle::MemoryView.export(array) do |memory_view|
230
+ assert_equal([
231
+ "d",
232
+ 8,
233
+ 8 * values.size,
234
+ values.pack("d*"),
235
+ ],
236
+ [
237
+ memory_view.format,
238
+ memory_view.item_size,
239
+ memory_view.byte_size,
240
+ memory_view.to_s,
241
+ ])
242
+ end
243
+ end
244
+
245
+ test("FixedSizeBinaryArray") do
246
+ values = ["\x01\x02", "\x03\x04", "\x05\x06"]
247
+ data_type = Arrow::FixedSizeBinaryDataType.new(2)
248
+ array = Arrow::FixedSizeBinaryArray.new(data_type, values)
249
+ Fiddle::MemoryView.export(array) do |memory_view|
250
+ assert_equal([
251
+ "C2",
252
+ 2,
253
+ 2 * values.size,
254
+ values.join("").b,
255
+ ],
256
+ [
257
+ memory_view.format,
258
+ memory_view.item_size,
259
+ memory_view.byte_size,
260
+ memory_view.to_s,
261
+ ])
262
+ end
263
+ end
264
+
265
+ test("Date32Array") do
266
+ n_days_since_epoch = 17406 # 2017-08-28
267
+ values = [n_days_since_epoch]
268
+ array = Arrow::Date32Array.new(values)
269
+ Fiddle::MemoryView.export(array) do |memory_view|
270
+ assert_equal([
271
+ "l",
272
+ 4,
273
+ 4 * values.size,
274
+ values.pack("l*"),
275
+ ],
276
+ [
277
+ memory_view.format,
278
+ memory_view.item_size,
279
+ memory_view.byte_size,
280
+ memory_view.to_s,
281
+ ])
282
+ end
283
+ end
284
+
285
+ test("Date64Array") do
286
+ n_msecs_since_epoch = 1503878400000 # 2017-08-28T00:00:00Z
287
+ values = [n_msecs_since_epoch]
288
+ array = Arrow::Date64Array.new(values)
289
+ Fiddle::MemoryView.export(array) do |memory_view|
290
+ assert_equal([
291
+ "q",
292
+ 8,
293
+ 8 * values.size,
294
+ values.pack("q*"),
295
+ ],
296
+ [
297
+ memory_view.format,
298
+ memory_view.item_size,
299
+ memory_view.byte_size,
300
+ memory_view.to_s,
301
+ ])
302
+ end
303
+ end
304
+
305
+ test("Time32Array") do
306
+ values = [1, 2, 3]
307
+ array = Arrow::Time32Array.new(:milli, values)
308
+ Fiddle::MemoryView.export(array) do |memory_view|
309
+ assert_equal([
310
+ "l",
311
+ 4,
312
+ 4 * values.size,
313
+ values.pack("l*"),
314
+ ],
315
+ [
316
+ memory_view.format,
317
+ memory_view.item_size,
318
+ memory_view.byte_size,
319
+ memory_view.to_s,
320
+ ])
321
+ end
322
+ end
323
+
324
+ test("Time64Array") do
325
+ values = [1, 2, 3]
326
+ array = Arrow::Time64Array.new(:nano, values)
327
+ Fiddle::MemoryView.export(array) do |memory_view|
328
+ assert_equal([
329
+ "q",
330
+ 8,
331
+ 8 * values.size,
332
+ values.pack("q*"),
333
+ ],
334
+ [
335
+ memory_view.format,
336
+ memory_view.item_size,
337
+ memory_view.byte_size,
338
+ memory_view.to_s,
339
+ ])
340
+ end
341
+ end
342
+
343
+ test("TimestampArray") do
344
+ values = [1, 2, 3]
345
+ array = Arrow::TimestampArray.new(:micro, values)
346
+ Fiddle::MemoryView.export(array) do |memory_view|
347
+ assert_equal([
348
+ "q",
349
+ 8,
350
+ 8 * values.size,
351
+ values.pack("q*"),
352
+ ],
353
+ [
354
+ memory_view.format,
355
+ memory_view.item_size,
356
+ memory_view.byte_size,
357
+ memory_view.to_s,
358
+ ])
359
+ end
360
+ end
361
+
362
+ test("Decimal128Array") do
363
+ values = [
364
+ Arrow::Decimal128.new("10.1"),
365
+ Arrow::Decimal128.new("11.1"),
366
+ Arrow::Decimal128.new("10.2"),
367
+ ]
368
+ data_type = Arrow::Decimal128DataType.new(3, 1)
369
+ array = Arrow::Decimal128Array.new(data_type, values)
370
+ Fiddle::MemoryView.export(array) do |memory_view|
371
+ assert_equal([
372
+ "q2",
373
+ 16,
374
+ 16 * values.size,
375
+ values.collect {|value| value.to_bytes.to_s}.join(""),
376
+ ],
377
+ [
378
+ memory_view.format,
379
+ memory_view.item_size,
380
+ memory_view.byte_size,
381
+ memory_view.to_s,
382
+ ])
383
+ end
384
+ end
385
+
386
+ test("Decimal256Array") do
387
+ values = [
388
+ Arrow::Decimal256.new("10.1"),
389
+ Arrow::Decimal256.new("11.1"),
390
+ Arrow::Decimal256.new("10.2"),
391
+ ]
392
+ data_type = Arrow::Decimal256DataType.new(3, 1)
393
+ array = Arrow::Decimal256Array.new(data_type, values)
394
+ Fiddle::MemoryView.export(array) do |memory_view|
395
+ assert_equal([
396
+ "q4",
397
+ 32,
398
+ 32 * values.size,
399
+ values.collect {|value| value.to_bytes.to_s}.join(""),
400
+ ],
401
+ [
402
+ memory_view.format,
403
+ memory_view.item_size,
404
+ memory_view.byte_size,
405
+ memory_view.to_s,
406
+ ])
407
+ end
408
+ end
409
+
410
+ test("Buffer") do
411
+ values = [0, nil, nil] * 3
412
+ array = Arrow::Int8Array.new(values)
413
+ buffer = array.null_bitmap
414
+ Fiddle::MemoryView.export(buffer) do |memory_view|
415
+ if little_endian?
416
+ template = "b"
417
+ else
418
+ template = "B"
419
+ end
420
+ assert_equal([
421
+ "#{template}8",
422
+ 1,
423
+ 2,
424
+ ["100" * 3].pack("#{template}*"),
425
+ ],
426
+ [
427
+ memory_view.format,
428
+ memory_view.item_size,
429
+ memory_view.byte_size,
430
+ memory_view.to_s,
431
+ ])
432
+ end
433
+ end
434
+ end
data/test/test-record-batch-reader.rb
@@ -0,0 +1,46 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class TestRecordBatchReader < Test::Unit::TestCase
19
+ sub_test_case(".try_convert") do
20
+ test("Arrow::RecordBatch") do
21
+ record_batch =
22
+ Arrow::RecordBatch.new("count" => [1, 2, 3],
23
+ "private" => [true, false, true])
24
+ reader = Arrow::RecordBatchReader.try_convert(record_batch)
25
+ assert_equal(record_batch,
26
+ reader.read_next)
27
+ end
28
+
29
+ test("[Arrow::RecordBatch]") do
30
+ record_batch =
31
+ Arrow::RecordBatch.new("count" => [1, 2, 3],
32
+ "private" => [true, false, true])
33
+ reader = Arrow::RecordBatchReader.try_convert([record_batch])
34
+ assert_equal(record_batch,
35
+ reader.read_next)
36
+ end
37
+
38
+ test("Arrow::Table") do
39
+ table = Arrow::Table.new("count" => [1, 2, 3],
40
+ "private" => [true, false, true])
41
+ reader = Arrow::RecordBatchReader.try_convert(table)
42
+ assert_equal(table,
43
+ reader.read_all)
44
+ end
45
+ end
46
+ end
data/test/test-record-batch.rb
@@ -136,5 +136,47 @@ class RecordBatchTest < Test::Unit::TestCase
136
136
  end
137
137
  end
138
138
  end
139
+
140
+ sub_test_case("#[]") do
141
+ def setup
142
+ @record_batch = Arrow::RecordBatch.new(a: [true],
143
+ b: [true],
144
+ c: [true],
145
+ d: [true],
146
+ e: [true],
147
+ f: [true],
148
+ g: [true])
149
+ end
150
+
151
+ test("[String]") do
152
+ assert_equal(Arrow::Column.new(@record_batch, 0),
153
+ @record_batch["a"])
154
+ end
155
+
156
+ test("[Symbol]") do
157
+ assert_equal(Arrow::Column.new(@record_batch, 1),
158
+ @record_batch[:b])
159
+ end
160
+
161
+ test("[Integer]") do
162
+ assert_equal(Arrow::Column.new(@record_batch, 6),
163
+ @record_batch[-1])
164
+ end
165
+
166
+ test("[Range]") do
167
+ assert_equal(Arrow::RecordBatch.new(d: [true],
168
+ e: [true]),
169
+ @record_batch[3..4])
170
+ end
171
+
172
+ test("[[Symbol, String, Integer, Range]]") do
173
+ assert_equal(Arrow::RecordBatch.new(c: [true],
174
+ a: [true],
175
+ g: [true],
176
+ d: [true],
177
+ e: [true]),
178
+ @record_batch[[:c, "a", -1, 3..4]])
179
+ end
180
+ end
139
181
  end
140
182
  end