red-arrow 0.15.0 → 1.0.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +28 -16
- data/ext/arrow/converters.hpp +63 -33
- data/ext/arrow/raw-records.cpp +2 -1
- data/ext/arrow/values.cpp +2 -1
- data/lib/arrow/array-builder.rb +101 -52
- data/lib/arrow/array.rb +28 -10
- data/lib/arrow/{binary-array-builder.rb → buffer.rb} +7 -15
- data/lib/arrow/chunked-array.rb +2 -0
- data/lib/arrow/csv-loader.rb +15 -3
- data/lib/arrow/csv-read-options.rb +18 -0
- data/lib/arrow/data-type.rb +35 -2
- data/lib/arrow/decimal128-array-builder.rb +0 -2
- data/lib/arrow/dictionary-array.rb +24 -0
- data/lib/arrow/field.rb +1 -1
- data/lib/arrow/generic-filterable.rb +43 -0
- data/lib/arrow/generic-takeable.rb +38 -0
- data/lib/arrow/list-data-type.rb +58 -8
- data/lib/arrow/loader.rb +12 -1
- data/lib/arrow/null-array-builder.rb +1 -1
- data/lib/arrow/null-array.rb +24 -0
- data/lib/arrow/raw-table-converter.rb +47 -0
- data/lib/arrow/record-batch-iterator.rb +22 -0
- data/lib/arrow/record-batch.rb +8 -3
- data/lib/arrow/schema.rb +5 -2
- data/lib/arrow/struct-array-builder.rb +13 -7
- data/lib/arrow/struct-data-type.rb +0 -2
- data/lib/arrow/table-loader.rb +29 -6
- data/lib/arrow/table-saver.rb +37 -13
- data/lib/arrow/table.rb +20 -73
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +4 -2
- data/test/helper.rb +1 -0
- data/test/helper/omittable.rb +36 -0
- data/test/raw-records/test-dense-union-array.rb +1 -34
- data/test/raw-records/test-sparse-union-array.rb +1 -33
- data/test/run-test.rb +14 -3
- data/test/test-array-builder.rb +17 -0
- data/test/test-array.rb +104 -0
- data/test/test-buffer.rb +11 -0
- data/test/test-chunked-array.rb +96 -0
- data/test/test-csv-loader.rb +77 -2
- data/test/test-data-type.rb +11 -0
- data/test/test-dense-union-data-type.rb +2 -2
- data/test/test-dictionary-array.rb +41 -0
- data/test/test-feather.rb +21 -6
- data/test/test-list-data-type.rb +27 -1
- data/test/test-null-array.rb +23 -0
- data/test/test-record-batch-iterator.rb +37 -0
- data/test/test-record-batch.rb +14 -0
- data/test/test-schema.rb +16 -0
- data/test/test-slicer.rb +74 -30
- data/test/test-sparse-union-data-type.rb +2 -2
- data/test/test-struct-array-builder.rb +8 -4
- data/test/test-table.rb +153 -14
- data/test/test-timestamp-array.rb +19 -0
- data/test/values/test-dense-union-array.rb +1 -34
- data/test/values/test-sparse-union-array.rb +1 -33
- metadata +76 -63
data/test/test-feather.rb
CHANGED
@@ -18,17 +18,32 @@
|
|
18
18
|
class FeatherTest < Test::Unit::TestCase
|
19
19
|
include Helper::Fixture
|
20
20
|
|
21
|
-
def
|
21
|
+
def setup
|
22
22
|
columns = {
|
23
23
|
"message" => Arrow::StringArray.new(["Start", "Crash", "Shutdown"]),
|
24
24
|
"is_critical" => Arrow::BooleanArray.new([false, true, false]),
|
25
25
|
}
|
26
|
-
table = Arrow::Table.new(columns)
|
26
|
+
@table = Arrow::Table.new(columns)
|
27
27
|
|
28
|
-
output = Tempfile.new(["red-arrow", ".feather"])
|
29
|
-
|
30
|
-
|
28
|
+
@output = Tempfile.new(["red-arrow", ".feather"])
|
29
|
+
begin
|
30
|
+
yield(@output)
|
31
|
+
ensure
|
32
|
+
@output.close!
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_default
|
37
|
+
@table.save(@output.path)
|
38
|
+
@output.close
|
39
|
+
|
40
|
+
assert_equal(@table, Arrow::Table.load(@output.path))
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_compression
|
44
|
+
@table.save(@output.path, compression: :zstd)
|
45
|
+
@output.close
|
31
46
|
|
32
|
-
assert_equal(table, Arrow::Table.load(output.path))
|
47
|
+
assert_equal(@table, Arrow::Table.load(@output.path))
|
33
48
|
end
|
34
49
|
end
|
data/test/test-list-data-type.rb
CHANGED
@@ -23,7 +23,7 @@ class ListDataTypeTest < Test::Unit::TestCase
|
|
23
23
|
Arrow::ListDataType.new(field).to_s)
|
24
24
|
end
|
25
25
|
|
26
|
-
test("
|
26
|
+
test("name: String") do
|
27
27
|
assert_equal("list<tag: string>",
|
28
28
|
Arrow::ListDataType.new(name: "tag", type: :string).to_s)
|
29
29
|
end
|
@@ -39,5 +39,31 @@ class ListDataTypeTest < Test::Unit::TestCase
|
|
39
39
|
assert_equal("list<tag: string>",
|
40
40
|
Arrow::ListDataType.new(field: field_description).to_s)
|
41
41
|
end
|
42
|
+
|
43
|
+
test("Arrow::DataType") do
|
44
|
+
data_type = Arrow::BooleanDataType.new
|
45
|
+
assert_equal("list<item: bool>",
|
46
|
+
Arrow::ListDataType.new(data_type).to_s)
|
47
|
+
end
|
48
|
+
|
49
|
+
test("String") do
|
50
|
+
assert_equal("list<item: bool>",
|
51
|
+
Arrow::ListDataType.new("boolean").to_s)
|
52
|
+
end
|
53
|
+
|
54
|
+
test("Symbol") do
|
55
|
+
assert_equal("list<item: bool>",
|
56
|
+
Arrow::ListDataType.new(:boolean).to_s)
|
57
|
+
end
|
58
|
+
|
59
|
+
test("[data type name, additional information]") do
|
60
|
+
assert_equal("list<item: time32[ms]>",
|
61
|
+
Arrow::ListDataType.new([:time32, :milli]).to_s)
|
62
|
+
end
|
63
|
+
|
64
|
+
test("type: Symbol") do
|
65
|
+
assert_equal("list<item: bool>",
|
66
|
+
Arrow::ListDataType.new(type: :boolean).to_s)
|
67
|
+
end
|
42
68
|
end
|
43
69
|
end
|
data/test/test-null-array.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
class NullArrayTest < Test::Unit::TestCase
|
19
|
+
test("#[]") do
|
20
|
+
array = Arrow::NullArray.new(1)
|
21
|
+
assert_nil(array[0])
|
22
|
+
end
|
23
|
+
end
|
data/test/test-record-batch-iterator.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
class RecordBatchIteratorTest < Test::Unit::TestCase
|
19
|
+
def setup
|
20
|
+
@schema = Arrow::Schema.new(visible: :boolean,
|
21
|
+
count: :uint32)
|
22
|
+
@record_batches = [
|
23
|
+
Arrow::RecordBatch.new(@schema,
|
24
|
+
visible: [true],
|
25
|
+
count: [1]),
|
26
|
+
Arrow::RecordBatch.new(@schema,
|
27
|
+
visible: [false, nil],
|
28
|
+
count: [nil, 3]),
|
29
|
+
]
|
30
|
+
@iterator = Arrow::RecordBatchIterator.new(@record_batches)
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_to_a
|
34
|
+
assert_equal(@record_batches,
|
35
|
+
@iterator.to_a)
|
36
|
+
end
|
37
|
+
end
|
data/test/test-record-batch.rb
CHANGED
@@ -22,6 +22,20 @@ class RecordBatchTest < Test::Unit::TestCase
|
|
22
22
|
count: :uint32)
|
23
23
|
end
|
24
24
|
|
25
|
+
test("[raw_table]") do
|
26
|
+
raw_table = {
|
27
|
+
visible: [true, nil, false],
|
28
|
+
count: [1, nil, 3],
|
29
|
+
}
|
30
|
+
record_batch = Arrow::RecordBatch.new(raw_table)
|
31
|
+
assert_equal([
|
32
|
+
{"visible" => true, "count" => 1},
|
33
|
+
{"visible" => nil, "count" => nil},
|
34
|
+
{"visible" => false, "count" => 3},
|
35
|
+
],
|
36
|
+
record_batch.each_record.collect(&:to_h))
|
37
|
+
end
|
38
|
+
|
25
39
|
test("[Schema, records]") do
|
26
40
|
records = [
|
27
41
|
{visible: true, count: 1},
|
data/test/test-schema.rb
CHANGED
@@ -16,6 +16,8 @@
|
|
16
16
|
# under the License.
|
17
17
|
|
18
18
|
class SchemaTest < Test::Unit::TestCase
|
19
|
+
include Helper::Omittable
|
20
|
+
|
19
21
|
def setup
|
20
22
|
@count_field = Arrow::Field.new("count", :uint32)
|
21
23
|
@visible_field = Arrow::Field.new("visible", :boolean)
|
@@ -114,5 +116,19 @@ class SchemaTest < Test::Unit::TestCase
|
|
114
116
|
end
|
115
117
|
end
|
116
118
|
end
|
119
|
+
|
120
|
+
sub_test_case("#to_s") do
|
121
|
+
test("show_metadata") do
|
122
|
+
require_gi_bindings(3, 4, 2)
|
123
|
+
|
124
|
+
schema = @schema.with_metadata("key" => "value")
|
125
|
+
assert_equal(<<-SCHEMA.chomp, schema.to_s(show_metadata: true))
|
126
|
+
count: uint32
|
127
|
+
visible: bool
|
128
|
+
-- metadata --
|
129
|
+
key: value
|
130
|
+
SCHEMA
|
131
|
+
end
|
132
|
+
end
|
117
133
|
end
|
118
134
|
end
|
data/test/test-slicer.rb
CHANGED
@@ -46,10 +46,14 @@ class SlicerTest < Test::Unit::TestCase
|
|
46
46
|
end
|
47
47
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
48
48
|
count visible
|
49
|
-
0
|
50
|
-
1
|
51
|
-
2
|
52
|
-
3
|
49
|
+
0
|
50
|
+
1 1 true
|
51
|
+
2
|
52
|
+
3 8 true
|
53
|
+
4 16 true
|
54
|
+
5
|
55
|
+
6
|
56
|
+
7 256 true
|
53
57
|
TABLE
|
54
58
|
end
|
55
59
|
|
@@ -66,7 +70,8 @@ class SlicerTest < Test::Unit::TestCase
|
|
66
70
|
4 16 true
|
67
71
|
5 32 false
|
68
72
|
6 64
|
69
|
-
7
|
73
|
+
7
|
74
|
+
8 256 true
|
70
75
|
TABLE
|
71
76
|
end
|
72
77
|
end
|
@@ -78,8 +83,12 @@ class SlicerTest < Test::Unit::TestCase
|
|
78
83
|
end
|
79
84
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
80
85
|
count visible
|
81
|
-
0
|
82
|
-
1
|
86
|
+
0
|
87
|
+
1 2 false
|
88
|
+
2
|
89
|
+
3 32 false
|
90
|
+
4
|
91
|
+
5
|
83
92
|
TABLE
|
84
93
|
end
|
85
94
|
|
@@ -90,6 +99,7 @@ class SlicerTest < Test::Unit::TestCase
|
|
90
99
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
91
100
|
count visible
|
92
101
|
0 0
|
102
|
+
1
|
93
103
|
TABLE
|
94
104
|
end
|
95
105
|
end
|
@@ -142,10 +152,14 @@ class SlicerTest < Test::Unit::TestCase
|
|
142
152
|
end
|
143
153
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
144
154
|
count visible
|
145
|
-
0
|
146
|
-
1
|
147
|
-
2
|
148
|
-
3
|
155
|
+
0
|
156
|
+
1 1 true
|
157
|
+
2
|
158
|
+
3 8 true
|
159
|
+
4 16 true
|
160
|
+
5
|
161
|
+
6
|
162
|
+
7 256 true
|
149
163
|
TABLE
|
150
164
|
end
|
151
165
|
end
|
@@ -172,8 +186,12 @@ class SlicerTest < Test::Unit::TestCase
|
|
172
186
|
end
|
173
187
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
174
188
|
count visible
|
175
|
-
0
|
176
|
-
1
|
189
|
+
0
|
190
|
+
1 2 false
|
191
|
+
2
|
192
|
+
3 32 false
|
193
|
+
4
|
194
|
+
5
|
177
195
|
TABLE
|
178
196
|
end
|
179
197
|
end
|
@@ -200,8 +218,12 @@ class SlicerTest < Test::Unit::TestCase
|
|
200
218
|
end
|
201
219
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
202
220
|
count visible
|
203
|
-
0
|
204
|
-
1
|
221
|
+
0
|
222
|
+
1 2 false
|
223
|
+
2
|
224
|
+
3 32 false
|
225
|
+
4
|
226
|
+
5
|
205
227
|
TABLE
|
206
228
|
end
|
207
229
|
end
|
@@ -217,6 +239,7 @@ class SlicerTest < Test::Unit::TestCase
|
|
217
239
|
2 2 false
|
218
240
|
3 4
|
219
241
|
4 8 true
|
242
|
+
5
|
220
243
|
TABLE
|
221
244
|
end
|
222
245
|
|
@@ -229,7 +252,8 @@ class SlicerTest < Test::Unit::TestCase
|
|
229
252
|
0 16 true
|
230
253
|
1 32 false
|
231
254
|
2 64
|
232
|
-
3
|
255
|
+
3
|
256
|
+
4 256 true
|
233
257
|
TABLE
|
234
258
|
end
|
235
259
|
|
@@ -245,6 +269,7 @@ class SlicerTest < Test::Unit::TestCase
|
|
245
269
|
3 4
|
246
270
|
4 8 true
|
247
271
|
5 16 true
|
272
|
+
6
|
248
273
|
TABLE
|
249
274
|
end
|
250
275
|
|
@@ -256,7 +281,8 @@ class SlicerTest < Test::Unit::TestCase
|
|
256
281
|
count visible
|
257
282
|
0 32 false
|
258
283
|
1 64
|
259
|
-
2
|
284
|
+
2
|
285
|
+
3 256 true
|
260
286
|
TABLE
|
261
287
|
end
|
262
288
|
|
@@ -268,7 +294,8 @@ class SlicerTest < Test::Unit::TestCase
|
|
268
294
|
count visible
|
269
295
|
0 32 false
|
270
296
|
1 64
|
271
|
-
2
|
297
|
+
2
|
298
|
+
3 256 true
|
272
299
|
TABLE
|
273
300
|
end
|
274
301
|
|
@@ -284,6 +311,7 @@ class SlicerTest < Test::Unit::TestCase
|
|
284
311
|
3 4
|
285
312
|
4 8 true
|
286
313
|
5 16 true
|
314
|
+
6
|
287
315
|
TABLE
|
288
316
|
end
|
289
317
|
|
@@ -296,7 +324,8 @@ class SlicerTest < Test::Unit::TestCase
|
|
296
324
|
0 16 true
|
297
325
|
1 32 false
|
298
326
|
2 64
|
299
|
-
3
|
327
|
+
3
|
328
|
+
4 256 true
|
300
329
|
TABLE
|
301
330
|
end
|
302
331
|
|
@@ -311,6 +340,7 @@ class SlicerTest < Test::Unit::TestCase
|
|
311
340
|
2 2 false
|
312
341
|
3 4
|
313
342
|
4 8 true
|
343
|
+
5
|
314
344
|
TABLE
|
315
345
|
end
|
316
346
|
|
@@ -324,6 +354,7 @@ class SlicerTest < Test::Unit::TestCase
|
|
324
354
|
1 4
|
325
355
|
2 16 true
|
326
356
|
3 64
|
357
|
+
4
|
327
358
|
TABLE
|
328
359
|
end
|
329
360
|
|
@@ -337,7 +368,8 @@ class SlicerTest < Test::Unit::TestCase
|
|
337
368
|
1 2 false
|
338
369
|
2 8 true
|
339
370
|
3 32 false
|
340
|
-
4
|
371
|
+
4
|
372
|
+
5 256 true
|
341
373
|
TABLE
|
342
374
|
end
|
343
375
|
|
@@ -347,8 +379,12 @@ class SlicerTest < Test::Unit::TestCase
|
|
347
379
|
end
|
348
380
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
349
381
|
count visible
|
350
|
-
0
|
351
|
-
1
|
382
|
+
0
|
383
|
+
1
|
384
|
+
2 16 true
|
385
|
+
3
|
386
|
+
4
|
387
|
+
5 256 true
|
352
388
|
TABLE
|
353
389
|
end
|
354
390
|
|
@@ -358,11 +394,15 @@ class SlicerTest < Test::Unit::TestCase
|
|
358
394
|
end
|
359
395
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
360
396
|
count visible
|
361
|
-
0
|
362
|
-
1
|
363
|
-
2
|
364
|
-
3
|
365
|
-
4
|
397
|
+
0
|
398
|
+
1 1 true
|
399
|
+
2
|
400
|
+
3 8 true
|
401
|
+
4 16 true
|
402
|
+
5 32 false
|
403
|
+
6
|
404
|
+
7
|
405
|
+
8 256 true
|
366
406
|
TABLE
|
367
407
|
end
|
368
408
|
|
@@ -372,9 +412,13 @@ class SlicerTest < Test::Unit::TestCase
|
|
372
412
|
end
|
373
413
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
374
414
|
count visible
|
375
|
-
0
|
376
|
-
1
|
377
|
-
2
|
415
|
+
0
|
416
|
+
1 1 true
|
417
|
+
2
|
418
|
+
3 8 true
|
419
|
+
4 32 false
|
420
|
+
5
|
421
|
+
6
|
378
422
|
TABLE
|
379
423
|
end
|
380
424
|
|
data/test/test-sparse-union-data-type.rb
CHANGED
@@ -28,12 +28,12 @@ class SparseUnionDataTypeTest < Test::Unit::TestCase
|
|
28
28
|
end
|
29
29
|
|
30
30
|
test("ordered arguments") do
|
31
|
-
assert_equal("
|
31
|
+
assert_equal("sparse_union<visible: bool=2, count: int32=9>",
|
32
32
|
Arrow::SparseUnionDataType.new(@fields, [2, 9]).to_s)
|
33
33
|
end
|
34
34
|
|
35
35
|
test("description") do
|
36
|
-
assert_equal("
|
36
|
+
assert_equal("sparse_union<visible: bool=2, count: int32=9>",
|
37
37
|
Arrow::SparseUnionDataType.new(fields: @fields,
|
38
38
|
type_codes: [2, 9]).to_s)
|
39
39
|
end
|
data/test/test-struct-array-builder.rb
CHANGED
@@ -38,10 +38,12 @@ class StructArrayBuilderTest < Test::Unit::TestCase
|
|
38
38
|
|
39
39
|
test("Array") do
|
40
40
|
@builder.append_value([true, 1])
|
41
|
+
@builder.append_value([])
|
42
|
+
@builder.append_value([false])
|
41
43
|
array = @builder.finish
|
42
44
|
assert_equal([
|
43
|
-
[true],
|
44
|
-
[1],
|
45
|
+
[true, nil, false],
|
46
|
+
[1, nil, nil],
|
45
47
|
],
|
46
48
|
[
|
47
49
|
array.find_field(0).to_a,
|
@@ -66,10 +68,12 @@ class StructArrayBuilderTest < Test::Unit::TestCase
|
|
66
68
|
|
67
69
|
test("Hash") do
|
68
70
|
@builder.append_value(count: 1, visible: true)
|
71
|
+
@builder.append_value(visible: false)
|
72
|
+
@builder.append_value(count: 2)
|
69
73
|
array = @builder.finish
|
70
74
|
assert_equal([
|
71
|
-
[true],
|
72
|
-
[1],
|
75
|
+
[true, false, nil],
|
76
|
+
[1, nil, 2],
|
73
77
|
],
|
74
78
|
[
|
75
79
|
array.find_field(0).to_a,
|