red-arrow 0.15.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +28 -16
- data/ext/arrow/converters.hpp +63 -33
- data/ext/arrow/raw-records.cpp +2 -1
- data/ext/arrow/values.cpp +2 -1
- data/lib/arrow/array-builder.rb +101 -52
- data/lib/arrow/array.rb +28 -10
- data/lib/arrow/{binary-array-builder.rb → buffer.rb} +7 -15
- data/lib/arrow/chunked-array.rb +2 -0
- data/lib/arrow/csv-loader.rb +15 -3
- data/lib/arrow/csv-read-options.rb +18 -0
- data/lib/arrow/data-type.rb +35 -2
- data/lib/arrow/decimal128-array-builder.rb +0 -2
- data/lib/arrow/dictionary-array.rb +24 -0
- data/lib/arrow/field.rb +1 -1
- data/lib/arrow/generic-filterable.rb +43 -0
- data/lib/arrow/generic-takeable.rb +38 -0
- data/lib/arrow/list-data-type.rb +58 -8
- data/lib/arrow/loader.rb +12 -1
- data/lib/arrow/null-array-builder.rb +1 -1
- data/lib/arrow/null-array.rb +24 -0
- data/lib/arrow/raw-table-converter.rb +47 -0
- data/lib/arrow/record-batch-iterator.rb +22 -0
- data/lib/arrow/record-batch.rb +8 -3
- data/lib/arrow/schema.rb +5 -2
- data/lib/arrow/struct-array-builder.rb +13 -7
- data/lib/arrow/struct-data-type.rb +0 -2
- data/lib/arrow/table-loader.rb +29 -6
- data/lib/arrow/table-saver.rb +37 -13
- data/lib/arrow/table.rb +20 -73
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +4 -2
- data/test/helper.rb +1 -0
- data/test/helper/omittable.rb +36 -0
- data/test/raw-records/test-dense-union-array.rb +1 -34
- data/test/raw-records/test-sparse-union-array.rb +1 -33
- data/test/run-test.rb +14 -3
- data/test/test-array-builder.rb +17 -0
- data/test/test-array.rb +104 -0
- data/test/test-buffer.rb +11 -0
- data/test/test-chunked-array.rb +96 -0
- data/test/test-csv-loader.rb +77 -2
- data/test/test-data-type.rb +11 -0
- data/test/test-dense-union-data-type.rb +2 -2
- data/test/test-dictionary-array.rb +41 -0
- data/test/test-feather.rb +21 -6
- data/test/test-list-data-type.rb +27 -1
- data/test/test-null-array.rb +23 -0
- data/test/test-record-batch-iterator.rb +37 -0
- data/test/test-record-batch.rb +14 -0
- data/test/test-schema.rb +16 -0
- data/test/test-slicer.rb +74 -30
- data/test/test-sparse-union-data-type.rb +2 -2
- data/test/test-struct-array-builder.rb +8 -4
- data/test/test-table.rb +153 -14
- data/test/test-timestamp-array.rb +19 -0
- data/test/values/test-dense-union-array.rb +1 -34
- data/test/values/test-sparse-union-array.rb +1 -33
- metadata +76 -63
data/test/test-feather.rb
CHANGED
@@ -18,17 +18,32 @@
|
|
18
18
|
class FeatherTest < Test::Unit::TestCase
|
19
19
|
include Helper::Fixture
|
20
20
|
|
21
|
-
def
|
21
|
+
def setup
|
22
22
|
columns = {
|
23
23
|
"message" => Arrow::StringArray.new(["Start", "Crash", "Shutdown"]),
|
24
24
|
"is_critical" => Arrow::BooleanArray.new([false, true, false]),
|
25
25
|
}
|
26
|
-
table = Arrow::Table.new(columns)
|
26
|
+
@table = Arrow::Table.new(columns)
|
27
27
|
|
28
|
-
output = Tempfile.new(["red-arrow", ".feather"])
|
29
|
-
|
30
|
-
|
28
|
+
@output = Tempfile.new(["red-arrow", ".feather"])
|
29
|
+
begin
|
30
|
+
yield(@output)
|
31
|
+
ensure
|
32
|
+
@output.close!
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_default
|
37
|
+
@table.save(@output.path)
|
38
|
+
@output.close
|
39
|
+
|
40
|
+
assert_equal(@table, Arrow::Table.load(@output.path))
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_compression
|
44
|
+
@table.save(@output.path, compression: :zstd)
|
45
|
+
@output.close
|
31
46
|
|
32
|
-
assert_equal(table, Arrow::Table.load(output.path))
|
47
|
+
assert_equal(@table, Arrow::Table.load(@output.path))
|
33
48
|
end
|
34
49
|
end
|
data/test/test-list-data-type.rb
CHANGED
@@ -23,7 +23,7 @@ class ListDataTypeTest < Test::Unit::TestCase
|
|
23
23
|
Arrow::ListDataType.new(field).to_s)
|
24
24
|
end
|
25
25
|
|
26
|
-
test("
|
26
|
+
test("name: String") do
|
27
27
|
assert_equal("list<tag: string>",
|
28
28
|
Arrow::ListDataType.new(name: "tag", type: :string).to_s)
|
29
29
|
end
|
@@ -39,5 +39,31 @@ class ListDataTypeTest < Test::Unit::TestCase
|
|
39
39
|
assert_equal("list<tag: string>",
|
40
40
|
Arrow::ListDataType.new(field: field_description).to_s)
|
41
41
|
end
|
42
|
+
|
43
|
+
test("Arrow::DataType") do
|
44
|
+
data_type = Arrow::BooleanDataType.new
|
45
|
+
assert_equal("list<item: bool>",
|
46
|
+
Arrow::ListDataType.new(data_type).to_s)
|
47
|
+
end
|
48
|
+
|
49
|
+
test("String") do
|
50
|
+
assert_equal("list<item: bool>",
|
51
|
+
Arrow::ListDataType.new("boolean").to_s)
|
52
|
+
end
|
53
|
+
|
54
|
+
test("Symbol") do
|
55
|
+
assert_equal("list<item: bool>",
|
56
|
+
Arrow::ListDataType.new(:boolean).to_s)
|
57
|
+
end
|
58
|
+
|
59
|
+
test("[data type name, additional information]") do
|
60
|
+
assert_equal("list<item: time32[ms]>",
|
61
|
+
Arrow::ListDataType.new([:time32, :milli]).to_s)
|
62
|
+
end
|
63
|
+
|
64
|
+
test("type: Symbol") do
|
65
|
+
assert_equal("list<item: bool>",
|
66
|
+
Arrow::ListDataType.new(type: :boolean).to_s)
|
67
|
+
end
|
42
68
|
end
|
43
69
|
end
|
data/test/test-null-array.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
class NullArrayTest < Test::Unit::TestCase
|
19
|
+
test("#[]") do
|
20
|
+
array = Arrow::NullArray.new(1)
|
21
|
+
assert_nil(array[0])
|
22
|
+
end
|
23
|
+
end
|
data/test/test-record-batch-iterator.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
class RecordBatchIteratorTest < Test::Unit::TestCase
|
19
|
+
def setup
|
20
|
+
@schema = Arrow::Schema.new(visible: :boolean,
|
21
|
+
count: :uint32)
|
22
|
+
@record_batches = [
|
23
|
+
Arrow::RecordBatch.new(@schema,
|
24
|
+
visible: [true],
|
25
|
+
count: [1]),
|
26
|
+
Arrow::RecordBatch.new(@schema,
|
27
|
+
visible: [false, nil],
|
28
|
+
count: [nil, 3]),
|
29
|
+
]
|
30
|
+
@iterator = Arrow::RecordBatchIterator.new(@record_batches)
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_to_a
|
34
|
+
assert_equal(@record_batches,
|
35
|
+
@iterator.to_a)
|
36
|
+
end
|
37
|
+
end
|
data/test/test-record-batch.rb
CHANGED
@@ -22,6 +22,20 @@ class RecordBatchTest < Test::Unit::TestCase
|
|
22
22
|
count: :uint32)
|
23
23
|
end
|
24
24
|
|
25
|
+
test("[raw_table]") do
|
26
|
+
raw_table = {
|
27
|
+
visible: [true, nil, false],
|
28
|
+
count: [1, nil, 3],
|
29
|
+
}
|
30
|
+
record_batch = Arrow::RecordBatch.new(raw_table)
|
31
|
+
assert_equal([
|
32
|
+
{"visible" => true, "count" => 1},
|
33
|
+
{"visible" => nil, "count" => nil},
|
34
|
+
{"visible" => false, "count" => 3},
|
35
|
+
],
|
36
|
+
record_batch.each_record.collect(&:to_h))
|
37
|
+
end
|
38
|
+
|
25
39
|
test("[Schema, records]") do
|
26
40
|
records = [
|
27
41
|
{visible: true, count: 1},
|
data/test/test-schema.rb
CHANGED
@@ -16,6 +16,8 @@
|
|
16
16
|
# under the License.
|
17
17
|
|
18
18
|
class SchemaTest < Test::Unit::TestCase
|
19
|
+
include Helper::Omittable
|
20
|
+
|
19
21
|
def setup
|
20
22
|
@count_field = Arrow::Field.new("count", :uint32)
|
21
23
|
@visible_field = Arrow::Field.new("visible", :boolean)
|
@@ -114,5 +116,19 @@ class SchemaTest < Test::Unit::TestCase
|
|
114
116
|
end
|
115
117
|
end
|
116
118
|
end
|
119
|
+
|
120
|
+
sub_test_case("#to_s") do
|
121
|
+
test("show_metadata") do
|
122
|
+
require_gi_bindings(3, 4, 2)
|
123
|
+
|
124
|
+
schema = @schema.with_metadata("key" => "value")
|
125
|
+
assert_equal(<<-SCHEMA.chomp, schema.to_s(show_metadata: true))
|
126
|
+
count: uint32
|
127
|
+
visible: bool
|
128
|
+
-- metadata --
|
129
|
+
key: value
|
130
|
+
SCHEMA
|
131
|
+
end
|
132
|
+
end
|
117
133
|
end
|
118
134
|
end
|
data/test/test-slicer.rb
CHANGED
@@ -46,10 +46,14 @@ class SlicerTest < Test::Unit::TestCase
|
|
46
46
|
end
|
47
47
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
48
48
|
count visible
|
49
|
-
0
|
50
|
-
1
|
51
|
-
2
|
52
|
-
3
|
49
|
+
0
|
50
|
+
1 1 true
|
51
|
+
2
|
52
|
+
3 8 true
|
53
|
+
4 16 true
|
54
|
+
5
|
55
|
+
6
|
56
|
+
7 256 true
|
53
57
|
TABLE
|
54
58
|
end
|
55
59
|
|
@@ -66,7 +70,8 @@ class SlicerTest < Test::Unit::TestCase
|
|
66
70
|
4 16 true
|
67
71
|
5 32 false
|
68
72
|
6 64
|
69
|
-
7
|
73
|
+
7
|
74
|
+
8 256 true
|
70
75
|
TABLE
|
71
76
|
end
|
72
77
|
end
|
@@ -78,8 +83,12 @@ class SlicerTest < Test::Unit::TestCase
|
|
78
83
|
end
|
79
84
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
80
85
|
count visible
|
81
|
-
0
|
82
|
-
1
|
86
|
+
0
|
87
|
+
1 2 false
|
88
|
+
2
|
89
|
+
3 32 false
|
90
|
+
4
|
91
|
+
5
|
83
92
|
TABLE
|
84
93
|
end
|
85
94
|
|
@@ -90,6 +99,7 @@ class SlicerTest < Test::Unit::TestCase
|
|
90
99
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
91
100
|
count visible
|
92
101
|
0 0
|
102
|
+
1
|
93
103
|
TABLE
|
94
104
|
end
|
95
105
|
end
|
@@ -142,10 +152,14 @@ class SlicerTest < Test::Unit::TestCase
|
|
142
152
|
end
|
143
153
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
144
154
|
count visible
|
145
|
-
0
|
146
|
-
1
|
147
|
-
2
|
148
|
-
3
|
155
|
+
0
|
156
|
+
1 1 true
|
157
|
+
2
|
158
|
+
3 8 true
|
159
|
+
4 16 true
|
160
|
+
5
|
161
|
+
6
|
162
|
+
7 256 true
|
149
163
|
TABLE
|
150
164
|
end
|
151
165
|
end
|
@@ -172,8 +186,12 @@ class SlicerTest < Test::Unit::TestCase
|
|
172
186
|
end
|
173
187
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
174
188
|
count visible
|
175
|
-
0
|
176
|
-
1
|
189
|
+
0
|
190
|
+
1 2 false
|
191
|
+
2
|
192
|
+
3 32 false
|
193
|
+
4
|
194
|
+
5
|
177
195
|
TABLE
|
178
196
|
end
|
179
197
|
end
|
@@ -200,8 +218,12 @@ class SlicerTest < Test::Unit::TestCase
|
|
200
218
|
end
|
201
219
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
202
220
|
count visible
|
203
|
-
0
|
204
|
-
1
|
221
|
+
0
|
222
|
+
1 2 false
|
223
|
+
2
|
224
|
+
3 32 false
|
225
|
+
4
|
226
|
+
5
|
205
227
|
TABLE
|
206
228
|
end
|
207
229
|
end
|
@@ -217,6 +239,7 @@ class SlicerTest < Test::Unit::TestCase
|
|
217
239
|
2 2 false
|
218
240
|
3 4
|
219
241
|
4 8 true
|
242
|
+
5
|
220
243
|
TABLE
|
221
244
|
end
|
222
245
|
|
@@ -229,7 +252,8 @@ class SlicerTest < Test::Unit::TestCase
|
|
229
252
|
0 16 true
|
230
253
|
1 32 false
|
231
254
|
2 64
|
232
|
-
3
|
255
|
+
3
|
256
|
+
4 256 true
|
233
257
|
TABLE
|
234
258
|
end
|
235
259
|
|
@@ -245,6 +269,7 @@ class SlicerTest < Test::Unit::TestCase
|
|
245
269
|
3 4
|
246
270
|
4 8 true
|
247
271
|
5 16 true
|
272
|
+
6
|
248
273
|
TABLE
|
249
274
|
end
|
250
275
|
|
@@ -256,7 +281,8 @@ class SlicerTest < Test::Unit::TestCase
|
|
256
281
|
count visible
|
257
282
|
0 32 false
|
258
283
|
1 64
|
259
|
-
2
|
284
|
+
2
|
285
|
+
3 256 true
|
260
286
|
TABLE
|
261
287
|
end
|
262
288
|
|
@@ -268,7 +294,8 @@ class SlicerTest < Test::Unit::TestCase
|
|
268
294
|
count visible
|
269
295
|
0 32 false
|
270
296
|
1 64
|
271
|
-
2
|
297
|
+
2
|
298
|
+
3 256 true
|
272
299
|
TABLE
|
273
300
|
end
|
274
301
|
|
@@ -284,6 +311,7 @@ class SlicerTest < Test::Unit::TestCase
|
|
284
311
|
3 4
|
285
312
|
4 8 true
|
286
313
|
5 16 true
|
314
|
+
6
|
287
315
|
TABLE
|
288
316
|
end
|
289
317
|
|
@@ -296,7 +324,8 @@ class SlicerTest < Test::Unit::TestCase
|
|
296
324
|
0 16 true
|
297
325
|
1 32 false
|
298
326
|
2 64
|
299
|
-
3
|
327
|
+
3
|
328
|
+
4 256 true
|
300
329
|
TABLE
|
301
330
|
end
|
302
331
|
|
@@ -311,6 +340,7 @@ class SlicerTest < Test::Unit::TestCase
|
|
311
340
|
2 2 false
|
312
341
|
3 4
|
313
342
|
4 8 true
|
343
|
+
5
|
314
344
|
TABLE
|
315
345
|
end
|
316
346
|
|
@@ -324,6 +354,7 @@ class SlicerTest < Test::Unit::TestCase
|
|
324
354
|
1 4
|
325
355
|
2 16 true
|
326
356
|
3 64
|
357
|
+
4
|
327
358
|
TABLE
|
328
359
|
end
|
329
360
|
|
@@ -337,7 +368,8 @@ class SlicerTest < Test::Unit::TestCase
|
|
337
368
|
1 2 false
|
338
369
|
2 8 true
|
339
370
|
3 32 false
|
340
|
-
4
|
371
|
+
4
|
372
|
+
5 256 true
|
341
373
|
TABLE
|
342
374
|
end
|
343
375
|
|
@@ -347,8 +379,12 @@ class SlicerTest < Test::Unit::TestCase
|
|
347
379
|
end
|
348
380
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
349
381
|
count visible
|
350
|
-
0
|
351
|
-
1
|
382
|
+
0
|
383
|
+
1
|
384
|
+
2 16 true
|
385
|
+
3
|
386
|
+
4
|
387
|
+
5 256 true
|
352
388
|
TABLE
|
353
389
|
end
|
354
390
|
|
@@ -358,11 +394,15 @@ class SlicerTest < Test::Unit::TestCase
|
|
358
394
|
end
|
359
395
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
360
396
|
count visible
|
361
|
-
0
|
362
|
-
1
|
363
|
-
2
|
364
|
-
3
|
365
|
-
4
|
397
|
+
0
|
398
|
+
1 1 true
|
399
|
+
2
|
400
|
+
3 8 true
|
401
|
+
4 16 true
|
402
|
+
5 32 false
|
403
|
+
6
|
404
|
+
7
|
405
|
+
8 256 true
|
366
406
|
TABLE
|
367
407
|
end
|
368
408
|
|
@@ -372,9 +412,13 @@ class SlicerTest < Test::Unit::TestCase
|
|
372
412
|
end
|
373
413
|
assert_equal(<<-TABLE, sliced_table.to_s)
|
374
414
|
count visible
|
375
|
-
0
|
376
|
-
1
|
377
|
-
2
|
415
|
+
0
|
416
|
+
1 1 true
|
417
|
+
2
|
418
|
+
3 8 true
|
419
|
+
4 32 false
|
420
|
+
5
|
421
|
+
6
|
378
422
|
TABLE
|
379
423
|
end
|
380
424
|
|
data/test/test-sparse-union-data-type.rb
CHANGED
@@ -28,12 +28,12 @@ class SparseUnionDataTypeTest < Test::Unit::TestCase
|
|
28
28
|
end
|
29
29
|
|
30
30
|
test("ordered arguments") do
|
31
|
-
assert_equal("
|
31
|
+
assert_equal("sparse_union<visible: bool=2, count: int32=9>",
|
32
32
|
Arrow::SparseUnionDataType.new(@fields, [2, 9]).to_s)
|
33
33
|
end
|
34
34
|
|
35
35
|
test("description") do
|
36
|
-
assert_equal("
|
36
|
+
assert_equal("sparse_union<visible: bool=2, count: int32=9>",
|
37
37
|
Arrow::SparseUnionDataType.new(fields: @fields,
|
38
38
|
type_codes: [2, 9]).to_s)
|
39
39
|
end
|
data/test/test-struct-array-builder.rb
CHANGED
@@ -38,10 +38,12 @@ class StructArrayBuilderTest < Test::Unit::TestCase
|
|
38
38
|
|
39
39
|
test("Array") do
|
40
40
|
@builder.append_value([true, 1])
|
41
|
+
@builder.append_value([])
|
42
|
+
@builder.append_value([false])
|
41
43
|
array = @builder.finish
|
42
44
|
assert_equal([
|
43
|
-
[true],
|
44
|
-
[1],
|
45
|
+
[true, nil, false],
|
46
|
+
[1, nil, nil],
|
45
47
|
],
|
46
48
|
[
|
47
49
|
array.find_field(0).to_a,
|
@@ -66,10 +68,12 @@ class StructArrayBuilderTest < Test::Unit::TestCase
|
|
66
68
|
|
67
69
|
test("Hash") do
|
68
70
|
@builder.append_value(count: 1, visible: true)
|
71
|
+
@builder.append_value(visible: false)
|
72
|
+
@builder.append_value(count: 2)
|
69
73
|
array = @builder.finish
|
70
74
|
assert_equal([
|
71
|
-
[true],
|
72
|
-
[1],
|
75
|
+
[true, false, nil],
|
76
|
+
[1, nil, 2],
|
73
77
|
],
|
74
78
|
[
|
75
79
|
array.find_field(0).to_a,
|