red-arrow 0.11.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of red-arrow might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/Rakefile +4 -0
- data/lib/arrow/array-builder.rb +6 -2
- data/lib/arrow/array.rb +6 -2
- data/{test/test-csv-reader.rb → lib/arrow/compression-type.rb} +16 -13
- data/lib/arrow/csv-loader.rb +102 -2
- data/lib/arrow/csv-read-options.rb +25 -0
- data/lib/arrow/data-type.rb +135 -0
- data/lib/arrow/decimal128-array-builder.rb +64 -0
- data/lib/arrow/decimal128-data-type.rb +69 -0
- data/lib/arrow/dense-union-data-type.rb +90 -0
- data/lib/arrow/dictionary-data-type.rb +106 -0
- data/lib/arrow/field-containable.rb +35 -0
- data/lib/arrow/field.rb +92 -8
- data/lib/arrow/file-output-stream.rb +34 -0
- data/lib/arrow/list-array-builder.rb +96 -0
- data/lib/arrow/list-data-type.rb +68 -0
- data/lib/arrow/loader.rb +30 -5
- data/lib/arrow/{csv-reader.rb → path-extension.rb} +19 -28
- data/lib/arrow/record-batch-builder.rb +115 -0
- data/lib/arrow/record-batch.rb +25 -0
- data/lib/arrow/schema.rb +97 -0
- data/lib/arrow/sparse-union-data-type.rb +90 -0
- data/lib/arrow/struct-array-builder.rb +146 -0
- data/lib/arrow/struct-array.rb +34 -0
- data/lib/arrow/struct-data-type.rb +130 -0
- data/lib/arrow/struct.rb +68 -0
- data/lib/arrow/table-loader.rb +65 -25
- data/lib/arrow/table-saver.rb +73 -24
- data/lib/arrow/table.rb +11 -2
- data/lib/arrow/time32-data-type.rb +61 -0
- data/lib/arrow/time64-data-type.rb +61 -0
- data/lib/arrow/timestamp-data-type.rb +57 -0
- data/lib/arrow/version.rb +5 -7
- data/lib/arrow/writable.rb +22 -0
- data/red-arrow.gemspec +8 -4
- data/test/helper.rb +1 -2
- data/test/test-csv-loader.rb +27 -0
- data/test/test-data-type.rb +47 -0
- data/test/test-decimal128-array-builder.rb +95 -0
- data/test/test-decimal128-array.rb +38 -0
- data/test/test-decimal128-data-type.rb +31 -0
- data/test/test-dense-union-data-type.rb +41 -0
- data/test/test-dictionary-data-type.rb +40 -0
- data/test/test-feather.rb +34 -0
- data/test/test-field.rb +71 -0
- data/test/test-file-output-stream.rb +54 -0
- data/test/test-list-array-builder.rb +79 -0
- data/test/test-list-array.rb +32 -0
- data/test/test-list-data-type.rb +43 -0
- data/test/test-record-batch-builder.rb +116 -0
- data/test/test-record-batch.rb +82 -27
- data/test/test-schema.rb +104 -0
- data/test/test-sparse-union-data-type.rb +41 -0
- data/test/test-struct-array-builder.rb +180 -0
- data/test/test-struct-array.rb +60 -15
- data/test/test-struct-data-type.rb +112 -0
- data/test/test-struct.rb +81 -0
- data/test/test-table.rb +165 -29
- data/test/test-time32-data-type.rb +42 -0
- data/test/test-time64-data-type.rb +42 -0
- data/test/test-timestamp-data-type.rb +42 -0
- metadata +99 -10
data/test/test-record-batch.rb
CHANGED
@@ -16,42 +16,97 @@
|
|
16
16
|
# under the License.
|
17
17
|
|
18
18
|
class RecordBatchTest < Test::Unit::TestCase
|
19
|
-
sub_test_case(".
|
20
|
-
setup
|
21
|
-
|
22
|
-
|
19
|
+
sub_test_case(".new") do
|
20
|
+
def setup
|
21
|
+
@schema = Arrow::Schema.new(visible: :boolean,
|
22
|
+
count: :uint32)
|
23
|
+
end
|
24
|
+
|
25
|
+
test("[Schema, records]") do
|
26
|
+
records = [
|
27
|
+
{visible: true, count: 1},
|
28
|
+
nil,
|
29
|
+
[false, 3],
|
23
30
|
]
|
24
|
-
|
25
|
-
|
26
|
-
|
31
|
+
record_batch = Arrow::RecordBatch.new(@schema, records)
|
32
|
+
assert_equal([
|
33
|
+
{"visible" => true, "count" => 1},
|
34
|
+
{"visible" => nil, "count" => nil},
|
35
|
+
{"visible" => false, "count" => 3},
|
36
|
+
],
|
37
|
+
record_batch.each_record.collect(&:to_h))
|
27
38
|
end
|
28
39
|
|
29
|
-
test("
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
40
|
+
test("[Schema, columns]") do
|
41
|
+
columns = {
|
42
|
+
visible: [true, nil, false],
|
43
|
+
count: [1, 2, nil],
|
44
|
+
}
|
45
|
+
record_batch = Arrow::RecordBatch.new(@schema, columns)
|
34
46
|
assert_equal([
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
[3, 3],
|
47
|
+
{"visible" => true, "count" => 1},
|
48
|
+
{"visible" => nil, "count" => 2},
|
49
|
+
{"visible" => false, "count" => nil},
|
39
50
|
],
|
40
|
-
|
51
|
+
record_batch.each_record.collect(&:to_h))
|
41
52
|
end
|
42
53
|
|
43
|
-
test("
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
54
|
+
test("[Schema, n_rows, columns]") do
|
55
|
+
columns = [
|
56
|
+
Arrow::BooleanArray.new([true, nil, false]),
|
57
|
+
Arrow::UInt32Array.new([1, 2, nil]),
|
58
|
+
]
|
59
|
+
n_rows = columns[0].length
|
60
|
+
record_batch = Arrow::RecordBatch.new(@schema, n_rows, columns)
|
48
61
|
assert_equal([
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
[3, 3],
|
62
|
+
{"visible" => true, "count" => 1},
|
63
|
+
{"visible" => nil, "count" => 2},
|
64
|
+
{"visible" => false, "count" => nil},
|
53
65
|
],
|
54
|
-
|
66
|
+
record_batch.each_record.collect(&:to_h))
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
sub_test_case("instance methods") do
|
71
|
+
def setup
|
72
|
+
@schema = Arrow::Schema.new(count: :uint32)
|
73
|
+
@counts = Arrow::UInt32Array.new([1, 2, 4, 8])
|
74
|
+
@record_batch = Arrow::RecordBatch.new(@schema, @counts.length, [@counts])
|
75
|
+
end
|
76
|
+
|
77
|
+
sub_test_case("#each") do
|
78
|
+
test("default") do
|
79
|
+
records = []
|
80
|
+
@record_batch.each do |record|
|
81
|
+
records << [record, record.index]
|
82
|
+
end
|
83
|
+
assert_equal([
|
84
|
+
[0, 0],
|
85
|
+
[1, 1],
|
86
|
+
[2, 2],
|
87
|
+
[3, 3],
|
88
|
+
],
|
89
|
+
records.collect {|record, i| [record.index, i]})
|
90
|
+
end
|
91
|
+
|
92
|
+
test("reuse_record: true") do
|
93
|
+
records = []
|
94
|
+
@record_batch.each(reuse_record: true) do |record|
|
95
|
+
records << [record, record.index]
|
96
|
+
end
|
97
|
+
assert_equal([
|
98
|
+
[3, 0],
|
99
|
+
[3, 1],
|
100
|
+
[3, 2],
|
101
|
+
[3, 3],
|
102
|
+
],
|
103
|
+
records.collect {|record, i| [record.index, i]})
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
test("#to_table") do
|
108
|
+
assert_equal(Arrow::Table.new(@schema, [@counts]),
|
109
|
+
@record_batch.to_table)
|
55
110
|
end
|
56
111
|
end
|
57
112
|
end
|
data/test/test-schema.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
class SchemaTest < Test::Unit::TestCase
|
19
|
+
def setup
|
20
|
+
@count_field = Arrow::Field.new("count", :uint32)
|
21
|
+
@visible_field = Arrow::Field.new("visible", :boolean)
|
22
|
+
end
|
23
|
+
|
24
|
+
sub_test_case(".new") do
|
25
|
+
test("[Arrow::Field]") do
|
26
|
+
fields = [
|
27
|
+
@count_field,
|
28
|
+
@visible_field,
|
29
|
+
]
|
30
|
+
assert_equal("count: uint32\n" +
|
31
|
+
"visible: bool",
|
32
|
+
Arrow::Schema.new(fields).to_s)
|
33
|
+
end
|
34
|
+
|
35
|
+
test("[Arrow::Field, Hash]") do
|
36
|
+
fields = [
|
37
|
+
@count_field,
|
38
|
+
{name: "visible", type: :boolean},
|
39
|
+
]
|
40
|
+
assert_equal("count: uint32\n" +
|
41
|
+
"visible: bool",
|
42
|
+
Arrow::Schema.new(fields).to_s)
|
43
|
+
end
|
44
|
+
|
45
|
+
test("{String, Symbol => Arrow::DataType}") do
|
46
|
+
fields = {
|
47
|
+
"count" => Arrow::UInt32DataType.new,
|
48
|
+
:visible => :boolean,
|
49
|
+
}
|
50
|
+
assert_equal("count: uint32\n" +
|
51
|
+
"visible: bool",
|
52
|
+
Arrow::Schema.new(fields).to_s)
|
53
|
+
end
|
54
|
+
|
55
|
+
test("{String, Symbol => Hash}") do
|
56
|
+
fields = {
|
57
|
+
"count" => {type: :uint32},
|
58
|
+
:tags => {
|
59
|
+
type: :list,
|
60
|
+
field: {
|
61
|
+
name: "tag",
|
62
|
+
type: :string,
|
63
|
+
},
|
64
|
+
},
|
65
|
+
}
|
66
|
+
assert_equal("count: uint32\n" +
|
67
|
+
"tags: list<tag: string>",
|
68
|
+
Arrow::Schema.new(fields).to_s)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
sub_test_case("instance methods") do
|
73
|
+
def setup
|
74
|
+
super
|
75
|
+
@schema = Arrow::Schema.new([@count_field, @visible_field])
|
76
|
+
end
|
77
|
+
|
78
|
+
sub_test_case("#[]") do
|
79
|
+
test("[String]") do
|
80
|
+
assert_equal([@count_field, @visible_field],
|
81
|
+
[@schema["count"], @schema["visible"]])
|
82
|
+
end
|
83
|
+
|
84
|
+
test("[Symbol]") do
|
85
|
+
assert_equal([@count_field, @visible_field],
|
86
|
+
[@schema[:count], @schema[:visible]])
|
87
|
+
end
|
88
|
+
|
89
|
+
test("[Integer]") do
|
90
|
+
assert_equal([@count_field, @visible_field],
|
91
|
+
[@schema[0], @schema[1]])
|
92
|
+
end
|
93
|
+
|
94
|
+
test("[invalid]") do
|
95
|
+
invalid = []
|
96
|
+
message = "field name or index must be String, Symbol or Integer"
|
97
|
+
message << ": <#{invalid.inspect}>"
|
98
|
+
assert_raise(ArgumentError.new(message)) do
|
99
|
+
@schema[invalid]
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
data/test/test-sparse-union-data-type.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
class SparseUnionDataTypeTest < Test::Unit::TestCase
|
19
|
+
sub_test_case(".new") do
|
20
|
+
def setup
|
21
|
+
@fields = [
|
22
|
+
Arrow::Field.new("visible", :boolean),
|
23
|
+
{
|
24
|
+
name: "count",
|
25
|
+
type: :int32,
|
26
|
+
},
|
27
|
+
]
|
28
|
+
end
|
29
|
+
|
30
|
+
test("ordered arguments") do
|
31
|
+
assert_equal("union[sparse]<visible: bool=2, count: int32=9>",
|
32
|
+
Arrow::SparseUnionDataType.new(@fields, [2, 9]).to_s)
|
33
|
+
end
|
34
|
+
|
35
|
+
test("description") do
|
36
|
+
assert_equal("union[sparse]<visible: bool=2, count: int32=9>",
|
37
|
+
Arrow::SparseUnionDataType.new(fields: @fields,
|
38
|
+
type_codes: [2, 9]).to_s)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/test/test-struct-array-builder.rb
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
class StructArrayBuilderTest < Test::Unit::TestCase
|
19
|
+
def setup
|
20
|
+
@data_type = Arrow::StructDataType.new(visible: {type: :boolean},
|
21
|
+
count: {type: :uint64})
|
22
|
+
@builder = Arrow::StructArrayBuilder.new(@data_type)
|
23
|
+
end
|
24
|
+
|
25
|
+
sub_test_case("#append_value") do
|
26
|
+
test("nil") do
|
27
|
+
@builder.append_value(nil)
|
28
|
+
array = @builder.finish
|
29
|
+
assert_equal([
|
30
|
+
[nil],
|
31
|
+
[nil],
|
32
|
+
],
|
33
|
+
[
|
34
|
+
array.find_field(0).to_a,
|
35
|
+
array.find_field(1).to_a,
|
36
|
+
])
|
37
|
+
end
|
38
|
+
|
39
|
+
test("Array") do
|
40
|
+
@builder.append_value([true, 1])
|
41
|
+
array = @builder.finish
|
42
|
+
assert_equal([
|
43
|
+
[true],
|
44
|
+
[1],
|
45
|
+
],
|
46
|
+
[
|
47
|
+
array.find_field(0).to_a,
|
48
|
+
array.find_field(1).to_a,
|
49
|
+
])
|
50
|
+
end
|
51
|
+
|
52
|
+
test("Arrow::Struct") do
|
53
|
+
source_array = Arrow::StructArray.new(@data_type, [[true, 1]])
|
54
|
+
struct = source_array.get_value(0)
|
55
|
+
@builder.append_value(struct)
|
56
|
+
array = @builder.finish
|
57
|
+
assert_equal([
|
58
|
+
[true],
|
59
|
+
[1],
|
60
|
+
],
|
61
|
+
[
|
62
|
+
array.find_field(0).to_a,
|
63
|
+
array.find_field(1).to_a,
|
64
|
+
])
|
65
|
+
end
|
66
|
+
|
67
|
+
test("Hash") do
|
68
|
+
@builder.append_value(count: 1, visible: true)
|
69
|
+
array = @builder.finish
|
70
|
+
assert_equal([
|
71
|
+
[true],
|
72
|
+
[1],
|
73
|
+
],
|
74
|
+
[
|
75
|
+
array.find_field(0).to_a,
|
76
|
+
array.find_field(1).to_a,
|
77
|
+
])
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
sub_test_case("#append_values") do
|
82
|
+
test("[nil]") do
|
83
|
+
@builder.append_values([nil])
|
84
|
+
array = @builder.finish
|
85
|
+
assert_equal([
|
86
|
+
[nil],
|
87
|
+
[nil],
|
88
|
+
],
|
89
|
+
[
|
90
|
+
array.find_field(0).to_a,
|
91
|
+
array.find_field(1).to_a,
|
92
|
+
])
|
93
|
+
end
|
94
|
+
|
95
|
+
test("[Array]") do
|
96
|
+
@builder.append_values([[true, 1]])
|
97
|
+
array = @builder.finish
|
98
|
+
assert_equal([
|
99
|
+
[true],
|
100
|
+
[1],
|
101
|
+
],
|
102
|
+
[
|
103
|
+
array.find_field(0).to_a,
|
104
|
+
array.find_field(1).to_a,
|
105
|
+
])
|
106
|
+
end
|
107
|
+
|
108
|
+
test("[Hash]") do
|
109
|
+
@builder.append_values([{count: 1, visible: true}])
|
110
|
+
array = @builder.finish
|
111
|
+
assert_equal([
|
112
|
+
[true],
|
113
|
+
[1],
|
114
|
+
],
|
115
|
+
[
|
116
|
+
array.find_field(0).to_a,
|
117
|
+
array.find_field(1).to_a,
|
118
|
+
])
|
119
|
+
end
|
120
|
+
|
121
|
+
test("[nil, Array, Hash]") do
|
122
|
+
@builder.append_values([
|
123
|
+
nil,
|
124
|
+
[true, 1],
|
125
|
+
{count: 2, visible: false},
|
126
|
+
])
|
127
|
+
array = @builder.finish
|
128
|
+
assert_equal([
|
129
|
+
[nil, true, false],
|
130
|
+
[nil, 1, 2],
|
131
|
+
],
|
132
|
+
[
|
133
|
+
array.find_field(0).to_a,
|
134
|
+
array.find_field(1).to_a,
|
135
|
+
])
|
136
|
+
end
|
137
|
+
|
138
|
+
test("is_valids") do
|
139
|
+
@builder.append_values([
|
140
|
+
[true, 1],
|
141
|
+
[false, 2],
|
142
|
+
[true, 3],
|
143
|
+
],
|
144
|
+
[
|
145
|
+
true,
|
146
|
+
false,
|
147
|
+
true,
|
148
|
+
])
|
149
|
+
array = @builder.finish
|
150
|
+
assert_equal([
|
151
|
+
[true, nil, true],
|
152
|
+
[1, nil, 3],
|
153
|
+
],
|
154
|
+
[
|
155
|
+
array.find_field(0).to_a,
|
156
|
+
array.find_field(1).to_a,
|
157
|
+
])
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
sub_test_case("#append") do
|
162
|
+
test("backward compatibility") do
|
163
|
+
@builder.append
|
164
|
+
@builder.get_field_builder(0).append(true)
|
165
|
+
@builder.get_field_builder(1).append(1)
|
166
|
+
@builder.append
|
167
|
+
@builder.get_field_builder(0).append(false)
|
168
|
+
@builder.get_field_builder(1).append(2)
|
169
|
+
array = @builder.finish
|
170
|
+
assert_equal([
|
171
|
+
[true, 1],
|
172
|
+
[false, 2],
|
173
|
+
],
|
174
|
+
[
|
175
|
+
array.get_value(0).values,
|
176
|
+
array.get_value(1).values,
|
177
|
+
])
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|