red-arrow 18.1.0 → 19.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/arrow/array.rb +7 -4
- data/lib/arrow/column.rb +4 -4
- data/lib/arrow/jruby/array-builder.rb +114 -0
- data/lib/arrow/jruby/array.rb +109 -0
- data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
- data/lib/arrow/jruby/compression-type.rb +26 -0
- data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
- data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
- data/lib/arrow/jruby/decimal128.rb +28 -0
- data/lib/arrow/jruby/decimal256.rb +28 -0
- data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
- data/lib/arrow/jruby/file-system.rb +24 -0
- data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
- data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
- data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
- data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
- data/lib/arrow/jruby/sort-options.rb +24 -0
- data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
- data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
- data/lib/arrow/jruby/writable.rb +24 -0
- data/lib/arrow/jruby.rb +52 -0
- data/lib/arrow/libraries.rb +126 -0
- data/lib/arrow/list-array-builder.rb +1 -0
- data/lib/arrow/loader.rb +3 -111
- data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
- data/lib/arrow/version.rb +1 -1
- data/lib/arrow.rb +2 -7
- data/red-arrow.gemspec +15 -6
- metadata +26 -229
- data/test/each-raw-record/test-basic-arrays.rb +0 -411
- data/test/each-raw-record/test-dense-union-array.rb +0 -566
- data/test/each-raw-record/test-dictionary-array.rb +0 -341
- data/test/each-raw-record/test-list-array.rb +0 -628
- data/test/each-raw-record/test-map-array.rb +0 -507
- data/test/each-raw-record/test-multiple-columns.rb +0 -72
- data/test/each-raw-record/test-sparse-union-array.rb +0 -528
- data/test/each-raw-record/test-struct-array.rb +0 -529
- data/test/each-raw-record/test-table.rb +0 -47
- data/test/fixture/TestOrcFile.test1.orc +0 -0
- data/test/fixture/with-header-float.csv +0 -20
- data/test/fixture/with-header.csv +0 -20
- data/test/fixture/without-header-float.csv +0 -19
- data/test/fixture/without-header.csv +0 -19
- data/test/helper/omittable.rb +0 -49
- data/test/helper.rb +0 -31
- data/test/raw-records/test-basic-arrays.rb +0 -405
- data/test/raw-records/test-dense-union-array.rb +0 -566
- data/test/raw-records/test-dictionary-array.rb +0 -341
- data/test/raw-records/test-list-array.rb +0 -628
- data/test/raw-records/test-map-array.rb +0 -507
- data/test/raw-records/test-multiple-columns.rb +0 -65
- data/test/raw-records/test-sparse-union-array.rb +0 -556
- data/test/raw-records/test-struct-array.rb +0 -529
- data/test/raw-records/test-table.rb +0 -47
- data/test/run-test.rb +0 -71
- data/test/test-array-builder.rb +0 -198
- data/test/test-array.rb +0 -332
- data/test/test-bigdecimal.rb +0 -40
- data/test/test-binary-dictionary-array-builder.rb +0 -103
- data/test/test-buffer.rb +0 -49
- data/test/test-chunked-array.rb +0 -198
- data/test/test-column.rb +0 -123
- data/test/test-csv-loader.rb +0 -297
- data/test/test-data-type.rb +0 -84
- data/test/test-date32-array.rb +0 -24
- data/test/test-date64-array.rb +0 -25
- data/test/test-decimal128-array-builder.rb +0 -126
- data/test/test-decimal128-array.rb +0 -47
- data/test/test-decimal128-data-type.rb +0 -31
- data/test/test-decimal128.rb +0 -126
- data/test/test-decimal256-array-builder.rb +0 -126
- data/test/test-decimal256-array.rb +0 -47
- data/test/test-decimal256-data-type.rb +0 -31
- data/test/test-decimal256.rb +0 -126
- data/test/test-dense-union-array.rb +0 -42
- data/test/test-dense-union-data-type.rb +0 -41
- data/test/test-dictionary-array.rb +0 -41
- data/test/test-dictionary-data-type.rb +0 -40
- data/test/test-expression.rb +0 -51
- data/test/test-feather.rb +0 -49
- data/test/test-field.rb +0 -117
- data/test/test-file-output-stream.rb +0 -54
- data/test/test-fixed-size-binary-array-builder.rb +0 -92
- data/test/test-fixed-size-binary-array.rb +0 -36
- data/test/test-float-scalar.rb +0 -46
- data/test/test-function.rb +0 -210
- data/test/test-group.rb +0 -193
- data/test/test-half-float-array.rb +0 -43
- data/test/test-half-float.rb +0 -130
- data/test/test-list-array-builder.rb +0 -79
- data/test/test-list-array.rb +0 -32
- data/test/test-list-data-type.rb +0 -69
- data/test/test-map-array-builder.rb +0 -110
- data/test/test-map-array.rb +0 -33
- data/test/test-memory-view.rb +0 -434
- data/test/test-orc.rb +0 -173
- data/test/test-ractor.rb +0 -34
- data/test/test-record-batch-builder.rb +0 -125
- data/test/test-record-batch-file-reader.rb +0 -136
- data/test/test-record-batch-iterator.rb +0 -37
- data/test/test-record-batch-reader.rb +0 -46
- data/test/test-record-batch-stream-reader.rb +0 -129
- data/test/test-record-batch.rb +0 -182
- data/test/test-scalar.rb +0 -65
- data/test/test-schema.rb +0 -134
- data/test/test-slicer.rb +0 -589
- data/test/test-sort-indices.rb +0 -40
- data/test/test-sort-key.rb +0 -81
- data/test/test-sort-options.rb +0 -58
- data/test/test-sparse-union-array.rb +0 -38
- data/test/test-sparse-union-data-type.rb +0 -41
- data/test/test-stream-listener.rb +0 -60
- data/test/test-string-dictionary-array-builder.rb +0 -103
- data/test/test-struct-array-builder.rb +0 -184
- data/test/test-struct-array.rb +0 -94
- data/test/test-struct-data-type.rb +0 -112
- data/test/test-table.rb +0 -1530
- data/test/test-tensor.rb +0 -297
- data/test/test-time.rb +0 -288
- data/test/test-time32-array.rb +0 -81
- data/test/test-time32-data-type.rb +0 -42
- data/test/test-time64-array.rb +0 -81
- data/test/test-time64-data-type.rb +0 -42
- data/test/test-timestamp-array.rb +0 -45
- data/test/test-timestamp-data-type.rb +0 -42
- data/test/values/test-basic-arrays.rb +0 -335
- data/test/values/test-dense-union-array.rb +0 -552
- data/test/values/test-dictionary-array.rb +0 -325
- data/test/values/test-list-array.rb +0 -587
- data/test/values/test-map-array.rb +0 -489
- data/test/values/test-sparse-union-array.rb +0 -543
- data/test/values/test-struct-array.rb +0 -524
data/test/test-buffer.rb
DELETED
@@ -1,49 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
class BufferTest < Test::Unit::TestCase
|
19
|
-
sub_test_case(".new") do
|
20
|
-
test("GC") do
|
21
|
-
data = "Hello"
|
22
|
-
data_id = data.object_id
|
23
|
-
_buffer = Arrow::Buffer.new(data)
|
24
|
-
data = nil
|
25
|
-
GC.start
|
26
|
-
assert_equal("Hello", ObjectSpace._id2ref(data_id))
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
sub_test_case("instance methods") do
|
31
|
-
def setup
|
32
|
-
@buffer = Arrow::Buffer.new("Hello")
|
33
|
-
end
|
34
|
-
|
35
|
-
sub_test_case("#==") do
|
36
|
-
test("Arrow::Buffer") do
|
37
|
-
assert do
|
38
|
-
@buffer == @buffer
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
test("not Arrow::Buffer") do
|
43
|
-
assert do
|
44
|
-
not (@buffer == 29)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
data/test/test-chunked-array.rb
DELETED
@@ -1,198 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
class ChunkedArrayTest < Test::Unit::TestCase
|
19
|
-
test("#each") do
|
20
|
-
arrays = [
|
21
|
-
Arrow::BooleanArray.new([true, false]),
|
22
|
-
Arrow::BooleanArray.new([nil, true]),
|
23
|
-
]
|
24
|
-
chunked_array = Arrow::ChunkedArray.new(arrays)
|
25
|
-
assert_equal([true, false, nil, true],
|
26
|
-
chunked_array.to_a)
|
27
|
-
end
|
28
|
-
|
29
|
-
sub_test_case("#pack") do
|
30
|
-
test("basic array") do
|
31
|
-
arrays = [
|
32
|
-
Arrow::BooleanArray.new([true, false]),
|
33
|
-
Arrow::BooleanArray.new([nil, true]),
|
34
|
-
]
|
35
|
-
chunked_array = Arrow::ChunkedArray.new(arrays)
|
36
|
-
packed_chunked_array = chunked_array.pack
|
37
|
-
assert_equal([
|
38
|
-
Arrow::BooleanArray,
|
39
|
-
[true, false, nil, true],
|
40
|
-
],
|
41
|
-
[
|
42
|
-
packed_chunked_array.class,
|
43
|
-
packed_chunked_array.to_a,
|
44
|
-
])
|
45
|
-
end
|
46
|
-
|
47
|
-
test("TimestampArray") do
|
48
|
-
type = Arrow::TimestampDataType.new(:nano)
|
49
|
-
arrays = [
|
50
|
-
Arrow::TimestampArrayBuilder.new(type).build([Time.at(0)]),
|
51
|
-
Arrow::TimestampArrayBuilder.new(type).build([Time.at(1)]),
|
52
|
-
]
|
53
|
-
chunked_array = Arrow::ChunkedArray.new(arrays)
|
54
|
-
packed_chunked_array = chunked_array.pack
|
55
|
-
assert_equal([
|
56
|
-
Arrow::TimestampArray,
|
57
|
-
[Time.at(0), Time.at(1)],
|
58
|
-
],
|
59
|
-
[
|
60
|
-
packed_chunked_array.class,
|
61
|
-
packed_chunked_array.to_a,
|
62
|
-
])
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
sub_test_case("#==") do
|
67
|
-
def setup
|
68
|
-
arrays = [
|
69
|
-
Arrow::BooleanArray.new([true]),
|
70
|
-
Arrow::BooleanArray.new([false, true]),
|
71
|
-
]
|
72
|
-
@chunked_array = Arrow::ChunkedArray.new(arrays)
|
73
|
-
end
|
74
|
-
|
75
|
-
test("Arrow::ChunkedArray") do
|
76
|
-
assert do
|
77
|
-
@chunked_array == @chunked_array
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
test("not Arrow::ChunkedArray") do
|
82
|
-
assert do
|
83
|
-
not (@chunked_array == 29)
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
sub_test_case("#filter") do
|
89
|
-
def setup
|
90
|
-
arrays = [
|
91
|
-
Arrow::BooleanArray.new([false, true]),
|
92
|
-
Arrow::BooleanArray.new([false, true, false]),
|
93
|
-
]
|
94
|
-
@chunked_array = Arrow::ChunkedArray.new(arrays)
|
95
|
-
@options = Arrow::FilterOptions.new
|
96
|
-
@options.null_selection_behavior = :emit_null
|
97
|
-
end
|
98
|
-
|
99
|
-
test("Array: boolean") do
|
100
|
-
filter = [nil, true, true, false, true]
|
101
|
-
chunks = [
|
102
|
-
Arrow::BooleanArray.new([nil, true]),
|
103
|
-
Arrow::BooleanArray.new([false, false]),
|
104
|
-
]
|
105
|
-
filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
|
106
|
-
assert_equal(filtered_chunked_array,
|
107
|
-
@chunked_array.filter(filter, @options))
|
108
|
-
end
|
109
|
-
|
110
|
-
test("Arrow::BooleanArray") do
|
111
|
-
filter = Arrow::BooleanArray.new([nil, true, true, false, true])
|
112
|
-
chunks = [
|
113
|
-
Arrow::BooleanArray.new([nil, true]),
|
114
|
-
Arrow::BooleanArray.new([false, false]),
|
115
|
-
]
|
116
|
-
filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
|
117
|
-
assert_equal(filtered_chunked_array,
|
118
|
-
@chunked_array.filter(filter, @options))
|
119
|
-
end
|
120
|
-
|
121
|
-
test("Arrow::ChunkedArray") do
|
122
|
-
chunks = [
|
123
|
-
Arrow::BooleanArray.new([nil, true]),
|
124
|
-
Arrow::BooleanArray.new([true, false, true]),
|
125
|
-
]
|
126
|
-
filter = Arrow::ChunkedArray.new(chunks)
|
127
|
-
filtered_chunks = [
|
128
|
-
Arrow::BooleanArray.new([nil, true]),
|
129
|
-
Arrow::BooleanArray.new([false, false]),
|
130
|
-
]
|
131
|
-
filtered_chunked_array = Arrow::ChunkedArray.new(filtered_chunks)
|
132
|
-
assert_equal(filtered_chunked_array,
|
133
|
-
@chunked_array.filter(filter, @options))
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
sub_test_case("#take") do
|
138
|
-
def setup
|
139
|
-
chunks = [
|
140
|
-
Arrow::Int16Array.new([1, 0]),
|
141
|
-
Arrow::Int16Array.new([2]),
|
142
|
-
]
|
143
|
-
@chunked_array = Arrow::ChunkedArray.new(chunks)
|
144
|
-
end
|
145
|
-
|
146
|
-
test("Arrow: boolean") do
|
147
|
-
chunks = [
|
148
|
-
Arrow::Int16Array.new([0, 1]),
|
149
|
-
Arrow::Int16Array.new([2])
|
150
|
-
]
|
151
|
-
taken_chunked_array = Arrow::ChunkedArray.new(chunks)
|
152
|
-
indices = [1, 0, 2]
|
153
|
-
assert_equal(taken_chunked_array,
|
154
|
-
@chunked_array.take(indices))
|
155
|
-
end
|
156
|
-
|
157
|
-
test("Arrow::Array") do
|
158
|
-
chunks = [
|
159
|
-
Arrow::Int16Array.new([0, 1]),
|
160
|
-
Arrow::Int16Array.new([2])
|
161
|
-
]
|
162
|
-
taken_chunked_array = Arrow::ChunkedArray.new(chunks)
|
163
|
-
indices = Arrow::Int16Array.new([1, 0, 2])
|
164
|
-
assert_equal(taken_chunked_array,
|
165
|
-
@chunked_array.take(indices))
|
166
|
-
end
|
167
|
-
|
168
|
-
test("Arrow::ChunkedArray") do
|
169
|
-
taken_chunks = [
|
170
|
-
Arrow::Int16Array.new([0, 1]),
|
171
|
-
Arrow::Int16Array.new([2])
|
172
|
-
]
|
173
|
-
taken_chunked_array = Arrow::ChunkedArray.new(taken_chunks)
|
174
|
-
indices_chunks = [
|
175
|
-
Arrow::Int16Array.new([1, 0]),
|
176
|
-
Arrow::Int16Array.new([2])
|
177
|
-
]
|
178
|
-
indices = Arrow::ChunkedArray.new(indices_chunks)
|
179
|
-
assert_equal(taken_chunked_array,
|
180
|
-
@chunked_array.take(indices))
|
181
|
-
end
|
182
|
-
end
|
183
|
-
|
184
|
-
test("#cast") do
|
185
|
-
chunked_array = Arrow::ChunkedArray.new([[1, nil, 3]])
|
186
|
-
assert_equal(Arrow::ChunkedArray.new([["1", nil, "3"]]),
|
187
|
-
chunked_array.cast(:string))
|
188
|
-
end
|
189
|
-
|
190
|
-
test("#index") do
|
191
|
-
arrays = [
|
192
|
-
Arrow::Int32Array.new([1, 2]),
|
193
|
-
Arrow::Int32Array.new([3, 4, 5]),
|
194
|
-
]
|
195
|
-
chunked_array = Arrow::ChunkedArray.new(arrays)
|
196
|
-
assert_equal(2, chunked_array.index(3))
|
197
|
-
end
|
198
|
-
end
|
data/test/test-column.rb
DELETED
@@ -1,123 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
class ColumnTest < Test::Unit::TestCase
|
19
|
-
def setup
|
20
|
-
table = Arrow::Table.new("visible" => [true, nil, false])
|
21
|
-
@column = table.visible
|
22
|
-
end
|
23
|
-
|
24
|
-
test("#name") do
|
25
|
-
assert_equal("visible", @column.name)
|
26
|
-
end
|
27
|
-
|
28
|
-
test("#data_type") do
|
29
|
-
assert_equal(Arrow::BooleanDataType.new, @column.data_type)
|
30
|
-
end
|
31
|
-
|
32
|
-
test("#null?") do
|
33
|
-
assert do
|
34
|
-
@column.null?(1)
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
test("#valid?") do
|
39
|
-
assert do
|
40
|
-
@column.valid?(0)
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
test("#each") do
|
45
|
-
assert_equal([true, nil, false], @column.each.to_a)
|
46
|
-
end
|
47
|
-
|
48
|
-
test("#reverse_each") do
|
49
|
-
assert_equal([false, nil, true], @column.reverse_each.to_a)
|
50
|
-
end
|
51
|
-
|
52
|
-
test("#n_rows") do
|
53
|
-
assert_equal(3, @column.n_rows)
|
54
|
-
end
|
55
|
-
|
56
|
-
test("#n_nulls") do
|
57
|
-
assert_equal(1, @column.n_nulls)
|
58
|
-
end
|
59
|
-
|
60
|
-
sub_test_case("#==") do
|
61
|
-
test("same value") do
|
62
|
-
table1 = Arrow::Table.new("visible" => [true, false])
|
63
|
-
table2 = Arrow::Table.new("visible" => [true, false])
|
64
|
-
assert do
|
65
|
-
table1.visible == table2.visible
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
test("different name") do
|
70
|
-
table1 = Arrow::Table.new("visible" => [true, false])
|
71
|
-
table2 = Arrow::Table.new("invisible" => [true, false])
|
72
|
-
assert do
|
73
|
-
not table1.visible == table2.invisible
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
test("different value") do
|
78
|
-
table1 = Arrow::Table.new("visible" => [true, false])
|
79
|
-
table2 = Arrow::Table.new("visible" => [true, true])
|
80
|
-
assert do
|
81
|
-
not table1.visible == table2.visible
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
test("not Arrow::Column") do
|
86
|
-
table = Arrow::Table.new("visible" => [true, false])
|
87
|
-
assert do
|
88
|
-
not table.visible == 29
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
test("#count") do
|
94
|
-
table = Arrow::Table.new("revenue" => [1, nil, 3])
|
95
|
-
assert_equal(2, table["revenue"].count)
|
96
|
-
end
|
97
|
-
|
98
|
-
test("#min") do
|
99
|
-
table = Arrow::Table.new("revenue" => [1, 2, 3])
|
100
|
-
assert_equal(1, table["revenue"].min)
|
101
|
-
end
|
102
|
-
|
103
|
-
test("#max") do
|
104
|
-
table = Arrow::Table.new("revenue" => [1, 2, 3])
|
105
|
-
assert_equal(3, table["revenue"].max)
|
106
|
-
end
|
107
|
-
|
108
|
-
test("#sum") do
|
109
|
-
table = Arrow::Table.new("revenue" => [1, 2, 3])
|
110
|
-
assert_equal(6, table["revenue"].sum)
|
111
|
-
end
|
112
|
-
|
113
|
-
test("#uniq") do
|
114
|
-
table = Arrow::Table.new("revenue" => [1, 2, 2])
|
115
|
-
assert_equal([1, 2], table["revenue"].uniq)
|
116
|
-
end
|
117
|
-
|
118
|
-
test("#cast") do
|
119
|
-
table = Arrow::Table.new("revenue" => [1, nil, 3])
|
120
|
-
assert_equal(Arrow::ChunkedArray.new([["1", nil, "3"]]),
|
121
|
-
table["revenue"].cast(:string))
|
122
|
-
end
|
123
|
-
end
|
data/test/test-csv-loader.rb
DELETED
@@ -1,297 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
class CSVLoaderTest < Test::Unit::TestCase
|
19
|
-
include Helper::Fixture
|
20
|
-
include Helper::Omittable
|
21
|
-
|
22
|
-
def load_csv(input)
|
23
|
-
Arrow::CSVLoader.load(input, skip_lines: /^#/)
|
24
|
-
end
|
25
|
-
|
26
|
-
sub_test_case(".load") do
|
27
|
-
test("String: data: with header") do
|
28
|
-
data = fixture_path("with-header-float.csv").read
|
29
|
-
assert_equal(<<-TABLE, load_csv(data).to_s)
|
30
|
-
name score
|
31
|
-
(utf8) (double)
|
32
|
-
0 alice 10.100000
|
33
|
-
1 bob 29.200000
|
34
|
-
2 chris -1.300000
|
35
|
-
TABLE
|
36
|
-
end
|
37
|
-
|
38
|
-
test("String: data: without header") do
|
39
|
-
data = fixture_path("without-header-float.csv").read
|
40
|
-
assert_equal(<<-TABLE, load_csv(data).to_s)
|
41
|
-
0 1
|
42
|
-
(utf8) (double)
|
43
|
-
0 alice 10.100000
|
44
|
-
1 bob 29.200000
|
45
|
-
2 chris -1.300000
|
46
|
-
TABLE
|
47
|
-
end
|
48
|
-
|
49
|
-
test("String: path: with header") do
|
50
|
-
path = fixture_path("with-header-float.csv").to_s
|
51
|
-
assert_equal(<<-TABLE, load_csv(path).to_s)
|
52
|
-
name score
|
53
|
-
(utf8) (double)
|
54
|
-
0 alice 10.100000
|
55
|
-
1 bob 29.200000
|
56
|
-
2 chris -1.300000
|
57
|
-
TABLE
|
58
|
-
end
|
59
|
-
|
60
|
-
test("String: path: without header") do
|
61
|
-
path = fixture_path("without-header-float.csv").to_s
|
62
|
-
assert_equal(<<-TABLE, load_csv(path).to_s)
|
63
|
-
0 1
|
64
|
-
(utf8) (double)
|
65
|
-
0 alice 10.100000
|
66
|
-
1 bob 29.200000
|
67
|
-
2 chris -1.300000
|
68
|
-
TABLE
|
69
|
-
end
|
70
|
-
|
71
|
-
test("Pathname: with header") do
|
72
|
-
path = fixture_path("with-header-float.csv")
|
73
|
-
assert_equal(<<-TABLE, load_csv(path).to_s)
|
74
|
-
name score
|
75
|
-
(utf8) (double)
|
76
|
-
0 alice 10.100000
|
77
|
-
1 bob 29.200000
|
78
|
-
2 chris -1.300000
|
79
|
-
TABLE
|
80
|
-
end
|
81
|
-
|
82
|
-
test("Pathname: without header") do
|
83
|
-
path = fixture_path("without-header-float.csv")
|
84
|
-
assert_equal(<<-TABLE, load_csv(path).to_s)
|
85
|
-
0 1
|
86
|
-
(utf8) (double)
|
87
|
-
0 alice 10.100000
|
88
|
-
1 bob 29.200000
|
89
|
-
2 chris -1.300000
|
90
|
-
TABLE
|
91
|
-
end
|
92
|
-
|
93
|
-
test("null: with double quote") do
|
94
|
-
path = fixture_path("null-with-double-quote.csv").to_s
|
95
|
-
assert_equal(<<-TABLE, load_csv(path).to_s)
|
96
|
-
name score
|
97
|
-
(utf8) (int8)
|
98
|
-
0 alice 10
|
99
|
-
1 bob (null)
|
100
|
-
2 chris -1
|
101
|
-
TABLE
|
102
|
-
end
|
103
|
-
|
104
|
-
test("null: without double quote") do
|
105
|
-
path = fixture_path("null-without-double-quote.csv").to_s
|
106
|
-
assert_equal(<<-TABLE, load_csv(path).to_s)
|
107
|
-
name score
|
108
|
-
(utf8) (int8)
|
109
|
-
0 alice 10
|
110
|
-
1 bob (null)
|
111
|
-
2 chris -1
|
112
|
-
TABLE
|
113
|
-
end
|
114
|
-
|
115
|
-
test("number: float, integer") do
|
116
|
-
path = fixture_path("float-integer.csv").to_s
|
117
|
-
assert_equal([2.9, 10, -1.1],
|
118
|
-
load_csv(path)[:score].to_a)
|
119
|
-
end
|
120
|
-
|
121
|
-
test("number: integer, float") do
|
122
|
-
path = fixture_path("integer-float.csv").to_s
|
123
|
-
assert_equal([10.0, 2.9, -1.1],
|
124
|
-
load_csv(path)[:score].to_a)
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
sub_test_case("CSVReader") do
|
129
|
-
def load_csv(data, **options)
|
130
|
-
Arrow::CSVLoader.load(data, **options)
|
131
|
-
end
|
132
|
-
|
133
|
-
sub_test_case(":headers") do
|
134
|
-
test("true") do
|
135
|
-
values = Arrow::StringArray.new(["a", "b", "c"])
|
136
|
-
assert_equal(Arrow::Table.new(value: values),
|
137
|
-
load_csv(<<-CSV, headers: true))
|
138
|
-
value
|
139
|
-
a
|
140
|
-
b
|
141
|
-
c
|
142
|
-
CSV
|
143
|
-
end
|
144
|
-
|
145
|
-
test(":first_line") do
|
146
|
-
values = Arrow::StringArray.new(["a", "b", "c"])
|
147
|
-
assert_equal(Arrow::Table.new(value: values),
|
148
|
-
load_csv(<<-CSV, headers: :first_line))
|
149
|
-
value
|
150
|
-
a
|
151
|
-
b
|
152
|
-
c
|
153
|
-
CSV
|
154
|
-
end
|
155
|
-
|
156
|
-
test("truthy") do
|
157
|
-
values = Arrow::StringArray.new(["a", "b", "c"])
|
158
|
-
assert_equal(Arrow::Table.new(value: values),
|
159
|
-
load_csv(<<-CSV, headers: 0))
|
160
|
-
value
|
161
|
-
a
|
162
|
-
b
|
163
|
-
c
|
164
|
-
CSV
|
165
|
-
end
|
166
|
-
|
167
|
-
test("Array of column names") do
|
168
|
-
values = Arrow::StringArray.new(["a", "b", "c"])
|
169
|
-
assert_equal(Arrow::Table.new(column: values),
|
170
|
-
load_csv(<<-CSV, headers: ["column"]))
|
171
|
-
a
|
172
|
-
b
|
173
|
-
c
|
174
|
-
CSV
|
175
|
-
end
|
176
|
-
|
177
|
-
test("false") do
|
178
|
-
values = Arrow::StringArray.new(["a", "b", "c"])
|
179
|
-
assert_equal(Arrow::Table.new(f0: values),
|
180
|
-
load_csv(<<-CSV, headers: false))
|
181
|
-
a
|
182
|
-
b
|
183
|
-
c
|
184
|
-
CSV
|
185
|
-
end
|
186
|
-
|
187
|
-
test("nil") do
|
188
|
-
values = Arrow::StringArray.new(["a", "b", "c"])
|
189
|
-
assert_equal(Arrow::Table.new(f0: values),
|
190
|
-
load_csv(<<-CSV, headers: nil))
|
191
|
-
a
|
192
|
-
b
|
193
|
-
c
|
194
|
-
CSV
|
195
|
-
end
|
196
|
-
|
197
|
-
test("string") do
|
198
|
-
values = Arrow::StringArray.new(["a", "b", "c"])
|
199
|
-
assert_equal(Arrow::Table.new(column: values),
|
200
|
-
load_csv(<<-CSV, headers: "column"))
|
201
|
-
a
|
202
|
-
b
|
203
|
-
c
|
204
|
-
CSV
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
test(":column_types") do
|
209
|
-
assert_equal(Arrow::Table.new(:count => Arrow::UInt16Array.new([1, 2, 4])),
|
210
|
-
load_csv(<<-CSV, column_types: {count: :uint16}))
|
211
|
-
count
|
212
|
-
1
|
213
|
-
2
|
214
|
-
4
|
215
|
-
CSV
|
216
|
-
end
|
217
|
-
|
218
|
-
test(":schema") do
|
219
|
-
table = Arrow::Table.new(:count => Arrow::UInt16Array.new([1, 2, 4]))
|
220
|
-
assert_equal(table,
|
221
|
-
load_csv(<<-CSV, schema: table.schema))
|
222
|
-
count
|
223
|
-
1
|
224
|
-
2
|
225
|
-
4
|
226
|
-
CSV
|
227
|
-
end
|
228
|
-
|
229
|
-
test(":encoding") do
|
230
|
-
messages = [
|
231
|
-
"\u3042", # U+3042 HIRAGANA LETTER A
|
232
|
-
"\u3044", # U+3044 HIRAGANA LETTER I
|
233
|
-
"\u3046", # U+3046 HIRAGANA LETTER U
|
234
|
-
]
|
235
|
-
table = Arrow::Table.new(:message => Arrow::StringArray.new(messages))
|
236
|
-
encoding = "cp932"
|
237
|
-
assert_equal(table,
|
238
|
-
load_csv((["message"] + messages).join("\n").encode(encoding),
|
239
|
-
schema: table.schema,
|
240
|
-
encoding: encoding))
|
241
|
-
end
|
242
|
-
|
243
|
-
test(":encoding and :compression") do
|
244
|
-
messages = [
|
245
|
-
"\u3042", # U+3042 HIRAGANA LETTER A
|
246
|
-
"\u3044", # U+3044 HIRAGANA LETTER I
|
247
|
-
"\u3046", # U+3046 HIRAGANA LETTER U
|
248
|
-
]
|
249
|
-
table = Arrow::Table.new(:message => Arrow::StringArray.new(messages))
|
250
|
-
encoding = "cp932"
|
251
|
-
csv = (["message"] + messages).join("\n").encode(encoding)
|
252
|
-
assert_equal(table,
|
253
|
-
load_csv(Zlib::Deflate.deflate(csv),
|
254
|
-
schema: table.schema,
|
255
|
-
encoding: encoding,
|
256
|
-
compression: :gzip))
|
257
|
-
end
|
258
|
-
|
259
|
-
sub_test_case(":timestamp_parsers") do
|
260
|
-
test(":iso8601") do
|
261
|
-
require_glib(2, 58, 0)
|
262
|
-
data_type = Arrow::TimestampDataType.new(:second,
|
263
|
-
GLib::TimeZone.new("UTC"))
|
264
|
-
timestamps = [
|
265
|
-
Time.iso8601("2024-03-16T23:54:12Z"),
|
266
|
-
Time.iso8601("2024-03-16T23:54:13Z"),
|
267
|
-
Time.iso8601("2024-03-16T23:54:14Z"),
|
268
|
-
]
|
269
|
-
values = Arrow::TimestampArray.new(data_type, timestamps)
|
270
|
-
assert_equal(Arrow::Table.new(value: values),
|
271
|
-
load_csv(<<-CSV, headers: true, timestamp_parsers: [:iso8601]))
|
272
|
-
value
|
273
|
-
#{timestamps[0].iso8601}
|
274
|
-
#{timestamps[1].iso8601}
|
275
|
-
#{timestamps[2].iso8601}
|
276
|
-
CSV
|
277
|
-
end
|
278
|
-
|
279
|
-
test("String") do
|
280
|
-
timestamps = [
|
281
|
-
Time.iso8601("2024-03-16T23:54:12Z"),
|
282
|
-
Time.iso8601("2024-03-16T23:54:13Z"),
|
283
|
-
Time.iso8601("2024-03-16T23:54:14Z"),
|
284
|
-
]
|
285
|
-
values = Arrow::TimestampArray.new(:second, timestamps)
|
286
|
-
format = "%Y-%m-%dT%H:%M:%S"
|
287
|
-
assert_equal(Arrow::Table.new(value: values).schema,
|
288
|
-
load_csv(<<-CSV, headers: true, timestamp_parsers: [format]).schema)
|
289
|
-
value
|
290
|
-
#{timestamps[0].iso8601.chomp("Z")}
|
291
|
-
#{timestamps[1].iso8601.chomp("Z")}
|
292
|
-
#{timestamps[2].iso8601.chomp("Z")}
|
293
|
-
CSV
|
294
|
-
end
|
295
|
-
end
|
296
|
-
end
|
297
|
-
end
|