red-arrow 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of red-arrow might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Rakefile +49 -4
- data/ext/arrow/arrow.cpp +43 -0
- data/ext/arrow/extconf.rb +52 -0
- data/ext/arrow/record-batch.cpp +756 -0
- data/ext/arrow/red-arrow.hpp +60 -0
- data/lib/arrow.rb +2 -1
- data/lib/arrow/array-builder.rb +4 -0
- data/lib/arrow/array.rb +11 -1
- data/lib/arrow/bigdecimal-extension.rb +24 -0
- data/lib/arrow/binary-array-builder.rb +36 -0
- data/lib/arrow/block-closable.rb +5 -1
- data/lib/arrow/csv-loader.rb +28 -6
- data/lib/arrow/data-type.rb +8 -4
- data/lib/arrow/decimal128-array-builder.rb +2 -2
- data/lib/arrow/decimal128.rb +42 -0
- data/lib/arrow/list-array-builder.rb +1 -1
- data/lib/arrow/loader.rb +8 -0
- data/lib/arrow/null-array-builder.rb +26 -0
- data/lib/arrow/record-batch-builder.rb +8 -9
- data/lib/arrow/struct-array-builder.rb +3 -3
- data/lib/arrow/struct-array.rb +15 -7
- data/lib/arrow/struct.rb +11 -0
- data/lib/arrow/table-loader.rb +14 -14
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +8 -4
- data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
- data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
- data/test/raw-records/record-batch/test-list-array.rb +498 -0
- data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
- data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
- data/test/raw-records/record-batch/test-struct-array.rb +426 -0
- data/test/run-test.rb +25 -2
- data/test/test-array.rb +38 -9
- data/test/test-bigdecimal.rb +23 -0
- data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
- data/test/test-chunked-array.rb +22 -0
- data/test/test-column.rb +24 -0
- data/test/test-csv-loader.rb +30 -0
- data/test/test-data-type.rb +25 -0
- data/test/test-decimal128.rb +64 -0
- data/test/test-field.rb +20 -0
- data/test/test-group.rb +2 -2
- data/test/test-record-batch-builder.rb +9 -0
- data/test/test-record-batch.rb +14 -0
- data/test/test-schema.rb +14 -0
- data/test/test-struct-array.rb +16 -3
- data/test/test-table.rb +14 -0
- data/test/test-tensor.rb +56 -0
- metadata +117 -47
data/test/test-array.rb
CHANGED
@@ -24,15 +24,44 @@ class ArrayTest < Test::Unit::TestCase
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
27
|
+
sub_test_case("instance methods") do
|
28
|
+
def setup
|
29
|
+
@values = [true, false, nil, true]
|
30
|
+
@array = Arrow::BooleanArray.new(@values)
|
31
|
+
end
|
32
|
+
|
33
|
+
test("#each") do
|
34
|
+
assert_equal(@values, @array.to_a)
|
35
|
+
end
|
36
|
+
|
37
|
+
sub_test_case("#[]") do
|
38
|
+
test("valid range") do
|
39
|
+
assert_equal(@values,
|
40
|
+
@array.length.times.collect {|i| @array[i]})
|
41
|
+
end
|
32
42
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
43
|
+
test("out of range") do
|
44
|
+
assert_nil(@array[@array.length])
|
45
|
+
end
|
46
|
+
|
47
|
+
test("negative index") do
|
48
|
+
assert_equal(@values.last,
|
49
|
+
@array[-1])
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
sub_test_case("#==") do
|
54
|
+
test("Arrow::Array") do
|
55
|
+
assert do
|
56
|
+
@array == @array
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
test("not Arrow::Array") do
|
61
|
+
assert do
|
62
|
+
not (@array == 29)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
37
66
|
end
|
38
67
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
class BigDecimalTest < Test::Unit::TestCase
|
19
|
+
test("#to_arrow") do
|
20
|
+
assert_equal(Arrow::Decimal128.new("3.14"),
|
21
|
+
BigDecimal("3.14").to_arrow)
|
22
|
+
end
|
23
|
+
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# -*- ruby -*-
|
2
|
-
#
|
3
1
|
# Licensed to the Apache Software Foundation (ASF) under one
|
4
2
|
# or more contributor license agreements. See the NOTICE file
|
5
3
|
# distributed with this work for additional information
|
@@ -17,26 +15,23 @@
|
|
17
15
|
# specific language governing permissions and limitations
|
18
16
|
# under the License.
|
19
17
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
task :default => "nothing"
|
26
|
-
else
|
27
|
-
task :default => "dependency:check"
|
28
|
-
end
|
18
|
+
class BufferTest < Test::Unit::TestCase
|
19
|
+
sub_test_case("instance methods") do
|
20
|
+
def setup
|
21
|
+
@buffer = Arrow::Buffer.new("Hello")
|
22
|
+
end
|
29
23
|
|
30
|
-
|
31
|
-
|
24
|
+
sub_test_case("#==") do
|
25
|
+
test("Arrow::Buffer") do
|
26
|
+
assert do
|
27
|
+
@buffer == @buffer
|
28
|
+
end
|
29
|
+
end
|
32
30
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
unless NativePackageInstaller.install(:debian => "libarrow-glib-dev",
|
38
|
-
:redhat => "arrow-glib-devel")
|
39
|
-
exit(false)
|
31
|
+
test("not Arrow::Buffer") do
|
32
|
+
assert do
|
33
|
+
not (@buffer == 29)
|
34
|
+
end
|
40
35
|
end
|
41
36
|
end
|
42
37
|
end
|
data/test/test-chunked-array.rb
CHANGED
@@ -62,4 +62,26 @@ class ChunkedArrayTest < Test::Unit::TestCase
|
|
62
62
|
])
|
63
63
|
end
|
64
64
|
end
|
65
|
+
|
66
|
+
sub_test_case("#==") do
|
67
|
+
def setup
|
68
|
+
arrays = [
|
69
|
+
Arrow::BooleanArray.new([true]),
|
70
|
+
Arrow::BooleanArray.new([false, true]),
|
71
|
+
]
|
72
|
+
@chunked_array = Arrow::ChunkedArray.new(arrays)
|
73
|
+
end
|
74
|
+
|
75
|
+
test("Arrow::ChunkedArray") do
|
76
|
+
assert do
|
77
|
+
@chunked_array == @chunked_array
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
test("not Arrow::ChunkedArray") do
|
82
|
+
assert do
|
83
|
+
not (@chunked_array == 29)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
65
87
|
end
|
data/test/test-column.rb
CHANGED
@@ -40,4 +40,28 @@ class ColumnTest < Test::Unit::TestCase
|
|
40
40
|
assert_equal([1, [true, false, nil, true]],
|
41
41
|
[packed_column.data.n_chunks, packed_column.to_a])
|
42
42
|
end
|
43
|
+
|
44
|
+
sub_test_case("#==") do
|
45
|
+
def setup
|
46
|
+
arrays = [
|
47
|
+
Arrow::BooleanArray.new([true]),
|
48
|
+
Arrow::BooleanArray.new([false, true]),
|
49
|
+
]
|
50
|
+
chunked_array = Arrow::ChunkedArray.new(arrays)
|
51
|
+
@column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
|
52
|
+
chunked_array)
|
53
|
+
end
|
54
|
+
|
55
|
+
test("Arrow::Column") do
|
56
|
+
assert do
|
57
|
+
@column == @column
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
test("not Arrow::Column") do
|
62
|
+
assert do
|
63
|
+
not (@column == 29)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
43
67
|
end
|
data/test/test-csv-loader.rb
CHANGED
@@ -141,5 +141,35 @@ count
|
|
141
141
|
4
|
142
142
|
CSV
|
143
143
|
end
|
144
|
+
|
145
|
+
test(":encoding") do
|
146
|
+
messages = [
|
147
|
+
"\u3042", # U+3042 HIRAGANA LETTER A
|
148
|
+
"\u3044", # U+3044 HIRAGANA LETTER I
|
149
|
+
"\u3046", # U+3046 HIRAGANA LETTER U
|
150
|
+
]
|
151
|
+
table = Arrow::Table.new(:message => Arrow::StringArray.new(messages))
|
152
|
+
encoding = "cp932"
|
153
|
+
assert_equal(table,
|
154
|
+
load_csv((["message"] + messages).join("\n").encode(encoding),
|
155
|
+
schema: table.schema,
|
156
|
+
encoding: encoding))
|
157
|
+
end
|
158
|
+
|
159
|
+
test(":encoding and :compression") do
|
160
|
+
messages = [
|
161
|
+
"\u3042", # U+3042 HIRAGANA LETTER A
|
162
|
+
"\u3044", # U+3044 HIRAGANA LETTER I
|
163
|
+
"\u3046", # U+3046 HIRAGANA LETTER U
|
164
|
+
]
|
165
|
+
table = Arrow::Table.new(:message => Arrow::StringArray.new(messages))
|
166
|
+
encoding = "cp932"
|
167
|
+
csv = (["message"] + messages).join("\n").encode(encoding)
|
168
|
+
assert_equal(table,
|
169
|
+
load_csv(Zlib::Deflate.deflate(csv),
|
170
|
+
schema: table.schema,
|
171
|
+
encoding: encoding,
|
172
|
+
compression: :gzip))
|
173
|
+
end
|
144
174
|
end
|
145
175
|
end
|
data/test/test-data-type.rb
CHANGED
@@ -43,5 +43,30 @@ class DataTypeTest < Test::Unit::TestCase
|
|
43
43
|
assert_equal(Arrow::ListDataType.new(field),
|
44
44
|
Arrow::DataType.resolve(type: :list, field: field))
|
45
45
|
end
|
46
|
+
|
47
|
+
test("_") do
|
48
|
+
assert_equal(Arrow::FixedSizeBinaryDataType.new(10),
|
49
|
+
Arrow::DataType.resolve([:fixed_size_binary, 10]))
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
sub_test_case("instance methods") do
|
54
|
+
def setup
|
55
|
+
@data_type = Arrow::StringDataType.new
|
56
|
+
end
|
57
|
+
|
58
|
+
sub_test_case("#==") do
|
59
|
+
test("Arrow::DataType") do
|
60
|
+
assert do
|
61
|
+
@data_type == @data_type
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
test("not Arrow::DataType") do
|
66
|
+
assert do
|
67
|
+
not (@data_type == 29)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
46
71
|
end
|
47
72
|
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
class Decimal128Test < Test::Unit::TestCase
|
19
|
+
sub_test_case("instance methods") do
|
20
|
+
def setup
|
21
|
+
@decimal128 = Arrow::Decimal128.new("10.1")
|
22
|
+
end
|
23
|
+
|
24
|
+
sub_test_case("#==") do
|
25
|
+
test("Arrow::Decimal128") do
|
26
|
+
assert do
|
27
|
+
@decimal128 == @decimal128
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
test("not Arrow::Decimal128") do
|
32
|
+
assert do
|
33
|
+
not (@decimal128 == 10.1)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
sub_test_case("#!=") do
|
39
|
+
test("Arrow::Decimal128") do
|
40
|
+
assert do
|
41
|
+
not (@decimal128 != @decimal128)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
test("not Arrow::Decimal128") do
|
46
|
+
assert do
|
47
|
+
@decimal128 != 10.1
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
sub_test_case("#to_s") do
|
53
|
+
test("default") do
|
54
|
+
assert_equal("101",
|
55
|
+
@decimal128.to_s)
|
56
|
+
end
|
57
|
+
|
58
|
+
test("scale") do
|
59
|
+
assert_equal("10.1",
|
60
|
+
@decimal128.to_s(1))
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
data/test/test-field.rb
CHANGED
@@ -68,4 +68,24 @@ class FieldTest < Test::Unit::TestCase
|
|
68
68
|
Arrow::Field.new(description).to_s)
|
69
69
|
end
|
70
70
|
end
|
71
|
+
|
72
|
+
sub_test_case("instance methods") do
|
73
|
+
def setup
|
74
|
+
@field = Arrow::Field.new("count", :uint32)
|
75
|
+
end
|
76
|
+
|
77
|
+
sub_test_case("#==") do
|
78
|
+
test("Arrow::Field") do
|
79
|
+
assert do
|
80
|
+
@field == @field
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
test("not Arrow::Field") do
|
85
|
+
assert do
|
86
|
+
not (@field == 29)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
71
91
|
end
|
data/test/test-group.rb
CHANGED
@@ -43,8 +43,8 @@ class GroupTest < Test::Unit::TestCase
|
|
43
43
|
table = Arrow::Table.new(raw_table)
|
44
44
|
assert_equal(<<-TABLE, table.group(:time).count.to_s)
|
45
45
|
time int
|
46
|
-
0
|
47
|
-
1
|
46
|
+
0 #{time_values[0].iso8601} 1
|
47
|
+
1 #{time_values[1].iso8601} 1
|
48
48
|
TABLE
|
49
49
|
end
|
50
50
|
end
|
@@ -112,5 +112,14 @@ class RecordBatchBuilderTest < Test::Unit::TestCase
|
|
112
112
|
arrays),
|
113
113
|
@builder.flush)
|
114
114
|
end
|
115
|
+
|
116
|
+
test("#column_builders") do
|
117
|
+
column_builders = [
|
118
|
+
@builder.get_column_builder(0),
|
119
|
+
@builder.get_column_builder(1),
|
120
|
+
]
|
121
|
+
assert_equal(column_builders,
|
122
|
+
@builder.column_builders)
|
123
|
+
end
|
115
124
|
end
|
116
125
|
end
|
data/test/test-record-batch.rb
CHANGED
@@ -108,5 +108,19 @@ class RecordBatchTest < Test::Unit::TestCase
|
|
108
108
|
assert_equal(Arrow::Table.new(@schema, [@counts]),
|
109
109
|
@record_batch.to_table)
|
110
110
|
end
|
111
|
+
|
112
|
+
sub_test_case("#==") do
|
113
|
+
test("Arrow::RecordBatch") do
|
114
|
+
assert do
|
115
|
+
@record_batch == @record_batch
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
test("not Arrow::RecordBatch") do
|
120
|
+
assert do
|
121
|
+
not (@record_batch == 29)
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
111
125
|
end
|
112
126
|
end
|
data/test/test-schema.rb
CHANGED
@@ -100,5 +100,19 @@ class SchemaTest < Test::Unit::TestCase
|
|
100
100
|
end
|
101
101
|
end
|
102
102
|
end
|
103
|
+
|
104
|
+
sub_test_case("#==") do
|
105
|
+
test("Arrow::Schema") do
|
106
|
+
assert do
|
107
|
+
@schema == @schema
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
test("not Arrow::Schema") do
|
112
|
+
assert do
|
113
|
+
not (@schema == 29)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
103
117
|
end
|
104
118
|
end
|
data/test/test-struct-array.rb
CHANGED
@@ -49,9 +49,22 @@ class StructArrayTest < Test::Unit::TestCase
|
|
49
49
|
end
|
50
50
|
|
51
51
|
test("#[]") do
|
52
|
-
|
53
|
-
|
54
|
-
|
52
|
+
assert_equal([
|
53
|
+
Arrow::Struct.new(@array, 0),
|
54
|
+
Arrow::Struct.new(@array, 1),
|
55
|
+
],
|
56
|
+
@array.to_a)
|
57
|
+
end
|
58
|
+
|
59
|
+
test("#get_value") do
|
60
|
+
assert_equal([
|
61
|
+
Arrow::Struct.new(@array, 0),
|
62
|
+
Arrow::Struct.new(@array, 1),
|
63
|
+
],
|
64
|
+
[
|
65
|
+
@array.get_value(0),
|
66
|
+
@array.get_value(1),
|
67
|
+
])
|
55
68
|
end
|
56
69
|
|
57
70
|
sub_test_case("#find_field") do
|
data/test/test-table.rb
CHANGED
@@ -596,5 +596,19 @@ visible: false
|
|
596
596
|
end
|
597
597
|
end
|
598
598
|
end
|
599
|
+
|
600
|
+
sub_test_case("#==") do
|
601
|
+
test("Arrow::Table") do
|
602
|
+
assert do
|
603
|
+
@table == @table
|
604
|
+
end
|
605
|
+
end
|
606
|
+
|
607
|
+
test("not Arrow::Table") do
|
608
|
+
assert do
|
609
|
+
not (@table == 29)
|
610
|
+
end
|
611
|
+
end
|
612
|
+
end
|
599
613
|
end
|
600
614
|
end
|