red-arrow 0.8.1 → 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/text/news.md +44 -0
- data/lib/arrow/array-builder.rb +0 -1
- data/lib/arrow/array.rb +9 -1
- data/lib/arrow/chunked-array.rb +43 -7
- data/lib/arrow/column.rb +18 -0
- data/lib/arrow/csv-loader.rb +17 -2
- data/lib/arrow/data-type.rb +81 -0
- data/lib/arrow/group.rb +133 -0
- data/lib/arrow/loader.rb +4 -1
- data/lib/arrow/slicer.rb +76 -19
- data/lib/arrow/table-formatter.rb +21 -47
- data/lib/arrow/table-list-formatter.rb +35 -0
- data/lib/arrow/table-table-formatter.rb +69 -0
- data/lib/arrow/table.rb +62 -40
- data/lib/arrow/version.rb +1 -1
- data/test/fixture/null-with-double-quote.csv +4 -0
- data/test/fixture/null-without-double-quote.csv +4 -0
- data/test/run-test.rb +3 -1
- data/test/test-chunked-array.rb +39 -2
- data/test/test-column.rb +13 -0
- data/test/test-csv-loader.rb +20 -0
- data/test/test-group.rb +111 -0
- data/test/test-slicer.rb +42 -0
- data/test/test-table.rb +52 -10
- metadata +12 -2
data/lib/arrow/version.rb
CHANGED
data/test/run-test.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
#
|
3
|
-
# Copyright 2017 Kouhei Sutou <kou@clear-code.com>
|
3
|
+
# Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
6
|
# you may not use this file except in compliance with the License.
|
@@ -14,6 +14,8 @@
|
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
16
|
|
17
|
+
ENV["TZ"] = "Asia/Tokyo"
|
18
|
+
|
17
19
|
$VERBOSE = true
|
18
20
|
|
19
21
|
require "pathname"
|
data/test/test-chunked-array.rb
CHANGED
@@ -14,12 +14,49 @@
|
|
14
14
|
|
15
15
|
class ChunkedArrayTest < Test::Unit::TestCase
|
16
16
|
test("#each") do
|
17
|
-
|
17
|
+
arrays = [
|
18
18
|
Arrow::BooleanArray.new([true, false]),
|
19
19
|
Arrow::BooleanArray.new([nil, true]),
|
20
20
|
]
|
21
|
-
chunked_array = Arrow::ChunkedArray.new(
|
21
|
+
chunked_array = Arrow::ChunkedArray.new(arrays)
|
22
22
|
assert_equal([true, false, nil, true],
|
23
23
|
chunked_array.to_a)
|
24
24
|
end
|
25
|
+
|
26
|
+
sub_test_case("#pack") do
|
27
|
+
test("basic array") do
|
28
|
+
arrays = [
|
29
|
+
Arrow::BooleanArray.new([true, false]),
|
30
|
+
Arrow::BooleanArray.new([nil, true]),
|
31
|
+
]
|
32
|
+
chunked_array = Arrow::ChunkedArray.new(arrays)
|
33
|
+
packed_chunked_array = chunked_array.pack
|
34
|
+
assert_equal([
|
35
|
+
Arrow::BooleanArray,
|
36
|
+
[true, false, nil, true],
|
37
|
+
],
|
38
|
+
[
|
39
|
+
packed_chunked_array.class,
|
40
|
+
packed_chunked_array.to_a,
|
41
|
+
])
|
42
|
+
end
|
43
|
+
|
44
|
+
test("TimestampArray") do
|
45
|
+
type = Arrow::TimestampDataType.new(:nano)
|
46
|
+
arrays = [
|
47
|
+
Arrow::TimestampArrayBuilder.new(type).build([Time.at(0)]),
|
48
|
+
Arrow::TimestampArrayBuilder.new(type).build([Time.at(1)]),
|
49
|
+
]
|
50
|
+
chunked_array = Arrow::ChunkedArray.new(arrays)
|
51
|
+
packed_chunked_array = chunked_array.pack
|
52
|
+
assert_equal([
|
53
|
+
Arrow::TimestampArray,
|
54
|
+
[Time.at(0), Time.at(1)],
|
55
|
+
],
|
56
|
+
[
|
57
|
+
packed_chunked_array.class,
|
58
|
+
packed_chunked_array.to_a,
|
59
|
+
])
|
60
|
+
end
|
61
|
+
end
|
25
62
|
end
|
data/test/test-column.rb
CHANGED
@@ -24,4 +24,17 @@ class ColumnTest < Test::Unit::TestCase
|
|
24
24
|
assert_equal([true, false, nil, true],
|
25
25
|
column.to_a)
|
26
26
|
end
|
27
|
+
|
28
|
+
test("#pack") do
|
29
|
+
arrays = [
|
30
|
+
Arrow::BooleanArray.new([true, false]),
|
31
|
+
Arrow::BooleanArray.new([nil, true]),
|
32
|
+
]
|
33
|
+
chunked_array = Arrow::ChunkedArray.new(arrays)
|
34
|
+
column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
|
35
|
+
chunked_array)
|
36
|
+
packed_column = column.pack
|
37
|
+
assert_equal([1, [true, false, nil, true]],
|
38
|
+
[packed_column.data.n_chunks, packed_column.to_a])
|
39
|
+
end
|
27
40
|
end
|
data/test/test-csv-loader.rb
CHANGED
@@ -75,5 +75,25 @@ class CSVLoaderTest < Test::Unit::TestCase
|
|
75
75
|
2 chris -1
|
76
76
|
TABLE
|
77
77
|
end
|
78
|
+
|
79
|
+
test("null: with double quote") do
|
80
|
+
path = fixture_path("null-with-double-quote.csv").to_s
|
81
|
+
assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
|
82
|
+
name score
|
83
|
+
0 alice 10
|
84
|
+
1 bob
|
85
|
+
2 chris -1
|
86
|
+
TABLE
|
87
|
+
end
|
88
|
+
|
89
|
+
test("null: without double quote") do
|
90
|
+
path = fixture_path("null-without-double-quote.csv").to_s
|
91
|
+
assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
|
92
|
+
name score
|
93
|
+
0 alice 10
|
94
|
+
1 bob
|
95
|
+
2 chris -1
|
96
|
+
TABLE
|
97
|
+
end
|
78
98
|
end
|
79
99
|
end
|
data/test/test-group.rb
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
# Copyright 2018 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
class GroupTest < Test::Unit::TestCase
|
16
|
+
include Helper::Fixture
|
17
|
+
|
18
|
+
def setup
|
19
|
+
raw_table = {
|
20
|
+
:group_key1 => Arrow::UInt8Array.new([1, 1, 2, 3, 3, 3]),
|
21
|
+
:group_key2 => Arrow::UInt8Array.new([1, 1, 1, 1, 2, 2]),
|
22
|
+
:int => Arrow::Int32Array.new([-1, -2, nil, -4, -5, -6]),
|
23
|
+
:uint => Arrow::UInt32Array.new([1, nil, 3, 4, 5, 6]),
|
24
|
+
:float => Arrow::FloatArray.new([nil, 2.2, 3.3, 4.4, 5.5, 6.6]),
|
25
|
+
:string => Arrow::StringArray.new(["a", "b", "c", nil, "e", "f"]),
|
26
|
+
}
|
27
|
+
@table = Arrow::Table.new(raw_table)
|
28
|
+
end
|
29
|
+
|
30
|
+
sub_test_case("key") do
|
31
|
+
test("Time") do
|
32
|
+
time_values = [
|
33
|
+
Time.parse("2018-01-29"),
|
34
|
+
Time.parse("2018-01-30"),
|
35
|
+
]
|
36
|
+
raw_table = {
|
37
|
+
:time => Arrow::ArrayBuilder.build(time_values),
|
38
|
+
:int => Arrow::Int32Array.new([-1, -2]),
|
39
|
+
}
|
40
|
+
table = Arrow::Table.new(raw_table)
|
41
|
+
assert_equal(<<-TABLE, table.group(:time).count.to_s)
|
42
|
+
time int
|
43
|
+
0 2018-01-29T00:00:00+09:00 1
|
44
|
+
1 2018-01-30T00:00:00+09:00 1
|
45
|
+
TABLE
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
sub_test_case("#count") do
|
50
|
+
test("single") do
|
51
|
+
assert_equal(<<-TABLE, @table.group(:group_key1).count.to_s)
|
52
|
+
group_key1 group_key2 int uint float string
|
53
|
+
0 1 2 2 1 1 2
|
54
|
+
1 2 1 0 1 1 1
|
55
|
+
2 3 3 3 3 3 2
|
56
|
+
TABLE
|
57
|
+
end
|
58
|
+
|
59
|
+
test("multiple") do
|
60
|
+
assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).count.to_s)
|
61
|
+
group_key1 group_key2 int uint float string
|
62
|
+
0 1 1 2 1 1 2
|
63
|
+
1 2 1 0 1 1 1
|
64
|
+
2 3 1 1 1 1 0
|
65
|
+
3 3 2 2 2 2 2
|
66
|
+
TABLE
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
sub_test_case("#sum") do
|
71
|
+
test("single") do
|
72
|
+
assert_equal(<<-TABLE, @table.group(:group_key1).sum.to_s)
|
73
|
+
group_key1 group_key2 int uint float
|
74
|
+
0 1 2 -3 1 2.200000
|
75
|
+
1 2 1 0 3 3.300000
|
76
|
+
2 3 5 -15 15 16.500000
|
77
|
+
TABLE
|
78
|
+
end
|
79
|
+
|
80
|
+
test("multiple") do
|
81
|
+
assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).sum.to_s)
|
82
|
+
group_key1 group_key2 int uint float
|
83
|
+
0 1 1 -3 1 2.200000
|
84
|
+
1 2 1 0 3 3.300000
|
85
|
+
2 3 1 -4 4 4.400000
|
86
|
+
3 3 2 -11 11 12.100000
|
87
|
+
TABLE
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
sub_test_case("#average") do
|
92
|
+
test("single") do
|
93
|
+
assert_equal(<<-TABLE, @table.group(:group_key1).average.to_s)
|
94
|
+
group_key1 group_key2 int uint float
|
95
|
+
0 1 1.000000 -1.500000 1.000000 2.200000
|
96
|
+
1 2 1.000000 0.000000 3.000000 3.300000
|
97
|
+
2 3 1.666667 -5.000000 5.000000 5.500000
|
98
|
+
TABLE
|
99
|
+
end
|
100
|
+
|
101
|
+
test("multiple") do
|
102
|
+
assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).average.to_s)
|
103
|
+
group_key1 group_key2 int uint float
|
104
|
+
0 1 1 -1.500000 1.000000 2.200000
|
105
|
+
1 2 1 0.000000 3.000000 3.300000
|
106
|
+
2 3 1 -4.000000 4.000000 4.400000
|
107
|
+
3 3 2 -5.500000 5.500000 6.050000
|
108
|
+
TABLE
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
data/test/test-slicer.rb
CHANGED
@@ -106,6 +106,21 @@ class SlicerTest < Test::Unit::TestCase
|
|
106
106
|
TABLE
|
107
107
|
end
|
108
108
|
|
109
|
+
test("column.valid?") do
|
110
|
+
sliced_table = @table.slice do |slicer|
|
111
|
+
slicer.visible.valid?
|
112
|
+
end
|
113
|
+
assert_equal(<<-TABLE, sliced_table.to_s)
|
114
|
+
count visible
|
115
|
+
0 1 true
|
116
|
+
1 2 false
|
117
|
+
2 8 true
|
118
|
+
3 16 true
|
119
|
+
4 32 false
|
120
|
+
5 256 true
|
121
|
+
TABLE
|
122
|
+
end
|
123
|
+
|
109
124
|
sub_test_case("column ==") do
|
110
125
|
test("nil") do
|
111
126
|
sliced_table = @table.slice do |slicer|
|
@@ -298,6 +313,33 @@ class SlicerTest < Test::Unit::TestCase
|
|
298
313
|
TABLE
|
299
314
|
end
|
300
315
|
|
316
|
+
test("column.in") do
|
317
|
+
sliced_table = @table.slice do |slicer|
|
318
|
+
slicer.count.in?([1, 4, 16, 64])
|
319
|
+
end
|
320
|
+
assert_equal(<<-TABLE, sliced_table.to_s)
|
321
|
+
count visible
|
322
|
+
0 1 true
|
323
|
+
1 4
|
324
|
+
2 16 true
|
325
|
+
3 64
|
326
|
+
TABLE
|
327
|
+
end
|
328
|
+
|
329
|
+
test("!column.in") do
|
330
|
+
sliced_table = @table.slice do |slicer|
|
331
|
+
!slicer.count.in?([1, 4, 16, 64])
|
332
|
+
end
|
333
|
+
assert_equal(<<-TABLE, sliced_table.to_s)
|
334
|
+
count visible
|
335
|
+
0 0
|
336
|
+
1 2 false
|
337
|
+
2 8 true
|
338
|
+
3 32 false
|
339
|
+
4 256 true
|
340
|
+
TABLE
|
341
|
+
end
|
342
|
+
|
301
343
|
test("condition & condition") do
|
302
344
|
sliced_table = @table.slice do |slicer|
|
303
345
|
slicer.visible & (slicer.count >= 16)
|
data/test/test-table.rb
CHANGED
@@ -58,14 +58,21 @@ class TableTest < Test::Unit::TestCase
|
|
58
58
|
TABLE
|
59
59
|
end
|
60
60
|
|
61
|
-
test("Integer") do
|
61
|
+
test("Integer: positive") do
|
62
62
|
assert_equal(<<-TABLE, @table.slice(2).to_s)
|
63
63
|
count visible
|
64
64
|
0 4
|
65
65
|
TABLE
|
66
66
|
end
|
67
67
|
|
68
|
-
test("Range: include end") do
|
68
|
+
test("Integer: negative") do
|
69
|
+
assert_equal(<<-TABLE, @table.slice(-1).to_s)
|
70
|
+
count visible
|
71
|
+
0 128
|
72
|
+
TABLE
|
73
|
+
end
|
74
|
+
|
75
|
+
test("Range: positive: include end") do
|
69
76
|
assert_equal(<<-TABLE, @table.slice(2..4).to_s)
|
70
77
|
count visible
|
71
78
|
0 4
|
@@ -74,7 +81,7 @@ class TableTest < Test::Unit::TestCase
|
|
74
81
|
TABLE
|
75
82
|
end
|
76
83
|
|
77
|
-
test("Range: exclude end") do
|
84
|
+
test("Range: positive: exclude end") do
|
78
85
|
assert_equal(<<-TABLE, @table.slice(2...4).to_s)
|
79
86
|
count visible
|
80
87
|
0 4
|
@@ -82,7 +89,24 @@ class TableTest < Test::Unit::TestCase
|
|
82
89
|
TABLE
|
83
90
|
end
|
84
91
|
|
85
|
-
test("[from, to]") do
|
92
|
+
test("Range: negative: include end") do
|
93
|
+
assert_equal(<<-TABLE, @table.slice(-4..-2).to_s)
|
94
|
+
count visible
|
95
|
+
0 16 true
|
96
|
+
1 32 false
|
97
|
+
2 64
|
98
|
+
TABLE
|
99
|
+
end
|
100
|
+
|
101
|
+
test("Range: negative: exclude end") do
|
102
|
+
assert_equal(<<-TABLE, @table.slice(-4...-2).to_s)
|
103
|
+
count visible
|
104
|
+
0 16 true
|
105
|
+
1 32 false
|
106
|
+
TABLE
|
107
|
+
end
|
108
|
+
|
109
|
+
test("[from, to]: positive") do
|
86
110
|
assert_equal(<<-TABLE, @table.slice([0, 2]).to_s)
|
87
111
|
count visible
|
88
112
|
0 1 true
|
@@ -90,6 +114,14 @@ class TableTest < Test::Unit::TestCase
|
|
90
114
|
TABLE
|
91
115
|
end
|
92
116
|
|
117
|
+
test("[from, to]: negative") do
|
118
|
+
assert_equal(<<-TABLE, @table.slice([-4, 2]).to_s)
|
119
|
+
count visible
|
120
|
+
0 16 true
|
121
|
+
1 32 false
|
122
|
+
TABLE
|
123
|
+
end
|
124
|
+
|
93
125
|
test("Integer, Range, ...") do
|
94
126
|
assert_equal(<<-TABLE, @table.slice(0, 4...7).to_s)
|
95
127
|
count visible
|
@@ -109,12 +141,6 @@ class TableTest < Test::Unit::TestCase
|
|
109
141
|
test("[Symbol]") do
|
110
142
|
assert_equal(@visible_column, @table[:visible])
|
111
143
|
end
|
112
|
-
|
113
|
-
test("[String, Symbol]") do
|
114
|
-
assert_equal(Arrow::Table.new(@table.schema,
|
115
|
-
[@visible_column, @count_column]).to_s,
|
116
|
-
@table["visible", :count].to_s)
|
117
|
-
end
|
118
144
|
end
|
119
145
|
|
120
146
|
sub_test_case("#merge") do
|
@@ -391,4 +417,20 @@ class TableTest < Test::Unit::TestCase
|
|
391
417
|
end
|
392
418
|
end
|
393
419
|
end
|
420
|
+
|
421
|
+
test("#pack") do
|
422
|
+
packed_table = @table.pack
|
423
|
+
column_n_chunks = packed_table.columns.collect {|c| c.data.n_chunks}
|
424
|
+
assert_equal([[1, 1], <<-TABLE], [column_n_chunks, packed_table.to_s])
|
425
|
+
count visible
|
426
|
+
0 1 true
|
427
|
+
1 2 false
|
428
|
+
2 4
|
429
|
+
3 8 true
|
430
|
+
4 16 true
|
431
|
+
5 32 false
|
432
|
+
6 64
|
433
|
+
7 128
|
434
|
+
TABLE
|
435
|
+
end
|
394
436
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-arrow
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.1
|
4
|
+
version: 0.8.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: gobject-introspection
|
@@ -150,11 +150,13 @@ files:
|
|
150
150
|
- lib/arrow/compatibility.rb
|
151
151
|
- lib/arrow/csv-loader.rb
|
152
152
|
- lib/arrow/csv-reader.rb
|
153
|
+
- lib/arrow/data-type.rb
|
153
154
|
- lib/arrow/date32-array-builder.rb
|
154
155
|
- lib/arrow/date32-array.rb
|
155
156
|
- lib/arrow/date64-array-builder.rb
|
156
157
|
- lib/arrow/date64-array.rb
|
157
158
|
- lib/arrow/field.rb
|
159
|
+
- lib/arrow/group.rb
|
158
160
|
- lib/arrow/loader.rb
|
159
161
|
- lib/arrow/record-batch-file-reader.rb
|
160
162
|
- lib/arrow/record-batch-stream-reader.rb
|
@@ -163,14 +165,18 @@ files:
|
|
163
165
|
- lib/arrow/record.rb
|
164
166
|
- lib/arrow/slicer.rb
|
165
167
|
- lib/arrow/table-formatter.rb
|
168
|
+
- lib/arrow/table-list-formatter.rb
|
166
169
|
- lib/arrow/table-loader.rb
|
167
170
|
- lib/arrow/table-saver.rb
|
171
|
+
- lib/arrow/table-table-formatter.rb
|
168
172
|
- lib/arrow/table.rb
|
169
173
|
- lib/arrow/tensor.rb
|
170
174
|
- lib/arrow/timestamp-array-builder.rb
|
171
175
|
- lib/arrow/timestamp-array.rb
|
172
176
|
- lib/arrow/version.rb
|
173
177
|
- red-arrow.gemspec
|
178
|
+
- test/fixture/null-with-double-quote.csv
|
179
|
+
- test/fixture/null-without-double-quote.csv
|
174
180
|
- test/fixture/with-header.csv
|
175
181
|
- test/fixture/without-header.csv
|
176
182
|
- test/helper.rb
|
@@ -184,6 +190,7 @@ files:
|
|
184
190
|
- test/test-csv-reader.rb
|
185
191
|
- test/test-date32-array.rb
|
186
192
|
- test/test-date64-array.rb
|
193
|
+
- test/test-group.rb
|
187
194
|
- test/test-record-batch-file-reader.rb
|
188
195
|
- test/test-record-batch.rb
|
189
196
|
- test/test-slicer.rb
|
@@ -217,7 +224,9 @@ summary: Red Arrow is a Ruby bindings of Apache Arrow. Red Arrow is based on GOb
|
|
217
224
|
test_files:
|
218
225
|
- test/test-record-batch-file-reader.rb
|
219
226
|
- test/fixture/with-header.csv
|
227
|
+
- test/fixture/null-without-double-quote.csv
|
220
228
|
- test/fixture/without-header.csv
|
229
|
+
- test/fixture/null-with-double-quote.csv
|
221
230
|
- test/test-csv-reader.rb
|
222
231
|
- test/test-column.rb
|
223
232
|
- test/test-array-builder.rb
|
@@ -230,6 +239,7 @@ test_files:
|
|
230
239
|
- test/test-table.rb
|
231
240
|
- test/test-csv-loader.rb
|
232
241
|
- test/test-array.rb
|
242
|
+
- test/test-group.rb
|
233
243
|
- test/helper/fixture.rb
|
234
244
|
- test/test-date64-array.rb
|
235
245
|
- test/test-slicer.rb
|