red-arrow 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/text/news.md +44 -0
- data/lib/arrow/array-builder.rb +0 -1
- data/lib/arrow/array.rb +9 -1
- data/lib/arrow/chunked-array.rb +43 -7
- data/lib/arrow/column.rb +18 -0
- data/lib/arrow/csv-loader.rb +17 -2
- data/lib/arrow/data-type.rb +81 -0
- data/lib/arrow/group.rb +133 -0
- data/lib/arrow/loader.rb +4 -1
- data/lib/arrow/slicer.rb +76 -19
- data/lib/arrow/table-formatter.rb +21 -47
- data/lib/arrow/table-list-formatter.rb +35 -0
- data/lib/arrow/table-table-formatter.rb +69 -0
- data/lib/arrow/table.rb +62 -40
- data/lib/arrow/version.rb +1 -1
- data/test/fixture/null-with-double-quote.csv +4 -0
- data/test/fixture/null-without-double-quote.csv +4 -0
- data/test/run-test.rb +3 -1
- data/test/test-chunked-array.rb +39 -2
- data/test/test-column.rb +13 -0
- data/test/test-csv-loader.rb +20 -0
- data/test/test-group.rb +111 -0
- data/test/test-slicer.rb +42 -0
- data/test/test-table.rb +52 -10
- metadata +12 -2
data/lib/arrow/version.rb
CHANGED
data/test/run-test.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
#
|
3
|
-
# Copyright 2017 Kouhei Sutou <kou@clear-code.com>
|
3
|
+
# Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
6
|
# you may not use this file except in compliance with the License.
|
@@ -14,6 +14,8 @@
|
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
16
|
|
17
|
+
ENV["TZ"] = "Asia/Tokyo"
|
18
|
+
|
17
19
|
$VERBOSE = true
|
18
20
|
|
19
21
|
require "pathname"
|
data/test/test-chunked-array.rb
CHANGED
@@ -14,12 +14,49 @@
|
|
14
14
|
|
15
15
|
class ChunkedArrayTest < Test::Unit::TestCase
|
16
16
|
test("#each") do
|
17
|
-
|
17
|
+
arrays = [
|
18
18
|
Arrow::BooleanArray.new([true, false]),
|
19
19
|
Arrow::BooleanArray.new([nil, true]),
|
20
20
|
]
|
21
|
-
chunked_array = Arrow::ChunkedArray.new(
|
21
|
+
chunked_array = Arrow::ChunkedArray.new(arrays)
|
22
22
|
assert_equal([true, false, nil, true],
|
23
23
|
chunked_array.to_a)
|
24
24
|
end
|
25
|
+
|
26
|
+
sub_test_case("#pack") do
|
27
|
+
test("basic array") do
|
28
|
+
arrays = [
|
29
|
+
Arrow::BooleanArray.new([true, false]),
|
30
|
+
Arrow::BooleanArray.new([nil, true]),
|
31
|
+
]
|
32
|
+
chunked_array = Arrow::ChunkedArray.new(arrays)
|
33
|
+
packed_chunked_array = chunked_array.pack
|
34
|
+
assert_equal([
|
35
|
+
Arrow::BooleanArray,
|
36
|
+
[true, false, nil, true],
|
37
|
+
],
|
38
|
+
[
|
39
|
+
packed_chunked_array.class,
|
40
|
+
packed_chunked_array.to_a,
|
41
|
+
])
|
42
|
+
end
|
43
|
+
|
44
|
+
test("TimestampArray") do
|
45
|
+
type = Arrow::TimestampDataType.new(:nano)
|
46
|
+
arrays = [
|
47
|
+
Arrow::TimestampArrayBuilder.new(type).build([Time.at(0)]),
|
48
|
+
Arrow::TimestampArrayBuilder.new(type).build([Time.at(1)]),
|
49
|
+
]
|
50
|
+
chunked_array = Arrow::ChunkedArray.new(arrays)
|
51
|
+
packed_chunked_array = chunked_array.pack
|
52
|
+
assert_equal([
|
53
|
+
Arrow::TimestampArray,
|
54
|
+
[Time.at(0), Time.at(1)],
|
55
|
+
],
|
56
|
+
[
|
57
|
+
packed_chunked_array.class,
|
58
|
+
packed_chunked_array.to_a,
|
59
|
+
])
|
60
|
+
end
|
61
|
+
end
|
25
62
|
end
|
data/test/test-column.rb
CHANGED
@@ -24,4 +24,17 @@ class ColumnTest < Test::Unit::TestCase
|
|
24
24
|
assert_equal([true, false, nil, true],
|
25
25
|
column.to_a)
|
26
26
|
end
|
27
|
+
|
28
|
+
test("#pack") do
|
29
|
+
arrays = [
|
30
|
+
Arrow::BooleanArray.new([true, false]),
|
31
|
+
Arrow::BooleanArray.new([nil, true]),
|
32
|
+
]
|
33
|
+
chunked_array = Arrow::ChunkedArray.new(arrays)
|
34
|
+
column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
|
35
|
+
chunked_array)
|
36
|
+
packed_column = column.pack
|
37
|
+
assert_equal([1, [true, false, nil, true]],
|
38
|
+
[packed_column.data.n_chunks, packed_column.to_a])
|
39
|
+
end
|
27
40
|
end
|
data/test/test-csv-loader.rb
CHANGED
@@ -75,5 +75,25 @@ class CSVLoaderTest < Test::Unit::TestCase
|
|
75
75
|
2 chris -1
|
76
76
|
TABLE
|
77
77
|
end
|
78
|
+
|
79
|
+
test("null: with double quote") do
|
80
|
+
path = fixture_path("null-with-double-quote.csv").to_s
|
81
|
+
assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
|
82
|
+
name score
|
83
|
+
0 alice 10
|
84
|
+
1 bob
|
85
|
+
2 chris -1
|
86
|
+
TABLE
|
87
|
+
end
|
88
|
+
|
89
|
+
test("null: without double quote") do
|
90
|
+
path = fixture_path("null-without-double-quote.csv").to_s
|
91
|
+
assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
|
92
|
+
name score
|
93
|
+
0 alice 10
|
94
|
+
1 bob
|
95
|
+
2 chris -1
|
96
|
+
TABLE
|
97
|
+
end
|
78
98
|
end
|
79
99
|
end
|
data/test/test-group.rb
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
# Copyright 2018 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
class GroupTest < Test::Unit::TestCase
|
16
|
+
include Helper::Fixture
|
17
|
+
|
18
|
+
def setup
|
19
|
+
raw_table = {
|
20
|
+
:group_key1 => Arrow::UInt8Array.new([1, 1, 2, 3, 3, 3]),
|
21
|
+
:group_key2 => Arrow::UInt8Array.new([1, 1, 1, 1, 2, 2]),
|
22
|
+
:int => Arrow::Int32Array.new([-1, -2, nil, -4, -5, -6]),
|
23
|
+
:uint => Arrow::UInt32Array.new([1, nil, 3, 4, 5, 6]),
|
24
|
+
:float => Arrow::FloatArray.new([nil, 2.2, 3.3, 4.4, 5.5, 6.6]),
|
25
|
+
:string => Arrow::StringArray.new(["a", "b", "c", nil, "e", "f"]),
|
26
|
+
}
|
27
|
+
@table = Arrow::Table.new(raw_table)
|
28
|
+
end
|
29
|
+
|
30
|
+
sub_test_case("key") do
|
31
|
+
test("Time") do
|
32
|
+
time_values = [
|
33
|
+
Time.parse("2018-01-29"),
|
34
|
+
Time.parse("2018-01-30"),
|
35
|
+
]
|
36
|
+
raw_table = {
|
37
|
+
:time => Arrow::ArrayBuilder.build(time_values),
|
38
|
+
:int => Arrow::Int32Array.new([-1, -2]),
|
39
|
+
}
|
40
|
+
table = Arrow::Table.new(raw_table)
|
41
|
+
assert_equal(<<-TABLE, table.group(:time).count.to_s)
|
42
|
+
time int
|
43
|
+
0 2018-01-29T00:00:00+09:00 1
|
44
|
+
1 2018-01-30T00:00:00+09:00 1
|
45
|
+
TABLE
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
sub_test_case("#count") do
|
50
|
+
test("single") do
|
51
|
+
assert_equal(<<-TABLE, @table.group(:group_key1).count.to_s)
|
52
|
+
group_key1 group_key2 int uint float string
|
53
|
+
0 1 2 2 1 1 2
|
54
|
+
1 2 1 0 1 1 1
|
55
|
+
2 3 3 3 3 3 2
|
56
|
+
TABLE
|
57
|
+
end
|
58
|
+
|
59
|
+
test("multiple") do
|
60
|
+
assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).count.to_s)
|
61
|
+
group_key1 group_key2 int uint float string
|
62
|
+
0 1 1 2 1 1 2
|
63
|
+
1 2 1 0 1 1 1
|
64
|
+
2 3 1 1 1 1 0
|
65
|
+
3 3 2 2 2 2 2
|
66
|
+
TABLE
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
sub_test_case("#sum") do
|
71
|
+
test("single") do
|
72
|
+
assert_equal(<<-TABLE, @table.group(:group_key1).sum.to_s)
|
73
|
+
group_key1 group_key2 int uint float
|
74
|
+
0 1 2 -3 1 2.200000
|
75
|
+
1 2 1 0 3 3.300000
|
76
|
+
2 3 5 -15 15 16.500000
|
77
|
+
TABLE
|
78
|
+
end
|
79
|
+
|
80
|
+
test("multiple") do
|
81
|
+
assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).sum.to_s)
|
82
|
+
group_key1 group_key2 int uint float
|
83
|
+
0 1 1 -3 1 2.200000
|
84
|
+
1 2 1 0 3 3.300000
|
85
|
+
2 3 1 -4 4 4.400000
|
86
|
+
3 3 2 -11 11 12.100000
|
87
|
+
TABLE
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
sub_test_case("#average") do
|
92
|
+
test("single") do
|
93
|
+
assert_equal(<<-TABLE, @table.group(:group_key1).average.to_s)
|
94
|
+
group_key1 group_key2 int uint float
|
95
|
+
0 1 1.000000 -1.500000 1.000000 2.200000
|
96
|
+
1 2 1.000000 0.000000 3.000000 3.300000
|
97
|
+
2 3 1.666667 -5.000000 5.000000 5.500000
|
98
|
+
TABLE
|
99
|
+
end
|
100
|
+
|
101
|
+
test("multiple") do
|
102
|
+
assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).average.to_s)
|
103
|
+
group_key1 group_key2 int uint float
|
104
|
+
0 1 1 -1.500000 1.000000 2.200000
|
105
|
+
1 2 1 0.000000 3.000000 3.300000
|
106
|
+
2 3 1 -4.000000 4.000000 4.400000
|
107
|
+
3 3 2 -5.500000 5.500000 6.050000
|
108
|
+
TABLE
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
data/test/test-slicer.rb
CHANGED
@@ -106,6 +106,21 @@ class SlicerTest < Test::Unit::TestCase
|
|
106
106
|
TABLE
|
107
107
|
end
|
108
108
|
|
109
|
+
test("column.valid?") do
|
110
|
+
sliced_table = @table.slice do |slicer|
|
111
|
+
slicer.visible.valid?
|
112
|
+
end
|
113
|
+
assert_equal(<<-TABLE, sliced_table.to_s)
|
114
|
+
count visible
|
115
|
+
0 1 true
|
116
|
+
1 2 false
|
117
|
+
2 8 true
|
118
|
+
3 16 true
|
119
|
+
4 32 false
|
120
|
+
5 256 true
|
121
|
+
TABLE
|
122
|
+
end
|
123
|
+
|
109
124
|
sub_test_case("column ==") do
|
110
125
|
test("nil") do
|
111
126
|
sliced_table = @table.slice do |slicer|
|
@@ -298,6 +313,33 @@ class SlicerTest < Test::Unit::TestCase
|
|
298
313
|
TABLE
|
299
314
|
end
|
300
315
|
|
316
|
+
test("column.in") do
|
317
|
+
sliced_table = @table.slice do |slicer|
|
318
|
+
slicer.count.in?([1, 4, 16, 64])
|
319
|
+
end
|
320
|
+
assert_equal(<<-TABLE, sliced_table.to_s)
|
321
|
+
count visible
|
322
|
+
0 1 true
|
323
|
+
1 4
|
324
|
+
2 16 true
|
325
|
+
3 64
|
326
|
+
TABLE
|
327
|
+
end
|
328
|
+
|
329
|
+
test("!column.in") do
|
330
|
+
sliced_table = @table.slice do |slicer|
|
331
|
+
!slicer.count.in?([1, 4, 16, 64])
|
332
|
+
end
|
333
|
+
assert_equal(<<-TABLE, sliced_table.to_s)
|
334
|
+
count visible
|
335
|
+
0 0
|
336
|
+
1 2 false
|
337
|
+
2 8 true
|
338
|
+
3 32 false
|
339
|
+
4 256 true
|
340
|
+
TABLE
|
341
|
+
end
|
342
|
+
|
301
343
|
test("condition & condition") do
|
302
344
|
sliced_table = @table.slice do |slicer|
|
303
345
|
slicer.visible & (slicer.count >= 16)
|
data/test/test-table.rb
CHANGED
@@ -58,14 +58,21 @@ class TableTest < Test::Unit::TestCase
|
|
58
58
|
TABLE
|
59
59
|
end
|
60
60
|
|
61
|
-
test("Integer") do
|
61
|
+
test("Integer: positive") do
|
62
62
|
assert_equal(<<-TABLE, @table.slice(2).to_s)
|
63
63
|
count visible
|
64
64
|
0 4
|
65
65
|
TABLE
|
66
66
|
end
|
67
67
|
|
68
|
-
test("Range: include end") do
|
68
|
+
test("Integer: negative") do
|
69
|
+
assert_equal(<<-TABLE, @table.slice(-1).to_s)
|
70
|
+
count visible
|
71
|
+
0 128
|
72
|
+
TABLE
|
73
|
+
end
|
74
|
+
|
75
|
+
test("Range: positive: include end") do
|
69
76
|
assert_equal(<<-TABLE, @table.slice(2..4).to_s)
|
70
77
|
count visible
|
71
78
|
0 4
|
@@ -74,7 +81,7 @@ class TableTest < Test::Unit::TestCase
|
|
74
81
|
TABLE
|
75
82
|
end
|
76
83
|
|
77
|
-
test("Range: exclude end") do
|
84
|
+
test("Range: positive: exclude end") do
|
78
85
|
assert_equal(<<-TABLE, @table.slice(2...4).to_s)
|
79
86
|
count visible
|
80
87
|
0 4
|
@@ -82,7 +89,24 @@ class TableTest < Test::Unit::TestCase
|
|
82
89
|
TABLE
|
83
90
|
end
|
84
91
|
|
85
|
-
test("[from, to]") do
|
92
|
+
test("Range: negative: include end") do
|
93
|
+
assert_equal(<<-TABLE, @table.slice(-4..-2).to_s)
|
94
|
+
count visible
|
95
|
+
0 16 true
|
96
|
+
1 32 false
|
97
|
+
2 64
|
98
|
+
TABLE
|
99
|
+
end
|
100
|
+
|
101
|
+
test("Range: negative: exclude end") do
|
102
|
+
assert_equal(<<-TABLE, @table.slice(-4...-2).to_s)
|
103
|
+
count visible
|
104
|
+
0 16 true
|
105
|
+
1 32 false
|
106
|
+
TABLE
|
107
|
+
end
|
108
|
+
|
109
|
+
test("[from, to]: positive") do
|
86
110
|
assert_equal(<<-TABLE, @table.slice([0, 2]).to_s)
|
87
111
|
count visible
|
88
112
|
0 1 true
|
@@ -90,6 +114,14 @@ class TableTest < Test::Unit::TestCase
|
|
90
114
|
TABLE
|
91
115
|
end
|
92
116
|
|
117
|
+
test("[from, to]: negative") do
|
118
|
+
assert_equal(<<-TABLE, @table.slice([-4, 2]).to_s)
|
119
|
+
count visible
|
120
|
+
0 16 true
|
121
|
+
1 32 false
|
122
|
+
TABLE
|
123
|
+
end
|
124
|
+
|
93
125
|
test("Integer, Range, ...") do
|
94
126
|
assert_equal(<<-TABLE, @table.slice(0, 4...7).to_s)
|
95
127
|
count visible
|
@@ -109,12 +141,6 @@ class TableTest < Test::Unit::TestCase
|
|
109
141
|
test("[Symbol]") do
|
110
142
|
assert_equal(@visible_column, @table[:visible])
|
111
143
|
end
|
112
|
-
|
113
|
-
test("[String, Symbol]") do
|
114
|
-
assert_equal(Arrow::Table.new(@table.schema,
|
115
|
-
[@visible_column, @count_column]).to_s,
|
116
|
-
@table["visible", :count].to_s)
|
117
|
-
end
|
118
144
|
end
|
119
145
|
|
120
146
|
sub_test_case("#merge") do
|
@@ -391,4 +417,20 @@ class TableTest < Test::Unit::TestCase
|
|
391
417
|
end
|
392
418
|
end
|
393
419
|
end
|
420
|
+
|
421
|
+
test("#pack") do
|
422
|
+
packed_table = @table.pack
|
423
|
+
column_n_chunks = packed_table.columns.collect {|c| c.data.n_chunks}
|
424
|
+
assert_equal([[1, 1], <<-TABLE], [column_n_chunks, packed_table.to_s])
|
425
|
+
count visible
|
426
|
+
0 1 true
|
427
|
+
1 2 false
|
428
|
+
2 4
|
429
|
+
3 8 true
|
430
|
+
4 16 true
|
431
|
+
5 32 false
|
432
|
+
6 64
|
433
|
+
7 128
|
434
|
+
TABLE
|
435
|
+
end
|
394
436
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-arrow
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.1
|
4
|
+
version: 0.8.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: gobject-introspection
|
@@ -150,11 +150,13 @@ files:
|
|
150
150
|
- lib/arrow/compatibility.rb
|
151
151
|
- lib/arrow/csv-loader.rb
|
152
152
|
- lib/arrow/csv-reader.rb
|
153
|
+
- lib/arrow/data-type.rb
|
153
154
|
- lib/arrow/date32-array-builder.rb
|
154
155
|
- lib/arrow/date32-array.rb
|
155
156
|
- lib/arrow/date64-array-builder.rb
|
156
157
|
- lib/arrow/date64-array.rb
|
157
158
|
- lib/arrow/field.rb
|
159
|
+
- lib/arrow/group.rb
|
158
160
|
- lib/arrow/loader.rb
|
159
161
|
- lib/arrow/record-batch-file-reader.rb
|
160
162
|
- lib/arrow/record-batch-stream-reader.rb
|
@@ -163,14 +165,18 @@ files:
|
|
163
165
|
- lib/arrow/record.rb
|
164
166
|
- lib/arrow/slicer.rb
|
165
167
|
- lib/arrow/table-formatter.rb
|
168
|
+
- lib/arrow/table-list-formatter.rb
|
166
169
|
- lib/arrow/table-loader.rb
|
167
170
|
- lib/arrow/table-saver.rb
|
171
|
+
- lib/arrow/table-table-formatter.rb
|
168
172
|
- lib/arrow/table.rb
|
169
173
|
- lib/arrow/tensor.rb
|
170
174
|
- lib/arrow/timestamp-array-builder.rb
|
171
175
|
- lib/arrow/timestamp-array.rb
|
172
176
|
- lib/arrow/version.rb
|
173
177
|
- red-arrow.gemspec
|
178
|
+
- test/fixture/null-with-double-quote.csv
|
179
|
+
- test/fixture/null-without-double-quote.csv
|
174
180
|
- test/fixture/with-header.csv
|
175
181
|
- test/fixture/without-header.csv
|
176
182
|
- test/helper.rb
|
@@ -184,6 +190,7 @@ files:
|
|
184
190
|
- test/test-csv-reader.rb
|
185
191
|
- test/test-date32-array.rb
|
186
192
|
- test/test-date64-array.rb
|
193
|
+
- test/test-group.rb
|
187
194
|
- test/test-record-batch-file-reader.rb
|
188
195
|
- test/test-record-batch.rb
|
189
196
|
- test/test-slicer.rb
|
@@ -217,7 +224,9 @@ summary: Red Arrow is a Ruby bindings of Apache Arrow. Red Arrow is based on GOb
|
|
217
224
|
test_files:
|
218
225
|
- test/test-record-batch-file-reader.rb
|
219
226
|
- test/fixture/with-header.csv
|
227
|
+
- test/fixture/null-without-double-quote.csv
|
220
228
|
- test/fixture/without-header.csv
|
229
|
+
- test/fixture/null-with-double-quote.csv
|
221
230
|
- test/test-csv-reader.rb
|
222
231
|
- test/test-column.rb
|
223
232
|
- test/test-array-builder.rb
|
@@ -230,6 +239,7 @@ test_files:
|
|
230
239
|
- test/test-table.rb
|
231
240
|
- test/test-csv-loader.rb
|
232
241
|
- test/test-array.rb
|
242
|
+
- test/test-group.rb
|
233
243
|
- test/helper/fixture.rb
|
234
244
|
- test/test-date64-array.rb
|
235
245
|
- test/test-slicer.rb
|