red-arrow 0.8.1 → 0.8.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/arrow/version.rb CHANGED
@@ -13,5 +13,5 @@
13
13
  # limitations under the License.
14
14
 
15
15
  module Arrow
16
- VERSION = "0.8.1"
16
+ VERSION = "0.8.2"
17
17
  end
data/test/fixture/null-with-double-quote.csv ADDED
@@ -0,0 +1,4 @@
1
+ name,score
2
+ alice,10
3
+ bob,""
4
+ chris,-1
data/test/fixture/null-without-double-quote.csv ADDED
@@ -0,0 +1,4 @@
1
+ name,score
2
+ alice,10
3
+ bob,
4
+ chris,-1
data/test/run-test.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
  #
3
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
3
+ # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
4
4
  #
5
5
  # Licensed under the Apache License, Version 2.0 (the "License");
6
6
  # you may not use this file except in compliance with the License.
@@ -14,6 +14,8 @@
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
16
 
17
+ ENV["TZ"] = "Asia/Tokyo"
18
+
17
19
  $VERBOSE = true
18
20
 
19
21
  require "pathname"
data/test/test-chunked-array.rb CHANGED
@@ -14,12 +14,49 @@
14
14
 
15
15
  class ChunkedArrayTest < Test::Unit::TestCase
16
16
  test("#each") do
17
- arrayes = [
17
+ arrays = [
18
18
  Arrow::BooleanArray.new([true, false]),
19
19
  Arrow::BooleanArray.new([nil, true]),
20
20
  ]
21
- chunked_array = Arrow::ChunkedArray.new(arrayes)
21
+ chunked_array = Arrow::ChunkedArray.new(arrays)
22
22
  assert_equal([true, false, nil, true],
23
23
  chunked_array.to_a)
24
24
  end
25
+
26
+ sub_test_case("#pack") do
27
+ test("basic array") do
28
+ arrays = [
29
+ Arrow::BooleanArray.new([true, false]),
30
+ Arrow::BooleanArray.new([nil, true]),
31
+ ]
32
+ chunked_array = Arrow::ChunkedArray.new(arrays)
33
+ packed_chunked_array = chunked_array.pack
34
+ assert_equal([
35
+ Arrow::BooleanArray,
36
+ [true, false, nil, true],
37
+ ],
38
+ [
39
+ packed_chunked_array.class,
40
+ packed_chunked_array.to_a,
41
+ ])
42
+ end
43
+
44
+ test("TimestampArray") do
45
+ type = Arrow::TimestampDataType.new(:nano)
46
+ arrays = [
47
+ Arrow::TimestampArrayBuilder.new(type).build([Time.at(0)]),
48
+ Arrow::TimestampArrayBuilder.new(type).build([Time.at(1)]),
49
+ ]
50
+ chunked_array = Arrow::ChunkedArray.new(arrays)
51
+ packed_chunked_array = chunked_array.pack
52
+ assert_equal([
53
+ Arrow::TimestampArray,
54
+ [Time.at(0), Time.at(1)],
55
+ ],
56
+ [
57
+ packed_chunked_array.class,
58
+ packed_chunked_array.to_a,
59
+ ])
60
+ end
61
+ end
25
62
  end
data/test/test-column.rb CHANGED
@@ -24,4 +24,17 @@ class ColumnTest < Test::Unit::TestCase
24
24
  assert_equal([true, false, nil, true],
25
25
  column.to_a)
26
26
  end
27
+
28
+ test("#pack") do
29
+ arrays = [
30
+ Arrow::BooleanArray.new([true, false]),
31
+ Arrow::BooleanArray.new([nil, true]),
32
+ ]
33
+ chunked_array = Arrow::ChunkedArray.new(arrays)
34
+ column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
35
+ chunked_array)
36
+ packed_column = column.pack
37
+ assert_equal([1, [true, false, nil, true]],
38
+ [packed_column.data.n_chunks, packed_column.to_a])
39
+ end
27
40
  end
data/test/test-csv-loader.rb CHANGED
@@ -75,5 +75,25 @@ class CSVLoaderTest < Test::Unit::TestCase
75
75
  2 chris -1
76
76
  TABLE
77
77
  end
78
+
79
+ test("null: with double quote") do
80
+ path = fixture_path("null-with-double-quote.csv").to_s
81
+ assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
82
+ name score
83
+ 0 alice 10
84
+ 1 bob
85
+ 2 chris -1
86
+ TABLE
87
+ end
88
+
89
+ test("null: without double quote") do
90
+ path = fixture_path("null-without-double-quote.csv").to_s
91
+ assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
92
+ name score
93
+ 0 alice 10
94
+ 1 bob
95
+ 2 chris -1
96
+ TABLE
97
+ end
78
98
  end
79
99
  end
data/test/test-group.rb ADDED
@@ -0,0 +1,111 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ class GroupTest < Test::Unit::TestCase
16
+ include Helper::Fixture
17
+
18
+ def setup
19
+ raw_table = {
20
+ :group_key1 => Arrow::UInt8Array.new([1, 1, 2, 3, 3, 3]),
21
+ :group_key2 => Arrow::UInt8Array.new([1, 1, 1, 1, 2, 2]),
22
+ :int => Arrow::Int32Array.new([-1, -2, nil, -4, -5, -6]),
23
+ :uint => Arrow::UInt32Array.new([1, nil, 3, 4, 5, 6]),
24
+ :float => Arrow::FloatArray.new([nil, 2.2, 3.3, 4.4, 5.5, 6.6]),
25
+ :string => Arrow::StringArray.new(["a", "b", "c", nil, "e", "f"]),
26
+ }
27
+ @table = Arrow::Table.new(raw_table)
28
+ end
29
+
30
+ sub_test_case("key") do
31
+ test("Time") do
32
+ time_values = [
33
+ Time.parse("2018-01-29"),
34
+ Time.parse("2018-01-30"),
35
+ ]
36
+ raw_table = {
37
+ :time => Arrow::ArrayBuilder.build(time_values),
38
+ :int => Arrow::Int32Array.new([-1, -2]),
39
+ }
40
+ table = Arrow::Table.new(raw_table)
41
+ assert_equal(<<-TABLE, table.group(:time).count.to_s)
42
+ time int
43
+ 0 2018-01-29T00:00:00+09:00 1
44
+ 1 2018-01-30T00:00:00+09:00 1
45
+ TABLE
46
+ end
47
+ end
48
+
49
+ sub_test_case("#count") do
50
+ test("single") do
51
+ assert_equal(<<-TABLE, @table.group(:group_key1).count.to_s)
52
+ group_key1 group_key2 int uint float string
53
+ 0 1 2 2 1 1 2
54
+ 1 2 1 0 1 1 1
55
+ 2 3 3 3 3 3 2
56
+ TABLE
57
+ end
58
+
59
+ test("multiple") do
60
+ assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).count.to_s)
61
+ group_key1 group_key2 int uint float string
62
+ 0 1 1 2 1 1 2
63
+ 1 2 1 0 1 1 1
64
+ 2 3 1 1 1 1 0
65
+ 3 3 2 2 2 2 2
66
+ TABLE
67
+ end
68
+ end
69
+
70
+ sub_test_case("#sum") do
71
+ test("single") do
72
+ assert_equal(<<-TABLE, @table.group(:group_key1).sum.to_s)
73
+ group_key1 group_key2 int uint float
74
+ 0 1 2 -3 1 2.200000
75
+ 1 2 1 0 3 3.300000
76
+ 2 3 5 -15 15 16.500000
77
+ TABLE
78
+ end
79
+
80
+ test("multiple") do
81
+ assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).sum.to_s)
82
+ group_key1 group_key2 int uint float
83
+ 0 1 1 -3 1 2.200000
84
+ 1 2 1 0 3 3.300000
85
+ 2 3 1 -4 4 4.400000
86
+ 3 3 2 -11 11 12.100000
87
+ TABLE
88
+ end
89
+ end
90
+
91
+ sub_test_case("#average") do
92
+ test("single") do
93
+ assert_equal(<<-TABLE, @table.group(:group_key1).average.to_s)
94
+ group_key1 group_key2 int uint float
95
+ 0 1 1.000000 -1.500000 1.000000 2.200000
96
+ 1 2 1.000000 0.000000 3.000000 3.300000
97
+ 2 3 1.666667 -5.000000 5.000000 5.500000
98
+ TABLE
99
+ end
100
+
101
+ test("multiple") do
102
+ assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).average.to_s)
103
+ group_key1 group_key2 int uint float
104
+ 0 1 1 -1.500000 1.000000 2.200000
105
+ 1 2 1 0.000000 3.000000 3.300000
106
+ 2 3 1 -4.000000 4.000000 4.400000
107
+ 3 3 2 -5.500000 5.500000 6.050000
108
+ TABLE
109
+ end
110
+ end
111
+ end
data/test/test-slicer.rb CHANGED
@@ -106,6 +106,21 @@ class SlicerTest < Test::Unit::TestCase
106
106
  TABLE
107
107
  end
108
108
 
109
+ test("column.valid?") do
110
+ sliced_table = @table.slice do |slicer|
111
+ slicer.visible.valid?
112
+ end
113
+ assert_equal(<<-TABLE, sliced_table.to_s)
114
+ count visible
115
+ 0 1 true
116
+ 1 2 false
117
+ 2 8 true
118
+ 3 16 true
119
+ 4 32 false
120
+ 5 256 true
121
+ TABLE
122
+ end
123
+
109
124
  sub_test_case("column ==") do
110
125
  test("nil") do
111
126
  sliced_table = @table.slice do |slicer|
@@ -298,6 +313,33 @@ class SlicerTest < Test::Unit::TestCase
298
313
  TABLE
299
314
  end
300
315
 
316
+ test("column.in") do
317
+ sliced_table = @table.slice do |slicer|
318
+ slicer.count.in?([1, 4, 16, 64])
319
+ end
320
+ assert_equal(<<-TABLE, sliced_table.to_s)
321
+ count visible
322
+ 0 1 true
323
+ 1 4
324
+ 2 16 true
325
+ 3 64
326
+ TABLE
327
+ end
328
+
329
+ test("!column.in") do
330
+ sliced_table = @table.slice do |slicer|
331
+ !slicer.count.in?([1, 4, 16, 64])
332
+ end
333
+ assert_equal(<<-TABLE, sliced_table.to_s)
334
+ count visible
335
+ 0 0
336
+ 1 2 false
337
+ 2 8 true
338
+ 3 32 false
339
+ 4 256 true
340
+ TABLE
341
+ end
342
+
301
343
  test("condition & condition") do
302
344
  sliced_table = @table.slice do |slicer|
303
345
  slicer.visible & (slicer.count >= 16)
data/test/test-table.rb CHANGED
@@ -58,14 +58,21 @@ class TableTest < Test::Unit::TestCase
58
58
  TABLE
59
59
  end
60
60
 
61
- test("Integer") do
61
+ test("Integer: positive") do
62
62
  assert_equal(<<-TABLE, @table.slice(2).to_s)
63
63
  count visible
64
64
  0 4
65
65
  TABLE
66
66
  end
67
67
 
68
- test("Range: include end") do
68
+ test("Integer: negative") do
69
+ assert_equal(<<-TABLE, @table.slice(-1).to_s)
70
+ count visible
71
+ 0 128
72
+ TABLE
73
+ end
74
+
75
+ test("Range: positive: include end") do
69
76
  assert_equal(<<-TABLE, @table.slice(2..4).to_s)
70
77
  count visible
71
78
  0 4
@@ -74,7 +81,7 @@ class TableTest < Test::Unit::TestCase
74
81
  TABLE
75
82
  end
76
83
 
77
- test("Range: exclude end") do
84
+ test("Range: positive: exclude end") do
78
85
  assert_equal(<<-TABLE, @table.slice(2...4).to_s)
79
86
  count visible
80
87
  0 4
@@ -82,7 +89,24 @@ class TableTest < Test::Unit::TestCase
82
89
  TABLE
83
90
  end
84
91
 
85
- test("[from, to]") do
92
+ test("Range: negative: include end") do
93
+ assert_equal(<<-TABLE, @table.slice(-4..-2).to_s)
94
+ count visible
95
+ 0 16 true
96
+ 1 32 false
97
+ 2 64
98
+ TABLE
99
+ end
100
+
101
+ test("Range: negative: exclude end") do
102
+ assert_equal(<<-TABLE, @table.slice(-4...-2).to_s)
103
+ count visible
104
+ 0 16 true
105
+ 1 32 false
106
+ TABLE
107
+ end
108
+
109
+ test("[from, to]: positive") do
86
110
  assert_equal(<<-TABLE, @table.slice([0, 2]).to_s)
87
111
  count visible
88
112
  0 1 true
@@ -90,6 +114,14 @@ class TableTest < Test::Unit::TestCase
90
114
  TABLE
91
115
  end
92
116
 
117
+ test("[from, to]: negative") do
118
+ assert_equal(<<-TABLE, @table.slice([-4, 2]).to_s)
119
+ count visible
120
+ 0 16 true
121
+ 1 32 false
122
+ TABLE
123
+ end
124
+
93
125
  test("Integer, Range, ...") do
94
126
  assert_equal(<<-TABLE, @table.slice(0, 4...7).to_s)
95
127
  count visible
@@ -109,12 +141,6 @@ class TableTest < Test::Unit::TestCase
109
141
  test("[Symbol]") do
110
142
  assert_equal(@visible_column, @table[:visible])
111
143
  end
112
-
113
- test("[String, Symbol]") do
114
- assert_equal(Arrow::Table.new(@table.schema,
115
- [@visible_column, @count_column]).to_s,
116
- @table["visible", :count].to_s)
117
- end
118
144
  end
119
145
 
120
146
  sub_test_case("#merge") do
@@ -391,4 +417,20 @@ class TableTest < Test::Unit::TestCase
391
417
  end
392
418
  end
393
419
  end
420
+
421
+ test("#pack") do
422
+ packed_table = @table.pack
423
+ column_n_chunks = packed_table.columns.collect {|c| c.data.n_chunks}
424
+ assert_equal([[1, 1], <<-TABLE], [column_n_chunks, packed_table.to_s])
425
+ count visible
426
+ 0 1 true
427
+ 1 2 false
428
+ 2 4
429
+ 3 8 true
430
+ 4 16 true
431
+ 5 32 false
432
+ 6 64
433
+ 7 128
434
+ TABLE
435
+ end
394
436
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.8.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-05 00:00:00.000000000 Z
11
+ date: 2018-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: gobject-introspection
@@ -150,11 +150,13 @@ files:
150
150
  - lib/arrow/compatibility.rb
151
151
  - lib/arrow/csv-loader.rb
152
152
  - lib/arrow/csv-reader.rb
153
+ - lib/arrow/data-type.rb
153
154
  - lib/arrow/date32-array-builder.rb
154
155
  - lib/arrow/date32-array.rb
155
156
  - lib/arrow/date64-array-builder.rb
156
157
  - lib/arrow/date64-array.rb
157
158
  - lib/arrow/field.rb
159
+ - lib/arrow/group.rb
158
160
  - lib/arrow/loader.rb
159
161
  - lib/arrow/record-batch-file-reader.rb
160
162
  - lib/arrow/record-batch-stream-reader.rb
@@ -163,14 +165,18 @@ files:
163
165
  - lib/arrow/record.rb
164
166
  - lib/arrow/slicer.rb
165
167
  - lib/arrow/table-formatter.rb
168
+ - lib/arrow/table-list-formatter.rb
166
169
  - lib/arrow/table-loader.rb
167
170
  - lib/arrow/table-saver.rb
171
+ - lib/arrow/table-table-formatter.rb
168
172
  - lib/arrow/table.rb
169
173
  - lib/arrow/tensor.rb
170
174
  - lib/arrow/timestamp-array-builder.rb
171
175
  - lib/arrow/timestamp-array.rb
172
176
  - lib/arrow/version.rb
173
177
  - red-arrow.gemspec
178
+ - test/fixture/null-with-double-quote.csv
179
+ - test/fixture/null-without-double-quote.csv
174
180
  - test/fixture/with-header.csv
175
181
  - test/fixture/without-header.csv
176
182
  - test/helper.rb
@@ -184,6 +190,7 @@ files:
184
190
  - test/test-csv-reader.rb
185
191
  - test/test-date32-array.rb
186
192
  - test/test-date64-array.rb
193
+ - test/test-group.rb
187
194
  - test/test-record-batch-file-reader.rb
188
195
  - test/test-record-batch.rb
189
196
  - test/test-slicer.rb
@@ -217,7 +224,9 @@ summary: Red Arrow is a Ruby bindings of Apache Arrow. Red Arrow is based on GOb
217
224
  test_files:
218
225
  - test/test-record-batch-file-reader.rb
219
226
  - test/fixture/with-header.csv
227
+ - test/fixture/null-without-double-quote.csv
220
228
  - test/fixture/without-header.csv
229
+ - test/fixture/null-with-double-quote.csv
221
230
  - test/test-csv-reader.rb
222
231
  - test/test-column.rb
223
232
  - test/test-array-builder.rb
@@ -230,6 +239,7 @@ test_files:
230
239
  - test/test-table.rb
231
240
  - test/test-csv-loader.rb
232
241
  - test/test-array.rb
242
+ - test/test-group.rb
233
243
  - test/helper/fixture.rb
234
244
  - test/test-date64-array.rb
235
245
  - test/test-slicer.rb