red-arrow 0.8.1 → 0.8.2

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
data/lib/arrow/version.rb CHANGED
@@ -13,5 +13,5 @@
13
13
  # limitations under the License.
14
14
 
15
15
  module Arrow
16
- VERSION = "0.8.1"
16
+ VERSION = "0.8.2"
17
17
  end
@@ -0,0 +1,4 @@
1
+ name,score
2
+ alice,10
3
+ bob,""
4
+ chris,-1
@@ -0,0 +1,4 @@
1
+ name,score
2
+ alice,10
3
+ bob,
4
+ chris,-1
data/test/run-test.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
  #
3
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
3
+ # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
4
4
  #
5
5
  # Licensed under the Apache License, Version 2.0 (the "License");
6
6
  # you may not use this file except in compliance with the License.
@@ -14,6 +14,8 @@
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
16
 
17
+ ENV["TZ"] = "Asia/Tokyo"
18
+
17
19
  $VERBOSE = true
18
20
 
19
21
  require "pathname"
@@ -14,12 +14,49 @@
14
14
 
15
15
  class ChunkedArrayTest < Test::Unit::TestCase
16
16
  test("#each") do
17
- arrayes = [
17
+ arrays = [
18
18
  Arrow::BooleanArray.new([true, false]),
19
19
  Arrow::BooleanArray.new([nil, true]),
20
20
  ]
21
- chunked_array = Arrow::ChunkedArray.new(arrayes)
21
+ chunked_array = Arrow::ChunkedArray.new(arrays)
22
22
  assert_equal([true, false, nil, true],
23
23
  chunked_array.to_a)
24
24
  end
25
+
26
+ sub_test_case("#pack") do
27
+ test("basic array") do
28
+ arrays = [
29
+ Arrow::BooleanArray.new([true, false]),
30
+ Arrow::BooleanArray.new([nil, true]),
31
+ ]
32
+ chunked_array = Arrow::ChunkedArray.new(arrays)
33
+ packed_chunked_array = chunked_array.pack
34
+ assert_equal([
35
+ Arrow::BooleanArray,
36
+ [true, false, nil, true],
37
+ ],
38
+ [
39
+ packed_chunked_array.class,
40
+ packed_chunked_array.to_a,
41
+ ])
42
+ end
43
+
44
+ test("TimestampArray") do
45
+ type = Arrow::TimestampDataType.new(:nano)
46
+ arrays = [
47
+ Arrow::TimestampArrayBuilder.new(type).build([Time.at(0)]),
48
+ Arrow::TimestampArrayBuilder.new(type).build([Time.at(1)]),
49
+ ]
50
+ chunked_array = Arrow::ChunkedArray.new(arrays)
51
+ packed_chunked_array = chunked_array.pack
52
+ assert_equal([
53
+ Arrow::TimestampArray,
54
+ [Time.at(0), Time.at(1)],
55
+ ],
56
+ [
57
+ packed_chunked_array.class,
58
+ packed_chunked_array.to_a,
59
+ ])
60
+ end
61
+ end
25
62
  end
data/test/test-column.rb CHANGED
@@ -24,4 +24,17 @@ class ColumnTest < Test::Unit::TestCase
24
24
  assert_equal([true, false, nil, true],
25
25
  column.to_a)
26
26
  end
27
+
28
+ test("#pack") do
29
+ arrays = [
30
+ Arrow::BooleanArray.new([true, false]),
31
+ Arrow::BooleanArray.new([nil, true]),
32
+ ]
33
+ chunked_array = Arrow::ChunkedArray.new(arrays)
34
+ column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
35
+ chunked_array)
36
+ packed_column = column.pack
37
+ assert_equal([1, [true, false, nil, true]],
38
+ [packed_column.data.n_chunks, packed_column.to_a])
39
+ end
27
40
  end
@@ -75,5 +75,25 @@ class CSVLoaderTest < Test::Unit::TestCase
75
75
  2 chris -1
76
76
  TABLE
77
77
  end
78
+
79
+ test("null: with double quote") do
80
+ path = fixture_path("null-with-double-quote.csv").to_s
81
+ assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
82
+ name score
83
+ 0 alice 10
84
+ 1 bob
85
+ 2 chris -1
86
+ TABLE
87
+ end
88
+
89
+ test("null: without double quote") do
90
+ path = fixture_path("null-without-double-quote.csv").to_s
91
+ assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
92
+ name score
93
+ 0 alice 10
94
+ 1 bob
95
+ 2 chris -1
96
+ TABLE
97
+ end
78
98
  end
79
99
  end
@@ -0,0 +1,111 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ class GroupTest < Test::Unit::TestCase
16
+ include Helper::Fixture
17
+
18
+ def setup
19
+ raw_table = {
20
+ :group_key1 => Arrow::UInt8Array.new([1, 1, 2, 3, 3, 3]),
21
+ :group_key2 => Arrow::UInt8Array.new([1, 1, 1, 1, 2, 2]),
22
+ :int => Arrow::Int32Array.new([-1, -2, nil, -4, -5, -6]),
23
+ :uint => Arrow::UInt32Array.new([1, nil, 3, 4, 5, 6]),
24
+ :float => Arrow::FloatArray.new([nil, 2.2, 3.3, 4.4, 5.5, 6.6]),
25
+ :string => Arrow::StringArray.new(["a", "b", "c", nil, "e", "f"]),
26
+ }
27
+ @table = Arrow::Table.new(raw_table)
28
+ end
29
+
30
+ sub_test_case("key") do
31
+ test("Time") do
32
+ time_values = [
33
+ Time.parse("2018-01-29"),
34
+ Time.parse("2018-01-30"),
35
+ ]
36
+ raw_table = {
37
+ :time => Arrow::ArrayBuilder.build(time_values),
38
+ :int => Arrow::Int32Array.new([-1, -2]),
39
+ }
40
+ table = Arrow::Table.new(raw_table)
41
+ assert_equal(<<-TABLE, table.group(:time).count.to_s)
42
+ time int
43
+ 0 2018-01-29T00:00:00+09:00 1
44
+ 1 2018-01-30T00:00:00+09:00 1
45
+ TABLE
46
+ end
47
+ end
48
+
49
+ sub_test_case("#count") do
50
+ test("single") do
51
+ assert_equal(<<-TABLE, @table.group(:group_key1).count.to_s)
52
+ group_key1 group_key2 int uint float string
53
+ 0 1 2 2 1 1 2
54
+ 1 2 1 0 1 1 1
55
+ 2 3 3 3 3 3 2
56
+ TABLE
57
+ end
58
+
59
+ test("multiple") do
60
+ assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).count.to_s)
61
+ group_key1 group_key2 int uint float string
62
+ 0 1 1 2 1 1 2
63
+ 1 2 1 0 1 1 1
64
+ 2 3 1 1 1 1 0
65
+ 3 3 2 2 2 2 2
66
+ TABLE
67
+ end
68
+ end
69
+
70
+ sub_test_case("#sum") do
71
+ test("single") do
72
+ assert_equal(<<-TABLE, @table.group(:group_key1).sum.to_s)
73
+ group_key1 group_key2 int uint float
74
+ 0 1 2 -3 1 2.200000
75
+ 1 2 1 0 3 3.300000
76
+ 2 3 5 -15 15 16.500000
77
+ TABLE
78
+ end
79
+
80
+ test("multiple") do
81
+ assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).sum.to_s)
82
+ group_key1 group_key2 int uint float
83
+ 0 1 1 -3 1 2.200000
84
+ 1 2 1 0 3 3.300000
85
+ 2 3 1 -4 4 4.400000
86
+ 3 3 2 -11 11 12.100000
87
+ TABLE
88
+ end
89
+ end
90
+
91
+ sub_test_case("#average") do
92
+ test("single") do
93
+ assert_equal(<<-TABLE, @table.group(:group_key1).average.to_s)
94
+ group_key1 group_key2 int uint float
95
+ 0 1 1.000000 -1.500000 1.000000 2.200000
96
+ 1 2 1.000000 0.000000 3.000000 3.300000
97
+ 2 3 1.666667 -5.000000 5.000000 5.500000
98
+ TABLE
99
+ end
100
+
101
+ test("multiple") do
102
+ assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).average.to_s)
103
+ group_key1 group_key2 int uint float
104
+ 0 1 1 -1.500000 1.000000 2.200000
105
+ 1 2 1 0.000000 3.000000 3.300000
106
+ 2 3 1 -4.000000 4.000000 4.400000
107
+ 3 3 2 -5.500000 5.500000 6.050000
108
+ TABLE
109
+ end
110
+ end
111
+ end
data/test/test-slicer.rb CHANGED
@@ -106,6 +106,21 @@ class SlicerTest < Test::Unit::TestCase
106
106
  TABLE
107
107
  end
108
108
 
109
+ test("column.valid?") do
110
+ sliced_table = @table.slice do |slicer|
111
+ slicer.visible.valid?
112
+ end
113
+ assert_equal(<<-TABLE, sliced_table.to_s)
114
+ count visible
115
+ 0 1 true
116
+ 1 2 false
117
+ 2 8 true
118
+ 3 16 true
119
+ 4 32 false
120
+ 5 256 true
121
+ TABLE
122
+ end
123
+
109
124
  sub_test_case("column ==") do
110
125
  test("nil") do
111
126
  sliced_table = @table.slice do |slicer|
@@ -298,6 +313,33 @@ class SlicerTest < Test::Unit::TestCase
298
313
  TABLE
299
314
  end
300
315
 
316
+ test("column.in") do
317
+ sliced_table = @table.slice do |slicer|
318
+ slicer.count.in?([1, 4, 16, 64])
319
+ end
320
+ assert_equal(<<-TABLE, sliced_table.to_s)
321
+ count visible
322
+ 0 1 true
323
+ 1 4
324
+ 2 16 true
325
+ 3 64
326
+ TABLE
327
+ end
328
+
329
+ test("!column.in") do
330
+ sliced_table = @table.slice do |slicer|
331
+ !slicer.count.in?([1, 4, 16, 64])
332
+ end
333
+ assert_equal(<<-TABLE, sliced_table.to_s)
334
+ count visible
335
+ 0 0
336
+ 1 2 false
337
+ 2 8 true
338
+ 3 32 false
339
+ 4 256 true
340
+ TABLE
341
+ end
342
+
301
343
  test("condition & condition") do
302
344
  sliced_table = @table.slice do |slicer|
303
345
  slicer.visible & (slicer.count >= 16)
data/test/test-table.rb CHANGED
@@ -58,14 +58,21 @@ class TableTest < Test::Unit::TestCase
58
58
  TABLE
59
59
  end
60
60
 
61
- test("Integer") do
61
+ test("Integer: positive") do
62
62
  assert_equal(<<-TABLE, @table.slice(2).to_s)
63
63
  count visible
64
64
  0 4
65
65
  TABLE
66
66
  end
67
67
 
68
- test("Range: include end") do
68
+ test("Integer: negative") do
69
+ assert_equal(<<-TABLE, @table.slice(-1).to_s)
70
+ count visible
71
+ 0 128
72
+ TABLE
73
+ end
74
+
75
+ test("Range: positive: include end") do
69
76
  assert_equal(<<-TABLE, @table.slice(2..4).to_s)
70
77
  count visible
71
78
  0 4
@@ -74,7 +81,7 @@ class TableTest < Test::Unit::TestCase
74
81
  TABLE
75
82
  end
76
83
 
77
- test("Range: exclude end") do
84
+ test("Range: positive: exclude end") do
78
85
  assert_equal(<<-TABLE, @table.slice(2...4).to_s)
79
86
  count visible
80
87
  0 4
@@ -82,7 +89,24 @@ class TableTest < Test::Unit::TestCase
82
89
  TABLE
83
90
  end
84
91
 
85
- test("[from, to]") do
92
+ test("Range: negative: include end") do
93
+ assert_equal(<<-TABLE, @table.slice(-4..-2).to_s)
94
+ count visible
95
+ 0 16 true
96
+ 1 32 false
97
+ 2 64
98
+ TABLE
99
+ end
100
+
101
+ test("Range: negative: exclude end") do
102
+ assert_equal(<<-TABLE, @table.slice(-4...-2).to_s)
103
+ count visible
104
+ 0 16 true
105
+ 1 32 false
106
+ TABLE
107
+ end
108
+
109
+ test("[from, to]: positive") do
86
110
  assert_equal(<<-TABLE, @table.slice([0, 2]).to_s)
87
111
  count visible
88
112
  0 1 true
@@ -90,6 +114,14 @@ class TableTest < Test::Unit::TestCase
90
114
  TABLE
91
115
  end
92
116
 
117
+ test("[from, to]: negative") do
118
+ assert_equal(<<-TABLE, @table.slice([-4, 2]).to_s)
119
+ count visible
120
+ 0 16 true
121
+ 1 32 false
122
+ TABLE
123
+ end
124
+
93
125
  test("Integer, Range, ...") do
94
126
  assert_equal(<<-TABLE, @table.slice(0, 4...7).to_s)
95
127
  count visible
@@ -109,12 +141,6 @@ class TableTest < Test::Unit::TestCase
109
141
  test("[Symbol]") do
110
142
  assert_equal(@visible_column, @table[:visible])
111
143
  end
112
-
113
- test("[String, Symbol]") do
114
- assert_equal(Arrow::Table.new(@table.schema,
115
- [@visible_column, @count_column]).to_s,
116
- @table["visible", :count].to_s)
117
- end
118
144
  end
119
145
 
120
146
  sub_test_case("#merge") do
@@ -391,4 +417,20 @@ class TableTest < Test::Unit::TestCase
391
417
  end
392
418
  end
393
419
  end
420
+
421
+ test("#pack") do
422
+ packed_table = @table.pack
423
+ column_n_chunks = packed_table.columns.collect {|c| c.data.n_chunks}
424
+ assert_equal([[1, 1], <<-TABLE], [column_n_chunks, packed_table.to_s])
425
+ count visible
426
+ 0 1 true
427
+ 1 2 false
428
+ 2 4
429
+ 3 8 true
430
+ 4 16 true
431
+ 5 32 false
432
+ 6 64
433
+ 7 128
434
+ TABLE
435
+ end
394
436
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.8.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-05 00:00:00.000000000 Z
11
+ date: 2018-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: gobject-introspection
@@ -150,11 +150,13 @@ files:
150
150
  - lib/arrow/compatibility.rb
151
151
  - lib/arrow/csv-loader.rb
152
152
  - lib/arrow/csv-reader.rb
153
+ - lib/arrow/data-type.rb
153
154
  - lib/arrow/date32-array-builder.rb
154
155
  - lib/arrow/date32-array.rb
155
156
  - lib/arrow/date64-array-builder.rb
156
157
  - lib/arrow/date64-array.rb
157
158
  - lib/arrow/field.rb
159
+ - lib/arrow/group.rb
158
160
  - lib/arrow/loader.rb
159
161
  - lib/arrow/record-batch-file-reader.rb
160
162
  - lib/arrow/record-batch-stream-reader.rb
@@ -163,14 +165,18 @@ files:
163
165
  - lib/arrow/record.rb
164
166
  - lib/arrow/slicer.rb
165
167
  - lib/arrow/table-formatter.rb
168
+ - lib/arrow/table-list-formatter.rb
166
169
  - lib/arrow/table-loader.rb
167
170
  - lib/arrow/table-saver.rb
171
+ - lib/arrow/table-table-formatter.rb
168
172
  - lib/arrow/table.rb
169
173
  - lib/arrow/tensor.rb
170
174
  - lib/arrow/timestamp-array-builder.rb
171
175
  - lib/arrow/timestamp-array.rb
172
176
  - lib/arrow/version.rb
173
177
  - red-arrow.gemspec
178
+ - test/fixture/null-with-double-quote.csv
179
+ - test/fixture/null-without-double-quote.csv
174
180
  - test/fixture/with-header.csv
175
181
  - test/fixture/without-header.csv
176
182
  - test/helper.rb
@@ -184,6 +190,7 @@ files:
184
190
  - test/test-csv-reader.rb
185
191
  - test/test-date32-array.rb
186
192
  - test/test-date64-array.rb
193
+ - test/test-group.rb
187
194
  - test/test-record-batch-file-reader.rb
188
195
  - test/test-record-batch.rb
189
196
  - test/test-slicer.rb
@@ -217,7 +224,9 @@ summary: Red Arrow is a Ruby bindings of Apache Arrow. Red Arrow is based on GOb
217
224
  test_files:
218
225
  - test/test-record-batch-file-reader.rb
219
226
  - test/fixture/with-header.csv
227
+ - test/fixture/null-without-double-quote.csv
220
228
  - test/fixture/without-header.csv
229
+ - test/fixture/null-with-double-quote.csv
221
230
  - test/test-csv-reader.rb
222
231
  - test/test-column.rb
223
232
  - test/test-array-builder.rb
@@ -230,6 +239,7 @@ test_files:
230
239
  - test/test-table.rb
231
240
  - test/test-csv-loader.rb
232
241
  - test/test-array.rb
242
+ - test/test-group.rb
233
243
  - test/helper/fixture.rb
234
244
  - test/test-date64-array.rb
235
245
  - test/test-slicer.rb