red-arrow 11.0.0 → 12.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/converters.hpp +12 -27
  4. data/lib/arrow/array-computable.rb +13 -0
  5. data/lib/arrow/data-type.rb +9 -0
  6. data/lib/arrow/dense-union-array-builder.rb +49 -0
  7. data/lib/arrow/dense-union-array.rb +26 -0
  8. data/lib/arrow/loader.rb +5 -0
  9. data/lib/arrow/record-batch-file-reader.rb +2 -0
  10. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  11. data/lib/arrow/scalar.rb +67 -0
  12. data/lib/arrow/slicer.rb +61 -0
  13. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  14. data/lib/arrow/sparse-union-array.rb +26 -0
  15. data/lib/arrow/struct-array-builder.rb +0 -5
  16. data/lib/arrow/table.rb +130 -10
  17. data/lib/arrow/union-array-builder.rb +59 -0
  18. data/lib/arrow/version.rb +1 -1
  19. data/test/raw-records/test-dense-union-array.rb +90 -45
  20. data/test/raw-records/test-list-array.rb +28 -10
  21. data/test/raw-records/test-map-array.rb +39 -10
  22. data/test/raw-records/test-sparse-union-array.rb +86 -41
  23. data/test/raw-records/test-struct-array.rb +22 -8
  24. data/test/test-array.rb +7 -0
  25. data/test/test-chunked-array.rb +9 -0
  26. data/test/test-dense-union-array.rb +42 -0
  27. data/test/test-dense-union-data-type.rb +1 -1
  28. data/test/test-function.rb +7 -7
  29. data/test/test-group.rb +58 -58
  30. data/test/test-record-batch-file-reader.rb +21 -0
  31. data/test/test-record-batch-stream-reader.rb +129 -0
  32. data/test/test-scalar.rb +65 -0
  33. data/test/test-slicer.rb +194 -129
  34. data/test/test-sparse-union-array.rb +38 -0
  35. data/test/test-table.rb +200 -38
  36. data/test/values/test-dense-union-array.rb +88 -45
  37. data/test/values/test-list-array.rb +26 -10
  38. data/test/values/test-map-array.rb +33 -10
  39. data/test/values/test-sparse-union-array.rb +84 -41
  40. data/test/values/test-struct-array.rb +20 -8
  41. metadata +20 -7
data/test/test-dense-union-data-type.rb CHANGED
@@ -35,7 +35,7 @@ class DenseUnionDataTypeTest < Test::Unit::TestCase
35
35
  test("description") do
36
36
  assert_equal("dense_union<visible: bool=2, count: int32=9>",
37
37
  Arrow::DenseUnionDataType.new(fields: @fields,
38
- type_codes: [2, 9]).to_s)
38
+ type_codes: [2, 9]).to_s)
39
39
  end
40
40
  end
41
41
  end
data/test/test-function.rb CHANGED
@@ -199,12 +199,12 @@ class FunctionTest < Test::Unit::TestCase
199
199
  end
200
200
 
201
201
  def test_call
202
- or_function = Arrow::Function.find("or")
203
- args = [
204
- Arrow::BooleanArray.new([true, false, false]),
205
- Arrow::BooleanArray.new([true, false, true]),
206
- ]
207
- assert_equal([true, false, true],
208
- or_function.call(args).value.to_a)
202
+ or_function = Arrow::Function.find("or")
203
+ args = [
204
+ Arrow::BooleanArray.new([true, false, false]),
205
+ Arrow::BooleanArray.new([true, false, true]),
206
+ ]
207
+ assert_equal([true, false, true],
208
+ or_function.call(args).value.to_a)
209
209
  end
210
210
  end
data/test/test-group.rb CHANGED
@@ -42,9 +42,9 @@ class GroupTest < Test::Unit::TestCase
42
42
  }
43
43
  table = Arrow::Table.new(raw_table)
44
44
  assert_equal(<<-TABLE, table.group(:time).count.to_s)
45
- count(int) time
46
- 0 1 #{time_values[0].iso8601}
47
- 1 1 #{time_values[1].iso8601}
45
+ time count(int)
46
+ 0 #{time_values[0].iso8601} 1
47
+ 1 #{time_values[1].iso8601} 1
48
48
  TABLE
49
49
  end
50
50
  end
@@ -52,31 +52,31 @@ class GroupTest < Test::Unit::TestCase
52
52
  sub_test_case("#count") do
53
53
  test("single") do
54
54
  assert_equal(<<-TABLE, @table.group(:group_key1).count.to_s)
55
- count(group_key2) count(int) count(uint) count(float) count(string) group_key1
56
- 0 2 2 1 1 2 1
57
- 1 1 0 1 1 1 2
58
- 2 3 3 3 3 2 3
55
+ group_key1 count(group_key2) count(int) count(uint) count(float) count(string)
56
+ 0 1 2 2 1 1 2
57
+ 1 2 1 0 1 1 1
58
+ 2 3 3 3 3 3 2
59
59
  TABLE
60
60
  end
61
61
 
62
62
  test("multiple") do
63
63
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).count.to_s)
64
- count(int) count(uint) count(float) count(string) group_key1 group_key2
65
- 0 2 1 1 2 1 1
66
- 1 0 1 1 1 2 1
67
- 2 1 1 1 0 3 1
68
- 3 2 2 2 2 3 2
64
+ group_key1 group_key2 count(int) count(uint) count(float) count(string)
65
+ 0 1 1 2 1 1 2
66
+ 1 2 1 0 1 1 1
67
+ 2 3 1 1 1 1 0
68
+ 3 3 2 2 2 2 2
69
69
  TABLE
70
70
  end
71
71
 
72
72
  test("column") do
73
73
  group = @table.group(:group_key1, :group_key2)
74
74
  assert_equal(<<-TABLE, group.count(:int, :uint).to_s)
75
- count(int) count(uint) group_key1 group_key2
76
- 0 2 1 1 1
77
- 1 0 1 2 1
78
- 2 1 1 3 1
79
- 3 2 2 3 2
75
+ group_key1 group_key2 count(int) count(uint)
76
+ 0 1 1 2 1
77
+ 1 2 1 0 1
78
+ 2 3 1 1 1
79
+ 3 3 2 2 2
80
80
  TABLE
81
81
  end
82
82
  end
@@ -84,20 +84,20 @@ class GroupTest < Test::Unit::TestCase
84
84
  sub_test_case("#sum") do
85
85
  test("single") do
86
86
  assert_equal(<<-TABLE, @table.group(:group_key1).sum.to_s)
87
- sum(group_key2) sum(int) sum(uint) sum(float) group_key1
88
- 0 2 -3 1 2.200000 1
89
- 1 1 (null) 3 3.300000 2
90
- 2 5 -15 15 16.500000 3
87
+ group_key1 sum(group_key2) sum(int) sum(uint) sum(float)
88
+ 0 1 2 -3 1 2.200000
89
+ 1 2 1 (null) 3 3.300000
90
+ 2 3 5 -15 15 16.500000
91
91
  TABLE
92
92
  end
93
93
 
94
94
  test("multiple") do
95
95
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).sum.to_s)
96
- sum(int) sum(uint) sum(float) group_key1 group_key2
97
- 0 -3 1 2.200000 1 1
98
- 1 (null) 3 3.300000 2 1
99
- 2 -4 4 4.400000 3 1
100
- 3 -11 11 12.100000 3 2
96
+ group_key1 group_key2 sum(int) sum(uint) sum(float)
97
+ 0 1 1 -3 1 2.200000
98
+ 1 2 1 (null) 3 3.300000
99
+ 2 3 1 -4 4 4.400000
100
+ 3 3 2 -11 11 12.100000
101
101
  TABLE
102
102
  end
103
103
  end
@@ -105,20 +105,20 @@ class GroupTest < Test::Unit::TestCase
105
105
  sub_test_case("#mean") do
106
106
  test("single") do
107
107
  assert_equal(<<-TABLE, @table.group(:group_key1).mean.to_s)
108
- mean(group_key2) mean(int) mean(uint) mean(float) group_key1
109
- 0 1.000000 -1.500000 1.000000 2.200000 1
110
- 1 1.000000 (null) 3.000000 3.300000 2
111
- 2 1.666667 -5.000000 5.000000 5.500000 3
108
+ group_key1 mean(group_key2) mean(int) mean(uint) mean(float)
109
+ 0 1 1.000000 -1.500000 1.000000 2.200000
110
+ 1 2 1.000000 (null) 3.000000 3.300000
111
+ 2 3 1.666667 -5.000000 5.000000 5.500000
112
112
  TABLE
113
113
  end
114
114
 
115
115
  test("multiple") do
116
116
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).mean.to_s)
117
- mean(int) mean(uint) mean(float) group_key1 group_key2
118
- 0 -1.500000 1.000000 2.200000 1 1
119
- 1 (null) 3.000000 3.300000 2 1
120
- 2 -4.000000 4.000000 4.400000 3 1
121
- 3 -5.500000 5.500000 6.050000 3 2
117
+ group_key1 group_key2 mean(int) mean(uint) mean(float)
118
+ 0 1 1 -1.500000 1.000000 2.200000
119
+ 1 2 1 (null) 3.000000 3.300000
120
+ 2 3 1 -4.000000 4.000000 4.400000
121
+ 3 3 2 -5.500000 5.500000 6.050000
122
122
  TABLE
123
123
  end
124
124
  end
@@ -126,20 +126,20 @@ class GroupTest < Test::Unit::TestCase
126
126
  sub_test_case("#min") do
127
127
  test("single") do
128
128
  assert_equal(<<-TABLE, @table.group(:group_key1).min.to_s)
129
- min(group_key2) min(int) min(uint) min(float) group_key1
130
- 0 1 -2 1 2.200000 1
131
- 1 1 (null) 3 3.300000 2
132
- 2 1 -6 4 4.400000 3
129
+ group_key1 min(group_key2) min(int) min(uint) min(float)
130
+ 0 1 1 -2 1 2.200000
131
+ 1 2 1 (null) 3 3.300000
132
+ 2 3 1 -6 4 4.400000
133
133
  TABLE
134
134
  end
135
135
 
136
136
  test("multiple") do
137
137
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).min.to_s)
138
- min(int) min(uint) min(float) group_key1 group_key2
139
- 0 -2 1 2.200000 1 1
140
- 1 (null) 3 3.300000 2 1
141
- 2 -4 4 4.400000 3 1
142
- 3 -6 5 5.500000 3 2
138
+ group_key1 group_key2 min(int) min(uint) min(float)
139
+ 0 1 1 -2 1 2.200000
140
+ 1 2 1 (null) 3 3.300000
141
+ 2 3 1 -4 4 4.400000
142
+ 3 3 2 -6 5 5.500000
143
143
  TABLE
144
144
  end
145
145
  end
@@ -147,20 +147,20 @@ class GroupTest < Test::Unit::TestCase
147
147
  sub_test_case("#max") do
148
148
  test("single") do
149
149
  assert_equal(<<-TABLE, @table.group(:group_key1).max.to_s)
150
- max(group_key2) max(int) max(uint) max(float) group_key1
151
- 0 1 -1 1 2.200000 1
152
- 1 1 (null) 3 3.300000 2
153
- 2 2 -4 6 6.600000 3
150
+ group_key1 max(group_key2) max(int) max(uint) max(float)
151
+ 0 1 1 -1 1 2.200000
152
+ 1 2 1 (null) 3 3.300000
153
+ 2 3 2 -4 6 6.600000
154
154
  TABLE
155
155
  end
156
156
 
157
157
  test("multiple") do
158
158
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).max.to_s)
159
- max(int) max(uint) max(float) group_key1 group_key2
160
- 0 -1 1 2.200000 1 1
161
- 1 (null) 3 3.300000 2 1
162
- 2 -4 4 4.400000 3 1
163
- 3 -5 6 6.600000 3 2
159
+ group_key1 group_key2 max(int) max(uint) max(float)
160
+ 0 1 1 -1 1 2.200000
161
+ 1 2 1 (null) 3 3.300000
162
+ 2 3 1 -4 4 4.400000
163
+ 3 3 2 -5 6 6.600000
164
164
  TABLE
165
165
  end
166
166
  end
@@ -169,11 +169,11 @@ class GroupTest < Test::Unit::TestCase
169
169
  test("function()") do
170
170
  group = @table.group(:group_key1, :group_key2)
171
171
  assert_equal(<<-TABLE, group.aggregate("count(int)", "sum(uint)").to_s)
172
- count(int) sum(uint) group_key1 group_key2
173
- 0 2 1 1 1
174
- 1 0 3 2 1
175
- 2 1 4 3 1
176
- 3 2 11 3 2
172
+ group_key1 group_key2 count(int) sum(uint)
173
+ 0 1 1 2 1
174
+ 1 2 1 0 3
175
+ 2 3 1 1 4
176
+ 3 3 2 2 11
177
177
  TABLE
178
178
  end
179
179
  end
data/test/test-record-batch-file-reader.rb CHANGED
@@ -112,4 +112,25 @@ class RecordBatchFileReaderTest < Test::Unit::TestCase
112
112
  end
113
113
  end
114
114
  end
115
+
116
+ sub_test_case("#each") do
117
+ test("without block") do
118
+ buffer = Arrow::ResizableBuffer.new(1024)
119
+ Arrow::Table.new(number: [1, 2, 3]).save(buffer)
120
+ Arrow::BufferInputStream.open(buffer) do |input|
121
+ reader = Arrow::RecordBatchFileReader.new(input)
122
+ each = reader.each
123
+ assert_equal({
124
+ size: 1,
125
+ to_a: [
126
+ Arrow::RecordBatch.new(number: [1, 2, 3]),
127
+ ],
128
+ },
129
+ {
130
+ size: each.size,
131
+ to_a: each.to_a,
132
+ })
133
+ end
134
+ end
135
+ end
115
136
  end
data/test/test-record-batch-stream-reader.rb ADDED
@@ -0,0 +1,129 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class RecordBatchStreamReaderTest < Test::Unit::TestCase
19
+ test("write/read") do
20
+ fields = [
21
+ Arrow::Field.new("uint8", :uint8),
22
+ Arrow::Field.new("uint16", :uint16),
23
+ Arrow::Field.new("uint32", :uint32),
24
+ Arrow::Field.new("uint64", :uint64),
25
+ Arrow::Field.new("int8", :int8),
26
+ Arrow::Field.new("int16", :int16),
27
+ Arrow::Field.new("int32", :int32),
28
+ Arrow::Field.new("int64", :int64),
29
+ Arrow::Field.new("float", :float),
30
+ Arrow::Field.new("double", :double),
31
+ ]
32
+ schema = Arrow::Schema.new(fields)
33
+
34
+ tempfile = Tempfile.new(["batch", ".arrows"])
35
+ Arrow::FileOutputStream.open(tempfile.path, false) do |output|
36
+ Arrow::RecordBatchStreamWriter.open(output, schema) do |writer|
37
+ uints = [1, 2, 4, 8]
38
+ ints = [1, -2, 4, -8]
39
+ floats = [1.1, -2.2, 4.4, -8.8]
40
+ columns = [
41
+ Arrow::UInt8Array.new(uints),
42
+ Arrow::UInt16Array.new(uints),
43
+ Arrow::UInt32Array.new(uints),
44
+ Arrow::UInt64Array.new(uints),
45
+ Arrow::Int8Array.new(ints),
46
+ Arrow::Int16Array.new(ints),
47
+ Arrow::Int32Array.new(ints),
48
+ Arrow::Int64Array.new(ints),
49
+ Arrow::FloatArray.new(floats),
50
+ Arrow::DoubleArray.new(floats),
51
+ ]
52
+
53
+ record_batch = Arrow::RecordBatch.new(schema, 4, columns)
54
+ writer.write_record_batch(record_batch)
55
+ end
56
+ end
57
+
58
+ Arrow::MemoryMappedInputStream.open(tempfile.path) do |input|
59
+ reader = Arrow::RecordBatchStreamReader.new(input)
60
+ reader.each do |record_batch|
61
+ assert_equal([
62
+ {
63
+ "uint8" => 1,
64
+ "uint16" => 1,
65
+ "uint32" => 1,
66
+ "uint64" => 1,
67
+ "int8" => 1,
68
+ "int16" => 1,
69
+ "int32" => 1,
70
+ "int64" => 1,
71
+ "float" => 1.100000023841858,
72
+ "double" => 1.1,
73
+ },
74
+ {
75
+ "uint8" => 2,
76
+ "uint16" => 2,
77
+ "uint32" => 2,
78
+ "uint64" => 2,
79
+ "int8" => -2,
80
+ "int16" => -2,
81
+ "int32" => -2,
82
+ "int64" => -2,
83
+ "float" => -2.200000047683716,
84
+ "double" => -2.2,
85
+ },
86
+ {
87
+ "uint8" => 4,
88
+ "uint16" => 4,
89
+ "uint32" => 4,
90
+ "uint64" => 4,
91
+ "int8" => 4,
92
+ "int16" => 4,
93
+ "int32" => 4,
94
+ "int64" => 4,
95
+ "float" => 4.400000095367432,
96
+ "double" => 4.4,
97
+ },
98
+ {
99
+ "uint8" => 8,
100
+ "uint16" => 8,
101
+ "uint32" => 8,
102
+ "uint64" => 8,
103
+ "int8" => -8,
104
+ "int16" => -8,
105
+ "int32" => -8,
106
+ "int64" => -8,
107
+ "float" => -8.800000190734863,
108
+ "double" => -8.8,
109
+ },
110
+ ],
111
+ record_batch.collect(&:to_h))
112
+ end
113
+ end
114
+ end
115
+
116
+ sub_test_case("#each") do
117
+ test("without block") do
118
+ buffer = Arrow::ResizableBuffer.new(1024)
119
+ Arrow::Table.new(number: [1, 2, 3]).save(buffer, format: :arrows)
120
+ Arrow::BufferInputStream.open(buffer) do |input|
121
+ reader = Arrow::RecordBatchStreamReader.new(input)
122
+ assert_equal([
123
+ Arrow::RecordBatch.new(number: [1, 2, 3]),
124
+ ],
125
+ reader.each.to_a)
126
+ end
127
+ end
128
+ end
129
+ end
data/test/test-scalar.rb ADDED
@@ -0,0 +1,65 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class ScalarTest < Test::Unit::TestCase
19
+ sub_test_case(".resolve") do
20
+ test("Scalar") do
21
+ assert_equal(Arrow::Int32Scalar.new(29),
22
+ Arrow::Scalar.resolve(Arrow::Int32Scalar.new(29)))
23
+ end
24
+
25
+ test("true") do
26
+ assert_equal(Arrow::BooleanScalar.new(true),
27
+ Arrow::Scalar.resolve(true))
28
+ end
29
+
30
+ test("false") do
31
+ assert_equal(Arrow::BooleanScalar.new(false),
32
+ Arrow::Scalar.resolve(false))
33
+ end
34
+
35
+ test("Symbol") do
36
+ assert_equal(Arrow::StringScalar.new("hello"),
37
+ Arrow::Scalar.resolve(:hello))
38
+ end
39
+
40
+ test("String") do
41
+ assert_equal(Arrow::StringScalar.new("hello"),
42
+ Arrow::Scalar.resolve("hello"))
43
+ end
44
+
45
+ test("Integer") do
46
+ assert_equal(Arrow::Int64Scalar.new(-29),
47
+ Arrow::Scalar.resolve(-29))
48
+ end
49
+
50
+ test("Float") do
51
+ assert_equal(Arrow::DoubleScalar.new(2.9),
52
+ Arrow::Scalar.resolve(2.9))
53
+ end
54
+
55
+ test("Int64Scalar, :int32") do
56
+ assert_equal(Arrow::Int32Scalar.new(-29),
57
+ Arrow::Scalar.resolve(Arrow::Int64Scalar.new(-29), :int32))
58
+ end
59
+
60
+ test("Integer, :int32") do
61
+ assert_equal(Arrow::Int32Scalar.new(-29),
62
+ Arrow::Scalar.resolve(-29, :int32))
63
+ end
64
+ end
65
+ end