red-arrow 5.0.0 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +23 -0
  3. data/ext/arrow/converters.cpp +5 -0
  4. data/ext/arrow/converters.hpp +126 -0
  5. data/ext/arrow/extconf.rb +13 -0
  6. data/ext/arrow/raw-records.cpp +1 -0
  7. data/ext/arrow/values.cpp +1 -0
  8. data/lib/arrow/aggregate-node-options.rb +35 -0
  9. data/lib/arrow/aggregation.rb +46 -0
  10. data/lib/arrow/array-builder.rb +5 -0
  11. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  12. data/lib/arrow/column-containable.rb +100 -1
  13. data/lib/arrow/datum.rb +2 -0
  14. data/lib/arrow/expression.rb +48 -0
  15. data/lib/arrow/file-system.rb +34 -0
  16. data/lib/arrow/group.rb +116 -124
  17. data/lib/arrow/loader.rb +13 -0
  18. data/lib/arrow/map-array-builder.rb +109 -0
  19. data/lib/arrow/map-array.rb +26 -0
  20. data/lib/arrow/map-data-type.rb +89 -0
  21. data/lib/arrow/path-extension.rb +1 -1
  22. data/lib/arrow/record-batch-reader.rb +41 -0
  23. data/lib/arrow/record-batch.rb +0 -2
  24. data/lib/arrow/slicer.rb +44 -143
  25. data/lib/arrow/source-node-options.rb +32 -0
  26. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  27. data/lib/arrow/symbol-values-appendable.rb +34 -0
  28. data/lib/arrow/table-concatenate-options.rb +36 -0
  29. data/lib/arrow/table-formatter.rb +141 -17
  30. data/lib/arrow/table-list-formatter.rb +5 -3
  31. data/lib/arrow/table-loader.rb +41 -3
  32. data/lib/arrow/table-saver.rb +29 -3
  33. data/lib/arrow/table-table-formatter.rb +7 -31
  34. data/lib/arrow/table.rb +32 -38
  35. data/lib/arrow/version.rb +1 -1
  36. data/red-arrow.gemspec +1 -1
  37. data/test/raw-records/test-dense-union-array.rb +14 -0
  38. data/test/raw-records/test-list-array.rb +19 -0
  39. data/test/raw-records/test-map-array.rb +441 -0
  40. data/test/raw-records/test-sparse-union-array.rb +14 -0
  41. data/test/raw-records/test-struct-array.rb +15 -0
  42. data/test/test-array-builder.rb +7 -0
  43. data/test/test-binary-dictionary-array-builder.rb +103 -0
  44. data/test/test-csv-loader.rb +8 -8
  45. data/test/test-expression.rb +40 -0
  46. data/test/test-group.rb +75 -51
  47. data/test/test-map-array-builder.rb +110 -0
  48. data/test/test-map-array.rb +33 -0
  49. data/test/test-map-data-type.rb +36 -0
  50. data/test/test-record-batch-reader.rb +46 -0
  51. data/test/test-record-batch.rb +42 -0
  52. data/test/test-slicer.rb +166 -167
  53. data/test/test-string-dictionary-array-builder.rb +103 -0
  54. data/test/test-table.rb +190 -53
  55. data/test/values/test-dense-union-array.rb +14 -0
  56. data/test/values/test-list-array.rb +17 -0
  57. data/test/values/test-map-array.rb +433 -0
  58. data/test/values/test-sparse-union-array.rb +14 -0
  59. data/test/values/test-struct-array.rb +15 -0
  60. metadata +107 -76
data/test/test-group.rb CHANGED
@@ -42,9 +42,9 @@ class GroupTest < Test::Unit::TestCase
42
42
  }
43
43
  table = Arrow::Table.new(raw_table)
44
44
  assert_equal(<<-TABLE, table.group(:time).count.to_s)
45
- time int
46
- 0 #{time_values[0].iso8601} 1
47
- 1 #{time_values[1].iso8601} 1
45
+ count(int) time
46
+ 0 1 #{time_values[0].iso8601}
47
+ 1 1 #{time_values[1].iso8601}
48
48
  TABLE
49
49
  end
50
50
  end
@@ -52,20 +52,31 @@ class GroupTest < Test::Unit::TestCase
52
52
  sub_test_case("#count") do
53
53
  test("single") do
54
54
  assert_equal(<<-TABLE, @table.group(:group_key1).count.to_s)
55
- group_key1 group_key2 int uint float string
56
- 0 1 2 2 1 1 2
57
- 1 2 1 0 1 1 1
58
- 2 3 3 3 3 3 2
55
+ count(group_key2) count(int) count(uint) count(float) count(string) group_key1
56
+ 0 2 2 1 1 2 1
57
+ 1 1 0 1 1 1 2
58
+ 2 3 3 3 3 2 3
59
59
  TABLE
60
60
  end
61
61
 
62
62
  test("multiple") do
63
63
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).count.to_s)
64
- group_key1 group_key2 int uint float string
65
- 0 1 1 2 1 1 2
66
- 1 2 1 0 1 1 1
67
- 2 3 1 1 1 1 0
68
- 3 3 2 2 2 2 2
64
+ count(int) count(uint) count(float) count(string) group_key1 group_key2
65
+ 0 2 1 1 2 1 1
66
+ 1 0 1 1 1 2 1
67
+ 2 1 1 1 0 3 1
68
+ 3 2 2 2 2 3 2
69
+ TABLE
70
+ end
71
+
72
+ test("column") do
73
+ group = @table.group(:group_key1, :group_key2)
74
+ assert_equal(<<-TABLE, group.count(:int, :uint).to_s)
75
+ count(int) count(uint) group_key1 group_key2
76
+ 0 2 1 1 1
77
+ 1 0 1 2 1
78
+ 2 1 1 3 1
79
+ 3 2 2 3 2
69
80
  TABLE
70
81
  end
71
82
  end
@@ -73,41 +84,41 @@ class GroupTest < Test::Unit::TestCase
73
84
  sub_test_case("#sum") do
74
85
  test("single") do
75
86
  assert_equal(<<-TABLE, @table.group(:group_key1).sum.to_s)
76
- group_key1 group_key2 int uint float
77
- 0 1 2 -3 1 2.200000
78
- 1 2 1 0 3 3.300000
79
- 2 3 5 -15 15 16.500000
87
+ sum(group_key2) sum(int) sum(uint) sum(float) group_key1
88
+ 0 2 -3 1 2.200000 1
89
+ 1 1 (null) 3 3.300000 2
90
+ 2 5 -15 15 16.500000 3
80
91
  TABLE
81
92
  end
82
93
 
83
94
  test("multiple") do
84
95
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).sum.to_s)
85
- group_key1 group_key2 int uint float
86
- 0 1 1 -3 1 2.200000
87
- 1 2 1 0 3 3.300000
88
- 2 3 1 -4 4 4.400000
89
- 3 3 2 -11 11 12.100000
96
+ sum(int) sum(uint) sum(float) group_key1 group_key2
97
+ 0 -3 1 2.200000 1 1
98
+ 1 (null) 3 3.300000 2 1
99
+ 2 -4 4 4.400000 3 1
100
+ 3 -11 11 12.100000 3 2
90
101
  TABLE
91
102
  end
92
103
  end
93
104
 
94
- sub_test_case("#average") do
105
+ sub_test_case("#mean") do
95
106
  test("single") do
96
- assert_equal(<<-TABLE, @table.group(:group_key1).average.to_s)
97
- group_key1 group_key2 int uint float
98
- 0 1 1.000000 -1.500000 1.000000 2.200000
99
- 1 2 1.000000 0.000000 3.000000 3.300000
100
- 2 3 1.666667 -5.000000 5.000000 5.500000
107
+ assert_equal(<<-TABLE, @table.group(:group_key1).mean.to_s)
108
+ mean(group_key2) mean(int) mean(uint) mean(float) group_key1
109
+ 0 1.000000 -1.500000 1.000000 2.200000 1
110
+ 1 1.000000 (null) 3.000000 3.300000 2
111
+ 2 1.666667 -5.000000 5.000000 5.500000 3
101
112
  TABLE
102
113
  end
103
114
 
104
115
  test("multiple") do
105
- assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).average.to_s)
106
- group_key1 group_key2 int uint float
107
- 0 1 1 -1.500000 1.000000 2.200000
108
- 1 2 1 0.000000 3.000000 3.300000
109
- 2 3 1 -4.000000 4.000000 4.400000
110
- 3 3 2 -5.500000 5.500000 6.050000
116
+ assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).mean.to_s)
117
+ mean(int) mean(uint) mean(float) group_key1 group_key2
118
+ 0 -1.500000 1.000000 2.200000 1 1
119
+ 1 (null) 3.000000 3.300000 2 1
120
+ 2 -4.000000 4.000000 4.400000 3 1
121
+ 3 -5.500000 5.500000 6.050000 3 2
111
122
  TABLE
112
123
  end
113
124
  end
@@ -115,20 +126,20 @@ class GroupTest < Test::Unit::TestCase
115
126
  sub_test_case("#min") do
116
127
  test("single") do
117
128
  assert_equal(<<-TABLE, @table.group(:group_key1).min.to_s)
118
- group_key1 group_key2 int uint float
119
- 0 1 1 -2 1 2.200000
120
- 1 2 1 3 3.300000
121
- 2 3 1 -6 4 4.400000
129
+ min(group_key2) min(int) min(uint) min(float) group_key1
130
+ 0 1 -2 1 2.200000 1
131
+ 1 1 (null) 3 3.300000 2
132
+ 2 1 -6 4 4.400000 3
122
133
  TABLE
123
134
  end
124
135
 
125
136
  test("multiple") do
126
137
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).min.to_s)
127
- group_key1 group_key2 int uint float
128
- 0 1 1 -2 1 2.200000
129
- 1 2 1 3 3.300000
130
- 2 3 1 -4 4 4.400000
131
- 3 3 2 -6 5 5.500000
138
+ min(int) min(uint) min(float) group_key1 group_key2
139
+ 0 -2 1 2.200000 1 1
140
+ 1 (null) 3 3.300000 2 1
141
+ 2 -4 4 4.400000 3 1
142
+ 3 -6 5 5.500000 3 2
132
143
  TABLE
133
144
  end
134
145
  end
@@ -136,20 +147,33 @@ class GroupTest < Test::Unit::TestCase
136
147
  sub_test_case("#max") do
137
148
  test("single") do
138
149
  assert_equal(<<-TABLE, @table.group(:group_key1).max.to_s)
139
- group_key1 group_key2 int uint float
140
- 0 1 1 -1 1 2.200000
141
- 1 2 1 3 3.300000
142
- 2 3 2 -4 6 6.600000
150
+ max(group_key2) max(int) max(uint) max(float) group_key1
151
+ 0 1 -1 1 2.200000 1
152
+ 1 1 (null) 3 3.300000 2
153
+ 2 2 -4 6 6.600000 3
143
154
  TABLE
144
155
  end
145
156
 
146
157
  test("multiple") do
147
158
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).max.to_s)
148
- group_key1 group_key2 int uint float
149
- 0 1 1 -1 1 2.200000
150
- 1 2 1 3 3.300000
151
- 2 3 1 -4 4 4.400000
152
- 3 3 2 -5 6 6.600000
159
+ max(int) max(uint) max(float) group_key1 group_key2
160
+ 0 -1 1 2.200000 1 1
161
+ 1 (null) 3 3.300000 2 1
162
+ 2 -4 4 4.400000 3 1
163
+ 3 -5 6 6.600000 3 2
164
+ TABLE
165
+ end
166
+ end
167
+
168
+ sub_test_case("#aggregate") do
169
+ test("function()") do
170
+ group = @table.group(:group_key1, :group_key2)
171
+ assert_equal(<<-TABLE, group.aggregate("count(int)", "sum(uint)").to_s)
172
+ count(int) sum(uint) group_key1 group_key2
173
+ 0 2 1 1 1
174
+ 1 0 3 2 1
175
+ 2 1 4 3 1
176
+ 3 2 11 3 2
153
177
  TABLE
154
178
  end
155
179
  end
data/test/test-map-array-builder.rb ADDED
@@ -0,0 +1,110 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class MapArrayBuilderTest < Test::Unit::TestCase
19
+ def setup
20
+ key_type = Arrow::StringDataType.new
21
+ item_type = Arrow::Int16DataType.new
22
+ data_type = Arrow::MapDataType.new(key_type, item_type)
23
+ @builder = Arrow::MapArrayBuilder.new(data_type)
24
+ end
25
+
26
+ sub_test_case("#append_value") do
27
+ test("nil") do
28
+ @builder.append_value(nil)
29
+ array = @builder.finish
30
+ assert_equal([nil], array.collect {|value| value})
31
+ end
32
+
33
+ test("Hash") do
34
+ @builder.append_value({"a" => 0, "b" => 1})
35
+ @builder.append_value({"c" => 0, "d" => 1})
36
+ array = @builder.finish
37
+ assert_equal([
38
+ {"a" => 0, "b" => 1},
39
+ {"c" => 0, "d" => 1}
40
+ ],
41
+ array.collect {|value| value})
42
+ end
43
+
44
+ test("#each") do
45
+ @builder.append_value([["a", 0], ["b", 1]])
46
+ @builder.append_value([["c", 0], ["d", 1]])
47
+ array = @builder.finish
48
+ assert_equal([
49
+ {"a" => 0, "b" => 1},
50
+ {"c" => 0, "d" => 1}
51
+ ],
52
+ array.collect {|value| value})
53
+ end
54
+ end
55
+
56
+ sub_test_case("#append_values") do
57
+ test("[nil]") do
58
+ @builder.append_values([nil])
59
+ array = @builder.finish
60
+ assert_equal([nil], array.collect {|value| value})
61
+ end
62
+
63
+ test("[Hash]") do
64
+ @builder.append_values([{"a" => 0, "b" => 1}, {"c" => 0, "d" => 1}])
65
+ array = @builder.finish
66
+ assert_equal([
67
+ {"a" => 0, "b" => 1},
68
+ {"c" => 0, "d" => 1}
69
+ ],
70
+ array.collect {|value| value})
71
+ end
72
+
73
+ test("[#each]") do
74
+ @builder.append_values([[["a", 0], ["b", 1]], [["c", 0], ["d", 1]]])
75
+ array = @builder.finish
76
+ assert_equal([
77
+ {"a" => 0, "b" => 1},
78
+ {"c" => 0, "d" => 1}
79
+ ],
80
+ array.collect {|value| value})
81
+ end
82
+
83
+ test("[nil, Hash, #each]") do
84
+ @builder.append_values([nil, {"a" => 0, "b" => 1}, [["c", 0], ["d", 1]]])
85
+ array = @builder.finish
86
+ assert_equal([
87
+ nil,
88
+ {"a" => 0, "b" => 1},
89
+ {"c" => 0, "d" => 1}
90
+ ],
91
+ array.collect {|value| value})
92
+ end
93
+
94
+ test("is_valids") do
95
+ @builder.append_values([
96
+ {"a" => 0, "b" => 1},
97
+ {"c" => 0, "d" => 1},
98
+ {"e" => 0, "f" => 1}
99
+ ],
100
+ [true, false, true])
101
+ array = @builder.finish
102
+ assert_equal([
103
+ {"a" => 0, "b" => 1},
104
+ nil,
105
+ {"e" => 0, "f" => 1}
106
+ ],
107
+ array.collect {|value| value})
108
+ end
109
+ end
110
+ end
data/test/test-map-array.rb ADDED
@@ -0,0 +1,33 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class MapArrayTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ test("build") do
21
+ key_type = Arrow::StringDataType.new
22
+ item_type = Arrow::Int16DataType.new
23
+ data_type = Arrow::MapDataType.new(key_type, item_type)
24
+ values = [
25
+ {"a" => 0, "b" => 1},
26
+ nil,
27
+ {"c" => 0, "d" => 1}
28
+ ]
29
+ array = Arrow::MapArray.new(data_type, values)
30
+ assert_equal(values, array.collect {|value| value})
31
+ end
32
+ end
33
+ end
data/test/test-map-data-type.rb ADDED
@@ -0,0 +1,36 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class MapDataTypeTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ def setup
21
+ @key = :int8
22
+ @item = :string
23
+ end
24
+
25
+ test("ordered arguments") do
26
+ assert_equal("map<int8, string>",
27
+ Arrow::MapDataType.new(@key, @item).to_s)
28
+ end
29
+
30
+ test("description") do
31
+ assert_equal("map<int8, string>",
32
+ Arrow::MapDataType.new(key: @key,
33
+ item: @item).to_s)
34
+ end
35
+ end
36
+ end
data/test/test-record-batch-reader.rb ADDED
@@ -0,0 +1,46 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class TestRecordBatchReader < Test::Unit::TestCase
19
+ sub_test_case(".try_convert") do
20
+ test("Arrow::RecordBatch") do
21
+ record_batch =
22
+ Arrow::RecordBatch.new("count" => [1, 2, 3],
23
+ "private" => [true, false, true])
24
+ reader = Arrow::RecordBatchReader.try_convert(record_batch)
25
+ assert_equal(record_batch,
26
+ reader.read_next)
27
+ end
28
+
29
+ test("[Arrow::RecordBatch]") do
30
+ record_batch =
31
+ Arrow::RecordBatch.new("count" => [1, 2, 3],
32
+ "private" => [true, false, true])
33
+ reader = Arrow::RecordBatchReader.try_convert([record_batch])
34
+ assert_equal(record_batch,
35
+ reader.read_next)
36
+ end
37
+
38
+ test("Arrow::Table") do
39
+ table = Arrow::Table.new("count" => [1, 2, 3],
40
+ "private" => [true, false, true])
41
+ reader = Arrow::RecordBatchReader.try_convert(table)
42
+ assert_equal(table,
43
+ reader.read_all)
44
+ end
45
+ end
46
+ end
data/test/test-record-batch.rb CHANGED
@@ -136,5 +136,47 @@ class RecordBatchTest < Test::Unit::TestCase
136
136
  end
137
137
  end
138
138
  end
139
+
140
+ sub_test_case("#[]") do
141
+ def setup
142
+ @record_batch = Arrow::RecordBatch.new(a: [true],
143
+ b: [true],
144
+ c: [true],
145
+ d: [true],
146
+ e: [true],
147
+ f: [true],
148
+ g: [true])
149
+ end
150
+
151
+ test("[String]") do
152
+ assert_equal(Arrow::Column.new(@record_batch, 0),
153
+ @record_batch["a"])
154
+ end
155
+
156
+ test("[Symbol]") do
157
+ assert_equal(Arrow::Column.new(@record_batch, 1),
158
+ @record_batch[:b])
159
+ end
160
+
161
+ test("[Integer]") do
162
+ assert_equal(Arrow::Column.new(@record_batch, 6),
163
+ @record_batch[-1])
164
+ end
165
+
166
+ test("[Range]") do
167
+ assert_equal(Arrow::RecordBatch.new(d: [true],
168
+ e: [true]),
169
+ @record_batch[3..4])
170
+ end
171
+
172
+ test("[[Symbol, String, Integer, Range]]") do
173
+ assert_equal(Arrow::RecordBatch.new(c: [true],
174
+ a: [true],
175
+ g: [true],
176
+ d: [true],
177
+ e: [true]),
178
+ @record_batch[[:c, "a", -1, 3..4]])
179
+ end
180
+ end
139
181
  end
140
182
  end