red-arrow 5.0.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/README.md +23 -0
  3. data/ext/arrow/converters.cpp +5 -0
  4. data/ext/arrow/converters.hpp +126 -0
  5. data/ext/arrow/extconf.rb +13 -0
  6. data/ext/arrow/raw-records.cpp +1 -0
  7. data/ext/arrow/values.cpp +1 -0
  8. data/lib/arrow/aggregate-node-options.rb +35 -0
  9. data/lib/arrow/aggregation.rb +46 -0
  10. data/lib/arrow/array-builder.rb +5 -0
  11. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  12. data/lib/arrow/column-containable.rb +100 -1
  13. data/lib/arrow/datum.rb +2 -0
  14. data/lib/arrow/expression.rb +48 -0
  15. data/lib/arrow/file-system.rb +34 -0
  16. data/lib/arrow/group.rb +116 -124
  17. data/lib/arrow/loader.rb +13 -0
  18. data/lib/arrow/map-array-builder.rb +109 -0
  19. data/lib/arrow/map-array.rb +26 -0
  20. data/lib/arrow/map-data-type.rb +89 -0
  21. data/lib/arrow/path-extension.rb +1 -1
  22. data/lib/arrow/record-batch-reader.rb +41 -0
  23. data/lib/arrow/record-batch.rb +0 -2
  24. data/lib/arrow/slicer.rb +44 -143
  25. data/lib/arrow/source-node-options.rb +32 -0
  26. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  27. data/lib/arrow/symbol-values-appendable.rb +34 -0
  28. data/lib/arrow/table-concatenate-options.rb +36 -0
  29. data/lib/arrow/table-formatter.rb +141 -17
  30. data/lib/arrow/table-list-formatter.rb +5 -3
  31. data/lib/arrow/table-loader.rb +41 -3
  32. data/lib/arrow/table-saver.rb +29 -3
  33. data/lib/arrow/table-table-formatter.rb +7 -31
  34. data/lib/arrow/table.rb +32 -38
  35. data/lib/arrow/version.rb +1 -1
  36. data/red-arrow.gemspec +1 -1
  37. data/test/raw-records/test-dense-union-array.rb +14 -0
  38. data/test/raw-records/test-list-array.rb +19 -0
  39. data/test/raw-records/test-map-array.rb +441 -0
  40. data/test/raw-records/test-sparse-union-array.rb +14 -0
  41. data/test/raw-records/test-struct-array.rb +15 -0
  42. data/test/test-array-builder.rb +7 -0
  43. data/test/test-binary-dictionary-array-builder.rb +103 -0
  44. data/test/test-csv-loader.rb +8 -8
  45. data/test/test-expression.rb +40 -0
  46. data/test/test-group.rb +75 -51
  47. data/test/test-map-array-builder.rb +110 -0
  48. data/test/test-map-array.rb +33 -0
  49. data/test/test-map-data-type.rb +36 -0
  50. data/test/test-record-batch-reader.rb +46 -0
  51. data/test/test-record-batch.rb +42 -0
  52. data/test/test-slicer.rb +166 -167
  53. data/test/test-string-dictionary-array-builder.rb +103 -0
  54. data/test/test-table.rb +190 -53
  55. data/test/values/test-dense-union-array.rb +14 -0
  56. data/test/values/test-list-array.rb +17 -0
  57. data/test/values/test-map-array.rb +433 -0
  58. data/test/values/test-sparse-union-array.rb +14 -0
  59. data/test/values/test-struct-array.rb +15 -0
  60. metadata +107 -76
data/test/test-group.rb CHANGED
@@ -42,9 +42,9 @@ class GroupTest < Test::Unit::TestCase
42
42
  }
43
43
  table = Arrow::Table.new(raw_table)
44
44
  assert_equal(<<-TABLE, table.group(:time).count.to_s)
45
- time int
46
- 0 #{time_values[0].iso8601} 1
47
- 1 #{time_values[1].iso8601} 1
45
+ count(int) time
46
+ 0 1 #{time_values[0].iso8601}
47
+ 1 1 #{time_values[1].iso8601}
48
48
  TABLE
49
49
  end
50
50
  end
@@ -52,20 +52,31 @@ class GroupTest < Test::Unit::TestCase
52
52
  sub_test_case("#count") do
53
53
  test("single") do
54
54
  assert_equal(<<-TABLE, @table.group(:group_key1).count.to_s)
55
- group_key1 group_key2 int uint float string
56
- 0 1 2 2 1 1 2
57
- 1 2 1 0 1 1 1
58
- 2 3 3 3 3 3 2
55
+ count(group_key2) count(int) count(uint) count(float) count(string) group_key1
56
+ 0 2 2 1 1 2 1
57
+ 1 1 0 1 1 1 2
58
+ 2 3 3 3 3 2 3
59
59
  TABLE
60
60
  end
61
61
 
62
62
  test("multiple") do
63
63
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).count.to_s)
64
- group_key1 group_key2 int uint float string
65
- 0 1 1 2 1 1 2
66
- 1 2 1 0 1 1 1
67
- 2 3 1 1 1 1 0
68
- 3 3 2 2 2 2 2
64
+ count(int) count(uint) count(float) count(string) group_key1 group_key2
65
+ 0 2 1 1 2 1 1
66
+ 1 0 1 1 1 2 1
67
+ 2 1 1 1 0 3 1
68
+ 3 2 2 2 2 3 2
69
+ TABLE
70
+ end
71
+
72
+ test("column") do
73
+ group = @table.group(:group_key1, :group_key2)
74
+ assert_equal(<<-TABLE, group.count(:int, :uint).to_s)
75
+ count(int) count(uint) group_key1 group_key2
76
+ 0 2 1 1 1
77
+ 1 0 1 2 1
78
+ 2 1 1 3 1
79
+ 3 2 2 3 2
69
80
  TABLE
70
81
  end
71
82
  end
@@ -73,41 +84,41 @@ class GroupTest < Test::Unit::TestCase
73
84
  sub_test_case("#sum") do
74
85
  test("single") do
75
86
  assert_equal(<<-TABLE, @table.group(:group_key1).sum.to_s)
76
- group_key1 group_key2 int uint float
77
- 0 1 2 -3 1 2.200000
78
- 1 2 1 0 3 3.300000
79
- 2 3 5 -15 15 16.500000
87
+ sum(group_key2) sum(int) sum(uint) sum(float) group_key1
88
+ 0 2 -3 1 2.200000 1
89
+ 1 1 (null) 3 3.300000 2
90
+ 2 5 -15 15 16.500000 3
80
91
  TABLE
81
92
  end
82
93
 
83
94
  test("multiple") do
84
95
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).sum.to_s)
85
- group_key1 group_key2 int uint float
86
- 0 1 1 -3 1 2.200000
87
- 1 2 1 0 3 3.300000
88
- 2 3 1 -4 4 4.400000
89
- 3 3 2 -11 11 12.100000
96
+ sum(int) sum(uint) sum(float) group_key1 group_key2
97
+ 0 -3 1 2.200000 1 1
98
+ 1 (null) 3 3.300000 2 1
99
+ 2 -4 4 4.400000 3 1
100
+ 3 -11 11 12.100000 3 2
90
101
  TABLE
91
102
  end
92
103
  end
93
104
 
94
- sub_test_case("#average") do
105
+ sub_test_case("#mean") do
95
106
  test("single") do
96
- assert_equal(<<-TABLE, @table.group(:group_key1).average.to_s)
97
- group_key1 group_key2 int uint float
98
- 0 1 1.000000 -1.500000 1.000000 2.200000
99
- 1 2 1.000000 0.000000 3.000000 3.300000
100
- 2 3 1.666667 -5.000000 5.000000 5.500000
107
+ assert_equal(<<-TABLE, @table.group(:group_key1).mean.to_s)
108
+ mean(group_key2) mean(int) mean(uint) mean(float) group_key1
109
+ 0 1.000000 -1.500000 1.000000 2.200000 1
110
+ 1 1.000000 (null) 3.000000 3.300000 2
111
+ 2 1.666667 -5.000000 5.000000 5.500000 3
101
112
  TABLE
102
113
  end
103
114
 
104
115
  test("multiple") do
105
- assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).average.to_s)
106
- group_key1 group_key2 int uint float
107
- 0 1 1 -1.500000 1.000000 2.200000
108
- 1 2 1 0.000000 3.000000 3.300000
109
- 2 3 1 -4.000000 4.000000 4.400000
110
- 3 3 2 -5.500000 5.500000 6.050000
116
+ assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).mean.to_s)
117
+ mean(int) mean(uint) mean(float) group_key1 group_key2
118
+ 0 -1.500000 1.000000 2.200000 1 1
119
+ 1 (null) 3.000000 3.300000 2 1
120
+ 2 -4.000000 4.000000 4.400000 3 1
121
+ 3 -5.500000 5.500000 6.050000 3 2
111
122
  TABLE
112
123
  end
113
124
  end
@@ -115,20 +126,20 @@ class GroupTest < Test::Unit::TestCase
115
126
  sub_test_case("#min") do
116
127
  test("single") do
117
128
  assert_equal(<<-TABLE, @table.group(:group_key1).min.to_s)
118
- group_key1 group_key2 int uint float
119
- 0 1 1 -2 1 2.200000
120
- 1 2 1 3 3.300000
121
- 2 3 1 -6 4 4.400000
129
+ min(group_key2) min(int) min(uint) min(float) group_key1
130
+ 0 1 -2 1 2.200000 1
131
+ 1 1 (null) 3 3.300000 2
132
+ 2 1 -6 4 4.400000 3
122
133
  TABLE
123
134
  end
124
135
 
125
136
  test("multiple") do
126
137
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).min.to_s)
127
- group_key1 group_key2 int uint float
128
- 0 1 1 -2 1 2.200000
129
- 1 2 1 3 3.300000
130
- 2 3 1 -4 4 4.400000
131
- 3 3 2 -6 5 5.500000
138
+ min(int) min(uint) min(float) group_key1 group_key2
139
+ 0 -2 1 2.200000 1 1
140
+ 1 (null) 3 3.300000 2 1
141
+ 2 -4 4 4.400000 3 1
142
+ 3 -6 5 5.500000 3 2
132
143
  TABLE
133
144
  end
134
145
  end
@@ -136,20 +147,33 @@ class GroupTest < Test::Unit::TestCase
136
147
  sub_test_case("#max") do
137
148
  test("single") do
138
149
  assert_equal(<<-TABLE, @table.group(:group_key1).max.to_s)
139
- group_key1 group_key2 int uint float
140
- 0 1 1 -1 1 2.200000
141
- 1 2 1 3 3.300000
142
- 2 3 2 -4 6 6.600000
150
+ max(group_key2) max(int) max(uint) max(float) group_key1
151
+ 0 1 -1 1 2.200000 1
152
+ 1 1 (null) 3 3.300000 2
153
+ 2 2 -4 6 6.600000 3
143
154
  TABLE
144
155
  end
145
156
 
146
157
  test("multiple") do
147
158
  assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).max.to_s)
148
- group_key1 group_key2 int uint float
149
- 0 1 1 -1 1 2.200000
150
- 1 2 1 3 3.300000
151
- 2 3 1 -4 4 4.400000
152
- 3 3 2 -5 6 6.600000
159
+ max(int) max(uint) max(float) group_key1 group_key2
160
+ 0 -1 1 2.200000 1 1
161
+ 1 (null) 3 3.300000 2 1
162
+ 2 -4 4 4.400000 3 1
163
+ 3 -5 6 6.600000 3 2
164
+ TABLE
165
+ end
166
+ end
167
+
168
+ sub_test_case("#aggregate") do
169
+ test("function()") do
170
+ group = @table.group(:group_key1, :group_key2)
171
+ assert_equal(<<-TABLE, group.aggregate("count(int)", "sum(uint)").to_s)
172
+ count(int) sum(uint) group_key1 group_key2
173
+ 0 2 1 1 1
174
+ 1 0 3 2 1
175
+ 2 1 4 3 1
176
+ 3 2 11 3 2
153
177
  TABLE
154
178
  end
155
179
  end
data/test/test-map-array-builder.rb ADDED
@@ -0,0 +1,110 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class MapArrayBuilderTest < Test::Unit::TestCase
19
+ def setup
20
+ key_type = Arrow::StringDataType.new
21
+ item_type = Arrow::Int16DataType.new
22
+ data_type = Arrow::MapDataType.new(key_type, item_type)
23
+ @builder = Arrow::MapArrayBuilder.new(data_type)
24
+ end
25
+
26
+ sub_test_case("#append_value") do
27
+ test("nil") do
28
+ @builder.append_value(nil)
29
+ array = @builder.finish
30
+ assert_equal([nil], array.collect {|value| value})
31
+ end
32
+
33
+ test("Hash") do
34
+ @builder.append_value({"a" => 0, "b" => 1})
35
+ @builder.append_value({"c" => 0, "d" => 1})
36
+ array = @builder.finish
37
+ assert_equal([
38
+ {"a" => 0, "b" => 1},
39
+ {"c" => 0, "d" => 1}
40
+ ],
41
+ array.collect {|value| value})
42
+ end
43
+
44
+ test("#each") do
45
+ @builder.append_value([["a", 0], ["b", 1]])
46
+ @builder.append_value([["c", 0], ["d", 1]])
47
+ array = @builder.finish
48
+ assert_equal([
49
+ {"a" => 0, "b" => 1},
50
+ {"c" => 0, "d" => 1}
51
+ ],
52
+ array.collect {|value| value})
53
+ end
54
+ end
55
+
56
+ sub_test_case("#append_values") do
57
+ test("[nil]") do
58
+ @builder.append_values([nil])
59
+ array = @builder.finish
60
+ assert_equal([nil], array.collect {|value| value})
61
+ end
62
+
63
+ test("[Hash]") do
64
+ @builder.append_values([{"a" => 0, "b" => 1}, {"c" => 0, "d" => 1}])
65
+ array = @builder.finish
66
+ assert_equal([
67
+ {"a" => 0, "b" => 1},
68
+ {"c" => 0, "d" => 1}
69
+ ],
70
+ array.collect {|value| value})
71
+ end
72
+
73
+ test("[#each]") do
74
+ @builder.append_values([[["a", 0], ["b", 1]], [["c", 0], ["d", 1]]])
75
+ array = @builder.finish
76
+ assert_equal([
77
+ {"a" => 0, "b" => 1},
78
+ {"c" => 0, "d" => 1}
79
+ ],
80
+ array.collect {|value| value})
81
+ end
82
+
83
+ test("[nil, Hash, #each]") do
84
+ @builder.append_values([nil, {"a" => 0, "b" => 1}, [["c", 0], ["d", 1]]])
85
+ array = @builder.finish
86
+ assert_equal([
87
+ nil,
88
+ {"a" => 0, "b" => 1},
89
+ {"c" => 0, "d" => 1}
90
+ ],
91
+ array.collect {|value| value})
92
+ end
93
+
94
+ test("is_valids") do
95
+ @builder.append_values([
96
+ {"a" => 0, "b" => 1},
97
+ {"c" => 0, "d" => 1},
98
+ {"e" => 0, "f" => 1}
99
+ ],
100
+ [true, false, true])
101
+ array = @builder.finish
102
+ assert_equal([
103
+ {"a" => 0, "b" => 1},
104
+ nil,
105
+ {"e" => 0, "f" => 1}
106
+ ],
107
+ array.collect {|value| value})
108
+ end
109
+ end
110
+ end
data/test/test-map-array.rb ADDED
@@ -0,0 +1,33 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class MapArrayTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ test("build") do
21
+ key_type = Arrow::StringDataType.new
22
+ item_type = Arrow::Int16DataType.new
23
+ data_type = Arrow::MapDataType.new(key_type, item_type)
24
+ values = [
25
+ {"a" => 0, "b" => 1},
26
+ nil,
27
+ {"c" => 0, "d" => 1}
28
+ ]
29
+ array = Arrow::MapArray.new(data_type, values)
30
+ assert_equal(values, array.collect {|value| value})
31
+ end
32
+ end
33
+ end
data/test/test-map-data-type.rb ADDED
@@ -0,0 +1,36 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class MapDataTypeTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ def setup
21
+ @key = :int8
22
+ @item = :string
23
+ end
24
+
25
+ test("ordered arguments") do
26
+ assert_equal("map<int8, string>",
27
+ Arrow::MapDataType.new(@key, @item).to_s)
28
+ end
29
+
30
+ test("description") do
31
+ assert_equal("map<int8, string>",
32
+ Arrow::MapDataType.new(key: @key,
33
+ item: @item).to_s)
34
+ end
35
+ end
36
+ end
data/test/test-record-batch-reader.rb ADDED
@@ -0,0 +1,46 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class TestRecordBatchReader < Test::Unit::TestCase
19
+ sub_test_case(".try_convert") do
20
+ test("Arrow::RecordBatch") do
21
+ record_batch =
22
+ Arrow::RecordBatch.new("count" => [1, 2, 3],
23
+ "private" => [true, false, true])
24
+ reader = Arrow::RecordBatchReader.try_convert(record_batch)
25
+ assert_equal(record_batch,
26
+ reader.read_next)
27
+ end
28
+
29
+ test("[Arrow::RecordBatch]") do
30
+ record_batch =
31
+ Arrow::RecordBatch.new("count" => [1, 2, 3],
32
+ "private" => [true, false, true])
33
+ reader = Arrow::RecordBatchReader.try_convert([record_batch])
34
+ assert_equal(record_batch,
35
+ reader.read_next)
36
+ end
37
+
38
+ test("Arrow::Table") do
39
+ table = Arrow::Table.new("count" => [1, 2, 3],
40
+ "private" => [true, false, true])
41
+ reader = Arrow::RecordBatchReader.try_convert(table)
42
+ assert_equal(table,
43
+ reader.read_all)
44
+ end
45
+ end
46
+ end
data/test/test-record-batch.rb CHANGED
@@ -136,5 +136,47 @@ class RecordBatchTest < Test::Unit::TestCase
136
136
  end
137
137
  end
138
138
  end
139
+
140
+ sub_test_case("#[]") do
141
+ def setup
142
+ @record_batch = Arrow::RecordBatch.new(a: [true],
143
+ b: [true],
144
+ c: [true],
145
+ d: [true],
146
+ e: [true],
147
+ f: [true],
148
+ g: [true])
149
+ end
150
+
151
+ test("[String]") do
152
+ assert_equal(Arrow::Column.new(@record_batch, 0),
153
+ @record_batch["a"])
154
+ end
155
+
156
+ test("[Symbol]") do
157
+ assert_equal(Arrow::Column.new(@record_batch, 1),
158
+ @record_batch[:b])
159
+ end
160
+
161
+ test("[Integer]") do
162
+ assert_equal(Arrow::Column.new(@record_batch, 6),
163
+ @record_batch[-1])
164
+ end
165
+
166
+ test("[Range]") do
167
+ assert_equal(Arrow::RecordBatch.new(d: [true],
168
+ e: [true]),
169
+ @record_batch[3..4])
170
+ end
171
+
172
+ test("[[Symbol, String, Integer, Range]]") do
173
+ assert_equal(Arrow::RecordBatch.new(c: [true],
174
+ a: [true],
175
+ g: [true],
176
+ d: [true],
177
+ e: [true]),
178
+ @record_batch[[:c, "a", -1, 3..4]])
179
+ end
180
+ end
139
181
  end
140
182
  end