red-arrow 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/Rakefile +4 -0
  4. data/lib/arrow/array-builder.rb +6 -2
  5. data/lib/arrow/array.rb +6 -2
  6. data/{test/test-csv-reader.rb → lib/arrow/compression-type.rb} +16 -13
  7. data/lib/arrow/csv-loader.rb +102 -2
  8. data/lib/arrow/csv-read-options.rb +25 -0
  9. data/lib/arrow/data-type.rb +135 -0
  10. data/lib/arrow/decimal128-array-builder.rb +64 -0
  11. data/lib/arrow/decimal128-data-type.rb +69 -0
  12. data/lib/arrow/dense-union-data-type.rb +90 -0
  13. data/lib/arrow/dictionary-data-type.rb +106 -0
  14. data/lib/arrow/field-containable.rb +35 -0
  15. data/lib/arrow/field.rb +92 -8
  16. data/lib/arrow/file-output-stream.rb +34 -0
  17. data/lib/arrow/list-array-builder.rb +96 -0
  18. data/lib/arrow/list-data-type.rb +68 -0
  19. data/lib/arrow/loader.rb +30 -5
  20. data/lib/arrow/{csv-reader.rb → path-extension.rb} +19 -28
  21. data/lib/arrow/record-batch-builder.rb +115 -0
  22. data/lib/arrow/record-batch.rb +25 -0
  23. data/lib/arrow/schema.rb +97 -0
  24. data/lib/arrow/sparse-union-data-type.rb +90 -0
  25. data/lib/arrow/struct-array-builder.rb +146 -0
  26. data/lib/arrow/struct-array.rb +34 -0
  27. data/lib/arrow/struct-data-type.rb +130 -0
  28. data/lib/arrow/struct.rb +68 -0
  29. data/lib/arrow/table-loader.rb +65 -25
  30. data/lib/arrow/table-saver.rb +73 -24
  31. data/lib/arrow/table.rb +11 -2
  32. data/lib/arrow/time32-data-type.rb +61 -0
  33. data/lib/arrow/time64-data-type.rb +61 -0
  34. data/lib/arrow/timestamp-data-type.rb +57 -0
  35. data/lib/arrow/version.rb +5 -7
  36. data/lib/arrow/writable.rb +22 -0
  37. data/red-arrow.gemspec +8 -4
  38. data/test/helper.rb +1 -2
  39. data/test/test-csv-loader.rb +27 -0
  40. data/test/test-data-type.rb +47 -0
  41. data/test/test-decimal128-array-builder.rb +95 -0
  42. data/test/test-decimal128-array.rb +38 -0
  43. data/test/test-decimal128-data-type.rb +31 -0
  44. data/test/test-dense-union-data-type.rb +41 -0
  45. data/test/test-dictionary-data-type.rb +40 -0
  46. data/test/test-feather.rb +34 -0
  47. data/test/test-field.rb +71 -0
  48. data/test/test-file-output-stream.rb +54 -0
  49. data/test/test-list-array-builder.rb +79 -0
  50. data/test/test-list-array.rb +32 -0
  51. data/test/test-list-data-type.rb +43 -0
  52. data/test/test-record-batch-builder.rb +116 -0
  53. data/test/test-record-batch.rb +82 -27
  54. data/test/test-schema.rb +104 -0
  55. data/test/test-sparse-union-data-type.rb +41 -0
  56. data/test/test-struct-array-builder.rb +180 -0
  57. data/test/test-struct-array.rb +60 -15
  58. data/test/test-struct-data-type.rb +112 -0
  59. data/test/test-struct.rb +81 -0
  60. data/test/test-table.rb +165 -29
  61. data/test/test-time32-data-type.rb +42 -0
  62. data/test/test-time64-data-type.rb +42 -0
  63. data/test/test-timestamp-data-type.rb +42 -0
  64. metadata +99 -10
@@ -16,42 +16,97 @@
16
16
  # under the License.
17
17
 
18
18
  class RecordBatchTest < Test::Unit::TestCase
19
- sub_test_case(".each") do
20
- setup do
21
- fields = [
22
- Arrow::Field.new("count", :uint32),
19
+ sub_test_case(".new") do
20
+ def setup
21
+ @schema = Arrow::Schema.new(visible: :boolean,
22
+ count: :uint32)
23
+ end
24
+
25
+ test("[Schema, records]") do
26
+ records = [
27
+ {visible: true, count: 1},
28
+ nil,
29
+ [false, 3],
23
30
  ]
24
- @schema = Arrow::Schema.new(fields)
25
- @counts = Arrow::UInt32Array.new([1, 2, 4, 8])
26
- @record_batch = Arrow::RecordBatch.new(@schema, @counts.length, [@counts])
31
+ record_batch = Arrow::RecordBatch.new(@schema, records)
32
+ assert_equal([
33
+ {"visible" => true, "count" => 1},
34
+ {"visible" => nil, "count" => nil},
35
+ {"visible" => false, "count" => 3},
36
+ ],
37
+ record_batch.each_record.collect(&:to_h))
27
38
  end
28
39
 
29
- test("default") do
30
- records = []
31
- @record_batch.each do |record|
32
- records << [record, record.index]
33
- end
40
+ test("[Schema, columns]") do
41
+ columns = {
42
+ visible: [true, nil, false],
43
+ count: [1, 2, nil],
44
+ }
45
+ record_batch = Arrow::RecordBatch.new(@schema, columns)
34
46
  assert_equal([
35
- [0, 0],
36
- [1, 1],
37
- [2, 2],
38
- [3, 3],
47
+ {"visible" => true, "count" => 1},
48
+ {"visible" => nil, "count" => 2},
49
+ {"visible" => false, "count" => nil},
39
50
  ],
40
- records.collect {|record, i| [record.index, i]})
51
+ record_batch.each_record.collect(&:to_h))
41
52
  end
42
53
 
43
- test("reuse_record: true") do
44
- records = []
45
- @record_batch.each(reuse_record: true) do |record|
46
- records << [record, record.index]
47
- end
54
+ test("[Schema, n_rows, columns]") do
55
+ columns = [
56
+ Arrow::BooleanArray.new([true, nil, false]),
57
+ Arrow::UInt32Array.new([1, 2, nil]),
58
+ ]
59
+ n_rows = columns[0].length
60
+ record_batch = Arrow::RecordBatch.new(@schema, n_rows, columns)
48
61
  assert_equal([
49
- [3, 0],
50
- [3, 1],
51
- [3, 2],
52
- [3, 3],
62
+ {"visible" => true, "count" => 1},
63
+ {"visible" => nil, "count" => 2},
64
+ {"visible" => false, "count" => nil},
53
65
  ],
54
- records.collect {|record, i| [record.index, i]})
66
+ record_batch.each_record.collect(&:to_h))
67
+ end
68
+ end
69
+
70
+ sub_test_case("instance methods") do
71
+ def setup
72
+ @schema = Arrow::Schema.new(count: :uint32)
73
+ @counts = Arrow::UInt32Array.new([1, 2, 4, 8])
74
+ @record_batch = Arrow::RecordBatch.new(@schema, @counts.length, [@counts])
75
+ end
76
+
77
+ sub_test_case("#each") do
78
+ test("default") do
79
+ records = []
80
+ @record_batch.each do |record|
81
+ records << [record, record.index]
82
+ end
83
+ assert_equal([
84
+ [0, 0],
85
+ [1, 1],
86
+ [2, 2],
87
+ [3, 3],
88
+ ],
89
+ records.collect {|record, i| [record.index, i]})
90
+ end
91
+
92
+ test("reuse_record: true") do
93
+ records = []
94
+ @record_batch.each(reuse_record: true) do |record|
95
+ records << [record, record.index]
96
+ end
97
+ assert_equal([
98
+ [3, 0],
99
+ [3, 1],
100
+ [3, 2],
101
+ [3, 3],
102
+ ],
103
+ records.collect {|record, i| [record.index, i]})
104
+ end
105
+ end
106
+
107
+ test("#to_table") do
108
+ assert_equal(Arrow::Table.new(@schema, [@counts]),
109
+ @record_batch.to_table)
55
110
  end
56
111
  end
57
112
  end
@@ -0,0 +1,104 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class SchemaTest < Test::Unit::TestCase
19
+ def setup
20
+ @count_field = Arrow::Field.new("count", :uint32)
21
+ @visible_field = Arrow::Field.new("visible", :boolean)
22
+ end
23
+
24
+ sub_test_case(".new") do
25
+ test("[Arrow::Field]") do
26
+ fields = [
27
+ @count_field,
28
+ @visible_field,
29
+ ]
30
+ assert_equal("count: uint32\n" +
31
+ "visible: bool",
32
+ Arrow::Schema.new(fields).to_s)
33
+ end
34
+
35
+ test("[Arrow::Field, Hash]") do
36
+ fields = [
37
+ @count_field,
38
+ {name: "visible", type: :boolean},
39
+ ]
40
+ assert_equal("count: uint32\n" +
41
+ "visible: bool",
42
+ Arrow::Schema.new(fields).to_s)
43
+ end
44
+
45
+ test("{String, Symbol => Arrow::DataType}") do
46
+ fields = {
47
+ "count" => Arrow::UInt32DataType.new,
48
+ :visible => :boolean,
49
+ }
50
+ assert_equal("count: uint32\n" +
51
+ "visible: bool",
52
+ Arrow::Schema.new(fields).to_s)
53
+ end
54
+
55
+ test("{String, Symbol => Hash}") do
56
+ fields = {
57
+ "count" => {type: :uint32},
58
+ :tags => {
59
+ type: :list,
60
+ field: {
61
+ name: "tag",
62
+ type: :string,
63
+ },
64
+ },
65
+ }
66
+ assert_equal("count: uint32\n" +
67
+ "tags: list<tag: string>",
68
+ Arrow::Schema.new(fields).to_s)
69
+ end
70
+ end
71
+
72
+ sub_test_case("instance methods") do
73
+ def setup
74
+ super
75
+ @schema = Arrow::Schema.new([@count_field, @visible_field])
76
+ end
77
+
78
+ sub_test_case("#[]") do
79
+ test("[String]") do
80
+ assert_equal([@count_field, @visible_field],
81
+ [@schema["count"], @schema["visible"]])
82
+ end
83
+
84
+ test("[Symbol]") do
85
+ assert_equal([@count_field, @visible_field],
86
+ [@schema[:count], @schema[:visible]])
87
+ end
88
+
89
+ test("[Integer]") do
90
+ assert_equal([@count_field, @visible_field],
91
+ [@schema[0], @schema[1]])
92
+ end
93
+
94
+ test("[invalid]") do
95
+ invalid = []
96
+ message = "field name or index must be String, Symbol or Integer"
97
+ message << ": <#{invalid.inspect}>"
98
+ assert_raise(ArgumentError.new(message)) do
99
+ @schema[invalid]
100
+ end
101
+ end
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,41 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class SparseUnionDataTypeTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ def setup
21
+ @fields = [
22
+ Arrow::Field.new("visible", :boolean),
23
+ {
24
+ name: "count",
25
+ type: :int32,
26
+ },
27
+ ]
28
+ end
29
+
30
+ test("ordered arguments") do
31
+ assert_equal("union[sparse]<visible: bool=2, count: int32=9>",
32
+ Arrow::SparseUnionDataType.new(@fields, [2, 9]).to_s)
33
+ end
34
+
35
+ test("description") do
36
+ assert_equal("union[sparse]<visible: bool=2, count: int32=9>",
37
+ Arrow::SparseUnionDataType.new(fields: @fields,
38
+ type_codes: [2, 9]).to_s)
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,180 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class StructArrayBuilderTest < Test::Unit::TestCase
19
+ def setup
20
+ @data_type = Arrow::StructDataType.new(visible: {type: :boolean},
21
+ count: {type: :uint64})
22
+ @builder = Arrow::StructArrayBuilder.new(@data_type)
23
+ end
24
+
25
+ sub_test_case("#append_value") do
26
+ test("nil") do
27
+ @builder.append_value(nil)
28
+ array = @builder.finish
29
+ assert_equal([
30
+ [nil],
31
+ [nil],
32
+ ],
33
+ [
34
+ array.find_field(0).to_a,
35
+ array.find_field(1).to_a,
36
+ ])
37
+ end
38
+
39
+ test("Array") do
40
+ @builder.append_value([true, 1])
41
+ array = @builder.finish
42
+ assert_equal([
43
+ [true],
44
+ [1],
45
+ ],
46
+ [
47
+ array.find_field(0).to_a,
48
+ array.find_field(1).to_a,
49
+ ])
50
+ end
51
+
52
+ test("Arrow::Struct") do
53
+ source_array = Arrow::StructArray.new(@data_type, [[true, 1]])
54
+ struct = source_array.get_value(0)
55
+ @builder.append_value(struct)
56
+ array = @builder.finish
57
+ assert_equal([
58
+ [true],
59
+ [1],
60
+ ],
61
+ [
62
+ array.find_field(0).to_a,
63
+ array.find_field(1).to_a,
64
+ ])
65
+ end
66
+
67
+ test("Hash") do
68
+ @builder.append_value(count: 1, visible: true)
69
+ array = @builder.finish
70
+ assert_equal([
71
+ [true],
72
+ [1],
73
+ ],
74
+ [
75
+ array.find_field(0).to_a,
76
+ array.find_field(1).to_a,
77
+ ])
78
+ end
79
+ end
80
+
81
+ sub_test_case("#append_values") do
82
+ test("[nil]") do
83
+ @builder.append_values([nil])
84
+ array = @builder.finish
85
+ assert_equal([
86
+ [nil],
87
+ [nil],
88
+ ],
89
+ [
90
+ array.find_field(0).to_a,
91
+ array.find_field(1).to_a,
92
+ ])
93
+ end
94
+
95
+ test("[Array]") do
96
+ @builder.append_values([[true, 1]])
97
+ array = @builder.finish
98
+ assert_equal([
99
+ [true],
100
+ [1],
101
+ ],
102
+ [
103
+ array.find_field(0).to_a,
104
+ array.find_field(1).to_a,
105
+ ])
106
+ end
107
+
108
+ test("[Hash]") do
109
+ @builder.append_values([{count: 1, visible: true}])
110
+ array = @builder.finish
111
+ assert_equal([
112
+ [true],
113
+ [1],
114
+ ],
115
+ [
116
+ array.find_field(0).to_a,
117
+ array.find_field(1).to_a,
118
+ ])
119
+ end
120
+
121
+ test("[nil, Array, Hash]") do
122
+ @builder.append_values([
123
+ nil,
124
+ [true, 1],
125
+ {count: 2, visible: false},
126
+ ])
127
+ array = @builder.finish
128
+ assert_equal([
129
+ [nil, true, false],
130
+ [nil, 1, 2],
131
+ ],
132
+ [
133
+ array.find_field(0).to_a,
134
+ array.find_field(1).to_a,
135
+ ])
136
+ end
137
+
138
+ test("is_valids") do
139
+ @builder.append_values([
140
+ [true, 1],
141
+ [false, 2],
142
+ [true, 3],
143
+ ],
144
+ [
145
+ true,
146
+ false,
147
+ true,
148
+ ])
149
+ array = @builder.finish
150
+ assert_equal([
151
+ [true, nil, true],
152
+ [1, nil, 3],
153
+ ],
154
+ [
155
+ array.find_field(0).to_a,
156
+ array.find_field(1).to_a,
157
+ ])
158
+ end
159
+ end
160
+
161
+ sub_test_case("#append") do
162
+ test("backward compatibility") do
163
+ @builder.append
164
+ @builder.get_field_builder(0).append(true)
165
+ @builder.get_field_builder(1).append(1)
166
+ @builder.append
167
+ @builder.get_field_builder(0).append(false)
168
+ @builder.get_field_builder(1).append(2)
169
+ array = @builder.finish
170
+ assert_equal([
171
+ [true, 1],
172
+ [false, 2],
173
+ ],
174
+ [
175
+ array.get_value(0).values,
176
+ array.get_value(1).values,
177
+ ])
178
+ end
179
+ end
180
+ end