red-arrow 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/Rakefile +4 -0
  4. data/lib/arrow/array-builder.rb +6 -2
  5. data/lib/arrow/array.rb +6 -2
  6. data/{test/test-csv-reader.rb → lib/arrow/compression-type.rb} +16 -13
  7. data/lib/arrow/csv-loader.rb +102 -2
  8. data/lib/arrow/csv-read-options.rb +25 -0
  9. data/lib/arrow/data-type.rb +135 -0
  10. data/lib/arrow/decimal128-array-builder.rb +64 -0
  11. data/lib/arrow/decimal128-data-type.rb +69 -0
  12. data/lib/arrow/dense-union-data-type.rb +90 -0
  13. data/lib/arrow/dictionary-data-type.rb +106 -0
  14. data/lib/arrow/field-containable.rb +35 -0
  15. data/lib/arrow/field.rb +92 -8
  16. data/lib/arrow/file-output-stream.rb +34 -0
  17. data/lib/arrow/list-array-builder.rb +96 -0
  18. data/lib/arrow/list-data-type.rb +68 -0
  19. data/lib/arrow/loader.rb +30 -5
  20. data/lib/arrow/{csv-reader.rb → path-extension.rb} +19 -28
  21. data/lib/arrow/record-batch-builder.rb +115 -0
  22. data/lib/arrow/record-batch.rb +25 -0
  23. data/lib/arrow/schema.rb +97 -0
  24. data/lib/arrow/sparse-union-data-type.rb +90 -0
  25. data/lib/arrow/struct-array-builder.rb +146 -0
  26. data/lib/arrow/struct-array.rb +34 -0
  27. data/lib/arrow/struct-data-type.rb +130 -0
  28. data/lib/arrow/struct.rb +68 -0
  29. data/lib/arrow/table-loader.rb +65 -25
  30. data/lib/arrow/table-saver.rb +73 -24
  31. data/lib/arrow/table.rb +11 -2
  32. data/lib/arrow/time32-data-type.rb +61 -0
  33. data/lib/arrow/time64-data-type.rb +61 -0
  34. data/lib/arrow/timestamp-data-type.rb +57 -0
  35. data/lib/arrow/version.rb +5 -7
  36. data/lib/arrow/writable.rb +22 -0
  37. data/red-arrow.gemspec +8 -4
  38. data/test/helper.rb +1 -2
  39. data/test/test-csv-loader.rb +27 -0
  40. data/test/test-data-type.rb +47 -0
  41. data/test/test-decimal128-array-builder.rb +95 -0
  42. data/test/test-decimal128-array.rb +38 -0
  43. data/test/test-decimal128-data-type.rb +31 -0
  44. data/test/test-dense-union-data-type.rb +41 -0
  45. data/test/test-dictionary-data-type.rb +40 -0
  46. data/test/test-feather.rb +34 -0
  47. data/test/test-field.rb +71 -0
  48. data/test/test-file-output-stream.rb +54 -0
  49. data/test/test-list-array-builder.rb +79 -0
  50. data/test/test-list-array.rb +32 -0
  51. data/test/test-list-data-type.rb +43 -0
  52. data/test/test-record-batch-builder.rb +116 -0
  53. data/test/test-record-batch.rb +82 -27
  54. data/test/test-schema.rb +104 -0
  55. data/test/test-sparse-union-data-type.rb +41 -0
  56. data/test/test-struct-array-builder.rb +180 -0
  57. data/test/test-struct-array.rb +60 -15
  58. data/test/test-struct-data-type.rb +112 -0
  59. data/test/test-struct.rb +81 -0
  60. data/test/test-table.rb +165 -29
  61. data/test/test-time32-data-type.rb +42 -0
  62. data/test/test-time64-data-type.rb +42 -0
  63. data/test/test-timestamp-data-type.rb +42 -0
  64. metadata +99 -10
@@ -0,0 +1,41 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Checks the two call forms of Arrow::DenseUnionDataType.new exercised
# here: positional arguments and a keyword description.
class DenseUnionDataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    # Shared fields: the first is an Arrow::Field instance, the second
    # is a Hash description of a field.
    def setup
      visible_field = Arrow::Field.new("visible", :boolean)
      count_description = {name: "count", type: :int32}
      @fields = [visible_field, count_description]
    end

    test("ordered arguments") do
      data_type = Arrow::DenseUnionDataType.new(@fields, [2, 9])
      assert_equal("union[dense]<visible: bool=2, count: int32=9>",
                   data_type.to_s)
    end

    test("description") do
      data_type = Arrow::DenseUnionDataType.new(fields: @fields,
                                                type_codes: [2, 9])
      assert_equal("union[dense]<visible: bool=2, count: int32=9>",
                   data_type.to_s)
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Checks the two call forms of Arrow::DictionaryDataType.new exercised
# here: positional arguments and a keyword description.
class DictionaryDataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    # Inputs shared by both cases.
    def setup
      @index_data_type = :int8
      @dictionary = Arrow::StringArray.new(["Hello", "World"])
      @ordered = true
    end

    test("ordered arguments") do
      data_type = Arrow::DictionaryDataType.new(@index_data_type,
                                                @dictionary,
                                                @ordered)
      assert_equal("dictionary<values=string, indices=int8, ordered=1>",
                   data_type.to_s)
    end

    test("description") do
      data_type =
        Arrow::DictionaryDataType.new(index_data_type: @index_data_type,
                                      dictionary: @dictionary,
                                      ordered: @ordered)
      assert_equal("dictionary<values=string, indices=int8, ordered=1>",
                   data_type.to_s)
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Round-trips an Arrow::Table through a file whose path ends in
# ".feather".
class FeatherTest < Test::Unit::TestCase
  include Helper::Fixture

  # Saves a two-column table to a temporary path, loads it back and
  # expects a table equal to the one that was saved.
  def test_save_load
    columns = {
      "message" => Arrow::StringArray.new(["Start", "Crash", "Shutdown"]),
      "is_critical" => Arrow::BooleanArray.new([false, true, false]),
    }
    table = Arrow::Table.new(columns)

    output = Tempfile.new(["red-arrow", ".feather"])
    begin
      table.save(output.path)
      output.close

      assert_equal(table, Arrow::Table.load(output.path))
    ensure
      # Close and unlink the temporary file even when saving, loading
      # or the assertion fails, rather than leaving it to the
      # Tempfile finalizer.
      output.close!
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Exercises the argument forms of Arrow::Field.new: (name, data type)
# pairs and single Hash descriptions. Every case expects the same
# string representation unless noted.
class FieldTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("String, Arrow::DataType") do
      assert_equal("visible: bool",
                   Arrow::Field.new("visible", Arrow::BooleanDataType.new).to_s)
    end

    test("Symbol, Arrow::DataType") do
      assert_equal("visible: bool",
                   Arrow::Field.new(:visible, Arrow::BooleanDataType.new).to_s)
    end

    # The name is passed as a String so that the case matches its
    # label; a Symbol name is already covered by the previous case.
    test("String, Symbol") do
      assert_equal("visible: bool",
                   Arrow::Field.new("visible", :boolean).to_s)
    end

    # String name with a Hash data type description.
    test("String, Hash") do
      assert_equal("visible: bool",
                   Arrow::Field.new("visible", type: :boolean).to_s)
    end

    test("description: String") do
      assert_equal("visible: bool",
                   Arrow::Field.new(name: "visible",
                                    data_type: :boolean).to_s)
    end

    test("description: Symbol") do
      assert_equal("visible: bool",
                   Arrow::Field.new(name: :visible,
                                    data_type: :boolean).to_s)
    end

    # :type is used in place of :data_type.
    test("description: shortcut") do
      assert_equal("visible: bool",
                   Arrow::Field.new(name: :visible,
                                    type: :boolean).to_s)
    end

    # A description whose extra :field key describes the list element.
    test("Hash: shortcut: additional") do
      description = {
        name: :tags,
        type: :list,
        field: {
          name: "tag",
          type: :string,
        },
      }
      assert_equal("tags: list<tag: string>",
                   Arrow::Field.new(description).to_s)
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Checks how Arrow::FileOutputStream.open treats a file that already
# has content, with and without the append flag.
class TestFileOutputStream < Test::Unit::TestCase
  sub_test_case(".open") do
    # Creates a temporary file that already contains "Hello" and
    # closes it so each test can reopen it by path.
    def setup
      @file = Tempfile.open("arrow-file-output-stream")
      @file.write("Hello")
      @file.close
    end

    # Unlinks the temporary file after every test instead of leaving
    # it on disk until the Tempfile finalizer runs.
    def teardown
      @file.close!
    end

    # Without an append argument the existing content is replaced.
    def test_default
      Arrow::FileOutputStream.open(@file.path) do |file|
        file.write(" World")
      end
      assert_equal(" World", File.read(@file.path))
    end

    # append: true as an option keeps the existing content.
    def test_options_append
      Arrow::FileOutputStream.open(@file.path, append: true) do |file|
        file.write(" World")
      end
      assert_equal("Hello World", File.read(@file.path))
    end

    # Positional true keeps the existing content.
    def test_append_true
      Arrow::FileOutputStream.open(@file.path, true) do |file|
        file.write(" World")
      end
      assert_equal("Hello World", File.read(@file.path))
    end

    # Positional false replaces the existing content.
    def test_append_false
      Arrow::FileOutputStream.open(@file.path, false) do |file|
        file.write(" World")
      end
      assert_equal(" World", File.read(@file.path))
    end
  end
end
@@ -0,0 +1,79 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Exercises Arrow::ListArrayBuilder with a list data type whose
# element field is "visible: boolean".
class ListArrayBuilderTest < Test::Unit::TestCase
  # Builds a fresh builder for every test.
  def setup
    @data_type = Arrow::ListDataType.new(name: "visible", type: :boolean)
    @builder = Arrow::ListArrayBuilder.new(@data_type)
  end

  sub_test_case("#append_value") do
    test("nil") do
      @builder.append_value(nil)
      built = @builder.finish
      assert_equal(nil, built[0])
    end

    test("Array") do
      @builder.append_value([true, false, true])
      built = @builder.finish
      assert_equal([true, false, true], built[0].to_a)
    end
  end

  sub_test_case("#append_values") do
    # A nil entry among the values yields a nil element.
    test("[nil, Array]") do
      @builder.append_values([[false], nil, [true, false, true]])
      built = @builder.finish
      expected = [
        [false],
        nil,
        [true, false, true],
      ]
      assert_equal(expected,
                   built.collect {|list| list ? list.to_a : nil})
    end

    # A false entry in the second argument yields a nil element even
    # though a value was supplied at that position.
    test("is_valids") do
      values = [[false], [true, true], [true, false, true]]
      is_valids = [true, false, true]
      @builder.append_values(values, is_valids)
      built = @builder.finish
      expected = [
        [false],
        nil,
        [true, false, true],
      ]
      assert_equal(expected,
                   built.collect {|list| list ? list.to_a : nil})
    end
  end

  sub_test_case("#append") do
    # Calling #append with no argument and then appending to the
    # value builder directly.
    test("backward compatibility") do
      @builder.append
      @builder.value_builder.append(true)
      @builder.value_builder.append(false)
      @builder.append
      @builder.value_builder.append(true)
      built = @builder.finish

      expected = [
        [true, false],
        [true],
      ]
      assert_equal(expected,
                   built.collect {|list| list.to_a})
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Checks that Arrow::ListArray.new builds an array from a data type
# and plain Ruby values.
class ListArrayTest < Test::Unit::TestCase
  sub_test_case(".new") do
    # The values include a nil entry, which is expected back as nil.
    test("build") do
      data_type = Arrow::ListDataType.new(name: "visible", type: :boolean)
      values = [
        [true, false],
        nil,
        [false, true, false],
      ]
      array = Arrow::ListArray.new(data_type, values)
      actual = array.collect {|value| value ? value.to_a : nil}
      assert_equal(values, actual)
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Exercises the argument forms of Arrow::ListDataType.new: a field
# given directly or under the :field key, as an Arrow::Field or as a
# Hash description.
class ListDataTypeTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("Arrow::Field") do
      tag_field = Arrow::Field.new(:tag, :string)
      data_type = Arrow::ListDataType.new(tag_field)
      assert_equal("list<tag: string>", data_type.to_s)
    end

    test("Hash") do
      data_type = Arrow::ListDataType.new(name: "tag", type: :string)
      assert_equal("list<tag: string>", data_type.to_s)
    end

    test("field: Arrow::Field") do
      tag_field = Arrow::Field.new(:tag, :string)
      data_type = Arrow::ListDataType.new(field: tag_field)
      assert_equal("list<tag: string>", data_type.to_s)
    end

    test("field: Hash") do
      tag_description = {name: "tag", type: :string}
      data_type = Arrow::ListDataType.new(field: tag_description)
      assert_equal("list<tag: string>", data_type.to_s)
    end
  end
end
@@ -0,0 +1,116 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Exercises Arrow::RecordBatchBuilder: construction from a schema or
# a Hash, column builder lookup, and the append methods.
class RecordBatchBuilderTest < Test::Unit::TestCase
  sub_test_case(".new") do
    test("Schema") do
      schema = Arrow::Schema.new(visible: :boolean,
                                 count: :uint32)
      builder = Arrow::RecordBatchBuilder.new(schema)
      assert_equal(schema, builder.schema)
    end

    # A Hash is expected to produce the same schema as Arrow::Schema.new
    # given the same description.
    test("Hash") do
      builder = Arrow::RecordBatchBuilder.new(visible: :boolean,
                                              count: :uint32)
      expected_schema = Arrow::Schema.new(visible: :boolean,
                                          count: :uint32)
      assert_equal(expected_schema, builder.schema)
    end
  end

  sub_test_case("instance methods") do
    # A builder over "visible: boolean" and "count: uint32" columns.
    def setup
      @schema = Arrow::Schema.new(visible: :boolean,
                                  count: :uint32)
      @builder = Arrow::RecordBatchBuilder.new(@schema)
    end

    sub_test_case("#[]") do
      test("String") do
        column_builder = @builder["visible"]
        assert_equal(Arrow::BooleanDataType.new,
                     column_builder.value_data_type)
      end

      test("Symbol") do
        column_builder = @builder[:visible]
        assert_equal(Arrow::BooleanDataType.new,
                     column_builder.value_data_type)
      end

      test("Integer") do
        column_builder = @builder[1]
        assert_equal(Arrow::UInt32DataType.new,
                     column_builder.value_data_type)
      end
    end

    # #append takes both a record list and a column Hash; the expected
    # batch holds the record row followed by the column row.
    test("#append") do
      records = [
        {visible: true, count: 1},
      ]
      columns = {
        visible: [false],
        count: [2],
      }
      visible_array = Arrow::BooleanArray.new([true, false])
      count_array = Arrow::UInt32Array.new([1, 2])
      arrays = [visible_array, count_array]
      @builder.append(records, columns)
      expected = Arrow::RecordBatch.new(@schema,
                                        visible_array.length,
                                        arrays)
      assert_equal(expected, @builder.flush)
    end

    # Records are given as Hashes (with an unknown or a missing key),
    # as Arrays (full and short) and as nil; the expected arrays hold
    # nil wherever a value was not supplied.
    test("#append_records") do
      records = [
        {visible: true, count: 1},
        {visible: true, count: 2, garbage: "garbage"},
        {visible: true},
        [false, 4],
        nil,
        [true],
      ]
      visible_array =
        Arrow::BooleanArray.new([true, true, true, false, nil, true])
      count_array = Arrow::UInt32Array.new([1, 2, nil, 4, nil, nil])
      arrays = [visible_array, count_array]
      @builder.append_records(records)
      expected = Arrow::RecordBatch.new(@schema,
                                        visible_array.length,
                                        arrays)
      assert_equal(expected, @builder.flush)
    end

    # The expected arrays are built from the same value lists that are
    # appended as columns.
    test("#append_columns") do
      visible_values = [true, true, true, false, nil, true]
      count_values = [1, 2, nil, 4, nil, nil]
      columns = {
        visible: visible_values,
        count: count_values,
      }
      visible_array = Arrow::BooleanArray.new(visible_values)
      count_array = Arrow::UInt32Array.new(count_values)
      arrays = [visible_array, count_array]
      @builder.append_columns(columns)
      expected = Arrow::RecordBatch.new(@schema,
                                        visible_array.length,
                                        arrays)
      assert_equal(expected, @builder.flush)
    end
  end
end