red-arrow 18.1.0 → 19.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133)
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
data/test/test-buffer.rb DELETED
@@ -1,49 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class BufferTest < Test::Unit::TestCase
19
- sub_test_case(".new") do
20
- test("GC") do
21
- data = "Hello"
22
- data_id = data.object_id
23
- _buffer = Arrow::Buffer.new(data)
24
- data = nil
25
- GC.start
26
- assert_equal("Hello", ObjectSpace._id2ref(data_id))
27
- end
28
- end
29
-
30
- sub_test_case("instance methods") do
31
- def setup
32
- @buffer = Arrow::Buffer.new("Hello")
33
- end
34
-
35
- sub_test_case("#==") do
36
- test("Arrow::Buffer") do
37
- assert do
38
- @buffer == @buffer
39
- end
40
- end
41
-
42
- test("not Arrow::Buffer") do
43
- assert do
44
- not (@buffer == 29)
45
- end
46
- end
47
- end
48
- end
49
- end
data/test/test-chunked-array.rb DELETED
@@ -1,198 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class ChunkedArrayTest < Test::Unit::TestCase
19
- test("#each") do
20
- arrays = [
21
- Arrow::BooleanArray.new([true, false]),
22
- Arrow::BooleanArray.new([nil, true]),
23
- ]
24
- chunked_array = Arrow::ChunkedArray.new(arrays)
25
- assert_equal([true, false, nil, true],
26
- chunked_array.to_a)
27
- end
28
-
29
- sub_test_case("#pack") do
30
- test("basic array") do
31
- arrays = [
32
- Arrow::BooleanArray.new([true, false]),
33
- Arrow::BooleanArray.new([nil, true]),
34
- ]
35
- chunked_array = Arrow::ChunkedArray.new(arrays)
36
- packed_chunked_array = chunked_array.pack
37
- assert_equal([
38
- Arrow::BooleanArray,
39
- [true, false, nil, true],
40
- ],
41
- [
42
- packed_chunked_array.class,
43
- packed_chunked_array.to_a,
44
- ])
45
- end
46
-
47
- test("TimestampArray") do
48
- type = Arrow::TimestampDataType.new(:nano)
49
- arrays = [
50
- Arrow::TimestampArrayBuilder.new(type).build([Time.at(0)]),
51
- Arrow::TimestampArrayBuilder.new(type).build([Time.at(1)]),
52
- ]
53
- chunked_array = Arrow::ChunkedArray.new(arrays)
54
- packed_chunked_array = chunked_array.pack
55
- assert_equal([
56
- Arrow::TimestampArray,
57
- [Time.at(0), Time.at(1)],
58
- ],
59
- [
60
- packed_chunked_array.class,
61
- packed_chunked_array.to_a,
62
- ])
63
- end
64
- end
65
-
66
- sub_test_case("#==") do
67
- def setup
68
- arrays = [
69
- Arrow::BooleanArray.new([true]),
70
- Arrow::BooleanArray.new([false, true]),
71
- ]
72
- @chunked_array = Arrow::ChunkedArray.new(arrays)
73
- end
74
-
75
- test("Arrow::ChunkedArray") do
76
- assert do
77
- @chunked_array == @chunked_array
78
- end
79
- end
80
-
81
- test("not Arrow::ChunkedArray") do
82
- assert do
83
- not (@chunked_array == 29)
84
- end
85
- end
86
- end
87
-
88
- sub_test_case("#filter") do
89
- def setup
90
- arrays = [
91
- Arrow::BooleanArray.new([false, true]),
92
- Arrow::BooleanArray.new([false, true, false]),
93
- ]
94
- @chunked_array = Arrow::ChunkedArray.new(arrays)
95
- @options = Arrow::FilterOptions.new
96
- @options.null_selection_behavior = :emit_null
97
- end
98
-
99
- test("Array: boolean") do
100
- filter = [nil, true, true, false, true]
101
- chunks = [
102
- Arrow::BooleanArray.new([nil, true]),
103
- Arrow::BooleanArray.new([false, false]),
104
- ]
105
- filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
106
- assert_equal(filtered_chunked_array,
107
- @chunked_array.filter(filter, @options))
108
- end
109
-
110
- test("Arrow::BooleanArray") do
111
- filter = Arrow::BooleanArray.new([nil, true, true, false, true])
112
- chunks = [
113
- Arrow::BooleanArray.new([nil, true]),
114
- Arrow::BooleanArray.new([false, false]),
115
- ]
116
- filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
117
- assert_equal(filtered_chunked_array,
118
- @chunked_array.filter(filter, @options))
119
- end
120
-
121
- test("Arrow::ChunkedArray") do
122
- chunks = [
123
- Arrow::BooleanArray.new([nil, true]),
124
- Arrow::BooleanArray.new([true, false, true]),
125
- ]
126
- filter = Arrow::ChunkedArray.new(chunks)
127
- filtered_chunks = [
128
- Arrow::BooleanArray.new([nil, true]),
129
- Arrow::BooleanArray.new([false, false]),
130
- ]
131
- filtered_chunked_array = Arrow::ChunkedArray.new(filtered_chunks)
132
- assert_equal(filtered_chunked_array,
133
- @chunked_array.filter(filter, @options))
134
- end
135
- end
136
-
137
- sub_test_case("#take") do
138
- def setup
139
- chunks = [
140
- Arrow::Int16Array.new([1, 0]),
141
- Arrow::Int16Array.new([2]),
142
- ]
143
- @chunked_array = Arrow::ChunkedArray.new(chunks)
144
- end
145
-
146
- test("Arrow: boolean") do
147
- chunks = [
148
- Arrow::Int16Array.new([0, 1]),
149
- Arrow::Int16Array.new([2])
150
- ]
151
- taken_chunked_array = Arrow::ChunkedArray.new(chunks)
152
- indices = [1, 0, 2]
153
- assert_equal(taken_chunked_array,
154
- @chunked_array.take(indices))
155
- end
156
-
157
- test("Arrow::Array") do
158
- chunks = [
159
- Arrow::Int16Array.new([0, 1]),
160
- Arrow::Int16Array.new([2])
161
- ]
162
- taken_chunked_array = Arrow::ChunkedArray.new(chunks)
163
- indices = Arrow::Int16Array.new([1, 0, 2])
164
- assert_equal(taken_chunked_array,
165
- @chunked_array.take(indices))
166
- end
167
-
168
- test("Arrow::ChunkedArray") do
169
- taken_chunks = [
170
- Arrow::Int16Array.new([0, 1]),
171
- Arrow::Int16Array.new([2])
172
- ]
173
- taken_chunked_array = Arrow::ChunkedArray.new(taken_chunks)
174
- indices_chunks = [
175
- Arrow::Int16Array.new([1, 0]),
176
- Arrow::Int16Array.new([2])
177
- ]
178
- indices = Arrow::ChunkedArray.new(indices_chunks)
179
- assert_equal(taken_chunked_array,
180
- @chunked_array.take(indices))
181
- end
182
- end
183
-
184
- test("#cast") do
185
- chunked_array = Arrow::ChunkedArray.new([[1, nil, 3]])
186
- assert_equal(Arrow::ChunkedArray.new([["1", nil, "3"]]),
187
- chunked_array.cast(:string))
188
- end
189
-
190
- test("#index") do
191
- arrays = [
192
- Arrow::Int32Array.new([1, 2]),
193
- Arrow::Int32Array.new([3, 4, 5]),
194
- ]
195
- chunked_array = Arrow::ChunkedArray.new(arrays)
196
- assert_equal(2, chunked_array.index(3))
197
- end
198
- end
data/test/test-column.rb DELETED
@@ -1,123 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class ColumnTest < Test::Unit::TestCase
19
- def setup
20
- table = Arrow::Table.new("visible" => [true, nil, false])
21
- @column = table.visible
22
- end
23
-
24
- test("#name") do
25
- assert_equal("visible", @column.name)
26
- end
27
-
28
- test("#data_type") do
29
- assert_equal(Arrow::BooleanDataType.new, @column.data_type)
30
- end
31
-
32
- test("#null?") do
33
- assert do
34
- @column.null?(1)
35
- end
36
- end
37
-
38
- test("#valid?") do
39
- assert do
40
- @column.valid?(0)
41
- end
42
- end
43
-
44
- test("#each") do
45
- assert_equal([true, nil, false], @column.each.to_a)
46
- end
47
-
48
- test("#reverse_each") do
49
- assert_equal([false, nil, true], @column.reverse_each.to_a)
50
- end
51
-
52
- test("#n_rows") do
53
- assert_equal(3, @column.n_rows)
54
- end
55
-
56
- test("#n_nulls") do
57
- assert_equal(1, @column.n_nulls)
58
- end
59
-
60
- sub_test_case("#==") do
61
- test("same value") do
62
- table1 = Arrow::Table.new("visible" => [true, false])
63
- table2 = Arrow::Table.new("visible" => [true, false])
64
- assert do
65
- table1.visible == table2.visible
66
- end
67
- end
68
-
69
- test("different name") do
70
- table1 = Arrow::Table.new("visible" => [true, false])
71
- table2 = Arrow::Table.new("invisible" => [true, false])
72
- assert do
73
- not table1.visible == table2.invisible
74
- end
75
- end
76
-
77
- test("different value") do
78
- table1 = Arrow::Table.new("visible" => [true, false])
79
- table2 = Arrow::Table.new("visible" => [true, true])
80
- assert do
81
- not table1.visible == table2.visible
82
- end
83
- end
84
-
85
- test("not Arrow::Column") do
86
- table = Arrow::Table.new("visible" => [true, false])
87
- assert do
88
- not table.visible == 29
89
- end
90
- end
91
- end
92
-
93
- test("#count") do
94
- table = Arrow::Table.new("revenue" => [1, nil, 3])
95
- assert_equal(2, table["revenue"].count)
96
- end
97
-
98
- test("#min") do
99
- table = Arrow::Table.new("revenue" => [1, 2, 3])
100
- assert_equal(1, table["revenue"].min)
101
- end
102
-
103
- test("#max") do
104
- table = Arrow::Table.new("revenue" => [1, 2, 3])
105
- assert_equal(3, table["revenue"].max)
106
- end
107
-
108
- test("#sum") do
109
- table = Arrow::Table.new("revenue" => [1, 2, 3])
110
- assert_equal(6, table["revenue"].sum)
111
- end
112
-
113
- test("#uniq") do
114
- table = Arrow::Table.new("revenue" => [1, 2, 2])
115
- assert_equal([1, 2], table["revenue"].uniq)
116
- end
117
-
118
- test("#cast") do
119
- table = Arrow::Table.new("revenue" => [1, nil, 3])
120
- assert_equal(Arrow::ChunkedArray.new([["1", nil, "3"]]),
121
- table["revenue"].cast(:string))
122
- end
123
- end
data/test/test-csv-loader.rb DELETED
@@ -1,297 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class CSVLoaderTest < Test::Unit::TestCase
19
- include Helper::Fixture
20
- include Helper::Omittable
21
-
22
- def load_csv(input)
23
- Arrow::CSVLoader.load(input, skip_lines: /^#/)
24
- end
25
-
26
- sub_test_case(".load") do
27
- test("String: data: with header") do
28
- data = fixture_path("with-header-float.csv").read
29
- assert_equal(<<-TABLE, load_csv(data).to_s)
30
- name score
31
- (utf8) (double)
32
- 0 alice 10.100000
33
- 1 bob 29.200000
34
- 2 chris -1.300000
35
- TABLE
36
- end
37
-
38
- test("String: data: without header") do
39
- data = fixture_path("without-header-float.csv").read
40
- assert_equal(<<-TABLE, load_csv(data).to_s)
41
- 0 1
42
- (utf8) (double)
43
- 0 alice 10.100000
44
- 1 bob 29.200000
45
- 2 chris -1.300000
46
- TABLE
47
- end
48
-
49
- test("String: path: with header") do
50
- path = fixture_path("with-header-float.csv").to_s
51
- assert_equal(<<-TABLE, load_csv(path).to_s)
52
- name score
53
- (utf8) (double)
54
- 0 alice 10.100000
55
- 1 bob 29.200000
56
- 2 chris -1.300000
57
- TABLE
58
- end
59
-
60
- test("String: path: without header") do
61
- path = fixture_path("without-header-float.csv").to_s
62
- assert_equal(<<-TABLE, load_csv(path).to_s)
63
- 0 1
64
- (utf8) (double)
65
- 0 alice 10.100000
66
- 1 bob 29.200000
67
- 2 chris -1.300000
68
- TABLE
69
- end
70
-
71
- test("Pathname: with header") do
72
- path = fixture_path("with-header-float.csv")
73
- assert_equal(<<-TABLE, load_csv(path).to_s)
74
- name score
75
- (utf8) (double)
76
- 0 alice 10.100000
77
- 1 bob 29.200000
78
- 2 chris -1.300000
79
- TABLE
80
- end
81
-
82
- test("Pathname: without header") do
83
- path = fixture_path("without-header-float.csv")
84
- assert_equal(<<-TABLE, load_csv(path).to_s)
85
- 0 1
86
- (utf8) (double)
87
- 0 alice 10.100000
88
- 1 bob 29.200000
89
- 2 chris -1.300000
90
- TABLE
91
- end
92
-
93
- test("null: with double quote") do
94
- path = fixture_path("null-with-double-quote.csv").to_s
95
- assert_equal(<<-TABLE, load_csv(path).to_s)
96
- name score
97
- (utf8) (int8)
98
- 0 alice 10
99
- 1 bob (null)
100
- 2 chris -1
101
- TABLE
102
- end
103
-
104
- test("null: without double quote") do
105
- path = fixture_path("null-without-double-quote.csv").to_s
106
- assert_equal(<<-TABLE, load_csv(path).to_s)
107
- name score
108
- (utf8) (int8)
109
- 0 alice 10
110
- 1 bob (null)
111
- 2 chris -1
112
- TABLE
113
- end
114
-
115
- test("number: float, integer") do
116
- path = fixture_path("float-integer.csv").to_s
117
- assert_equal([2.9, 10, -1.1],
118
- load_csv(path)[:score].to_a)
119
- end
120
-
121
- test("number: integer, float") do
122
- path = fixture_path("integer-float.csv").to_s
123
- assert_equal([10.0, 2.9, -1.1],
124
- load_csv(path)[:score].to_a)
125
- end
126
- end
127
-
128
- sub_test_case("CSVReader") do
129
- def load_csv(data, **options)
130
- Arrow::CSVLoader.load(data, **options)
131
- end
132
-
133
- sub_test_case(":headers") do
134
- test("true") do
135
- values = Arrow::StringArray.new(["a", "b", "c"])
136
- assert_equal(Arrow::Table.new(value: values),
137
- load_csv(<<-CSV, headers: true))
138
- value
139
- a
140
- b
141
- c
142
- CSV
143
- end
144
-
145
- test(":first_line") do
146
- values = Arrow::StringArray.new(["a", "b", "c"])
147
- assert_equal(Arrow::Table.new(value: values),
148
- load_csv(<<-CSV, headers: :first_line))
149
- value
150
- a
151
- b
152
- c
153
- CSV
154
- end
155
-
156
- test("truthy") do
157
- values = Arrow::StringArray.new(["a", "b", "c"])
158
- assert_equal(Arrow::Table.new(value: values),
159
- load_csv(<<-CSV, headers: 0))
160
- value
161
- a
162
- b
163
- c
164
- CSV
165
- end
166
-
167
- test("Array of column names") do
168
- values = Arrow::StringArray.new(["a", "b", "c"])
169
- assert_equal(Arrow::Table.new(column: values),
170
- load_csv(<<-CSV, headers: ["column"]))
171
- a
172
- b
173
- c
174
- CSV
175
- end
176
-
177
- test("false") do
178
- values = Arrow::StringArray.new(["a", "b", "c"])
179
- assert_equal(Arrow::Table.new(f0: values),
180
- load_csv(<<-CSV, headers: false))
181
- a
182
- b
183
- c
184
- CSV
185
- end
186
-
187
- test("nil") do
188
- values = Arrow::StringArray.new(["a", "b", "c"])
189
- assert_equal(Arrow::Table.new(f0: values),
190
- load_csv(<<-CSV, headers: nil))
191
- a
192
- b
193
- c
194
- CSV
195
- end
196
-
197
- test("string") do
198
- values = Arrow::StringArray.new(["a", "b", "c"])
199
- assert_equal(Arrow::Table.new(column: values),
200
- load_csv(<<-CSV, headers: "column"))
201
- a
202
- b
203
- c
204
- CSV
205
- end
206
- end
207
-
208
- test(":column_types") do
209
- assert_equal(Arrow::Table.new(:count => Arrow::UInt16Array.new([1, 2, 4])),
210
- load_csv(<<-CSV, column_types: {count: :uint16}))
211
- count
212
- 1
213
- 2
214
- 4
215
- CSV
216
- end
217
-
218
- test(":schema") do
219
- table = Arrow::Table.new(:count => Arrow::UInt16Array.new([1, 2, 4]))
220
- assert_equal(table,
221
- load_csv(<<-CSV, schema: table.schema))
222
- count
223
- 1
224
- 2
225
- 4
226
- CSV
227
- end
228
-
229
- test(":encoding") do
230
- messages = [
231
- "\u3042", # U+3042 HIRAGANA LETTER A
232
- "\u3044", # U+3044 HIRAGANA LETTER I
233
- "\u3046", # U+3046 HIRAGANA LETTER U
234
- ]
235
- table = Arrow::Table.new(:message => Arrow::StringArray.new(messages))
236
- encoding = "cp932"
237
- assert_equal(table,
238
- load_csv((["message"] + messages).join("\n").encode(encoding),
239
- schema: table.schema,
240
- encoding: encoding))
241
- end
242
-
243
- test(":encoding and :compression") do
244
- messages = [
245
- "\u3042", # U+3042 HIRAGANA LETTER A
246
- "\u3044", # U+3044 HIRAGANA LETTER I
247
- "\u3046", # U+3046 HIRAGANA LETTER U
248
- ]
249
- table = Arrow::Table.new(:message => Arrow::StringArray.new(messages))
250
- encoding = "cp932"
251
- csv = (["message"] + messages).join("\n").encode(encoding)
252
- assert_equal(table,
253
- load_csv(Zlib::Deflate.deflate(csv),
254
- schema: table.schema,
255
- encoding: encoding,
256
- compression: :gzip))
257
- end
258
-
259
- sub_test_case(":timestamp_parsers") do
260
- test(":iso8601") do
261
- require_glib(2, 58, 0)
262
- data_type = Arrow::TimestampDataType.new(:second,
263
- GLib::TimeZone.new("UTC"))
264
- timestamps = [
265
- Time.iso8601("2024-03-16T23:54:12Z"),
266
- Time.iso8601("2024-03-16T23:54:13Z"),
267
- Time.iso8601("2024-03-16T23:54:14Z"),
268
- ]
269
- values = Arrow::TimestampArray.new(data_type, timestamps)
270
- assert_equal(Arrow::Table.new(value: values),
271
- load_csv(<<-CSV, headers: true, timestamp_parsers: [:iso8601]))
272
- value
273
- #{timestamps[0].iso8601}
274
- #{timestamps[1].iso8601}
275
- #{timestamps[2].iso8601}
276
- CSV
277
- end
278
-
279
- test("String") do
280
- timestamps = [
281
- Time.iso8601("2024-03-16T23:54:12Z"),
282
- Time.iso8601("2024-03-16T23:54:13Z"),
283
- Time.iso8601("2024-03-16T23:54:14Z"),
284
- ]
285
- values = Arrow::TimestampArray.new(:second, timestamps)
286
- format = "%Y-%m-%dT%H:%M:%S"
287
- assert_equal(Arrow::Table.new(value: values).schema,
288
- load_csv(<<-CSV, headers: true, timestamp_parsers: [format]).schema)
289
- value
290
- #{timestamps[0].iso8601.chomp("Z")}
291
- #{timestamps[1].iso8601.chomp("Z")}
292
- #{timestamps[2].iso8601.chomp("Z")}
293
- CSV
294
- end
295
- end
296
- end
297
- end