red-arrow 18.1.0 → 19.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
data/test/test-buffer.rb DELETED
@@ -1,49 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class BufferTest < Test::Unit::TestCase
19
- sub_test_case(".new") do
20
- test("GC") do
21
- data = "Hello"
22
- data_id = data.object_id
23
- _buffer = Arrow::Buffer.new(data)
24
- data = nil
25
- GC.start
26
- assert_equal("Hello", ObjectSpace._id2ref(data_id))
27
- end
28
- end
29
-
30
- sub_test_case("instance methods") do
31
- def setup
32
- @buffer = Arrow::Buffer.new("Hello")
33
- end
34
-
35
- sub_test_case("#==") do
36
- test("Arrow::Buffer") do
37
- assert do
38
- @buffer == @buffer
39
- end
40
- end
41
-
42
- test("not Arrow::Buffer") do
43
- assert do
44
- not (@buffer == 29)
45
- end
46
- end
47
- end
48
- end
49
- end
data/test/test-chunked-array.rb DELETED
@@ -1,198 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class ChunkedArrayTest < Test::Unit::TestCase
19
- test("#each") do
20
- arrays = [
21
- Arrow::BooleanArray.new([true, false]),
22
- Arrow::BooleanArray.new([nil, true]),
23
- ]
24
- chunked_array = Arrow::ChunkedArray.new(arrays)
25
- assert_equal([true, false, nil, true],
26
- chunked_array.to_a)
27
- end
28
-
29
- sub_test_case("#pack") do
30
- test("basic array") do
31
- arrays = [
32
- Arrow::BooleanArray.new([true, false]),
33
- Arrow::BooleanArray.new([nil, true]),
34
- ]
35
- chunked_array = Arrow::ChunkedArray.new(arrays)
36
- packed_chunked_array = chunked_array.pack
37
- assert_equal([
38
- Arrow::BooleanArray,
39
- [true, false, nil, true],
40
- ],
41
- [
42
- packed_chunked_array.class,
43
- packed_chunked_array.to_a,
44
- ])
45
- end
46
-
47
- test("TimestampArray") do
48
- type = Arrow::TimestampDataType.new(:nano)
49
- arrays = [
50
- Arrow::TimestampArrayBuilder.new(type).build([Time.at(0)]),
51
- Arrow::TimestampArrayBuilder.new(type).build([Time.at(1)]),
52
- ]
53
- chunked_array = Arrow::ChunkedArray.new(arrays)
54
- packed_chunked_array = chunked_array.pack
55
- assert_equal([
56
- Arrow::TimestampArray,
57
- [Time.at(0), Time.at(1)],
58
- ],
59
- [
60
- packed_chunked_array.class,
61
- packed_chunked_array.to_a,
62
- ])
63
- end
64
- end
65
-
66
- sub_test_case("#==") do
67
- def setup
68
- arrays = [
69
- Arrow::BooleanArray.new([true]),
70
- Arrow::BooleanArray.new([false, true]),
71
- ]
72
- @chunked_array = Arrow::ChunkedArray.new(arrays)
73
- end
74
-
75
- test("Arrow::ChunkedArray") do
76
- assert do
77
- @chunked_array == @chunked_array
78
- end
79
- end
80
-
81
- test("not Arrow::ChunkedArray") do
82
- assert do
83
- not (@chunked_array == 29)
84
- end
85
- end
86
- end
87
-
88
- sub_test_case("#filter") do
89
- def setup
90
- arrays = [
91
- Arrow::BooleanArray.new([false, true]),
92
- Arrow::BooleanArray.new([false, true, false]),
93
- ]
94
- @chunked_array = Arrow::ChunkedArray.new(arrays)
95
- @options = Arrow::FilterOptions.new
96
- @options.null_selection_behavior = :emit_null
97
- end
98
-
99
- test("Array: boolean") do
100
- filter = [nil, true, true, false, true]
101
- chunks = [
102
- Arrow::BooleanArray.new([nil, true]),
103
- Arrow::BooleanArray.new([false, false]),
104
- ]
105
- filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
106
- assert_equal(filtered_chunked_array,
107
- @chunked_array.filter(filter, @options))
108
- end
109
-
110
- test("Arrow::BooleanArray") do
111
- filter = Arrow::BooleanArray.new([nil, true, true, false, true])
112
- chunks = [
113
- Arrow::BooleanArray.new([nil, true]),
114
- Arrow::BooleanArray.new([false, false]),
115
- ]
116
- filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
117
- assert_equal(filtered_chunked_array,
118
- @chunked_array.filter(filter, @options))
119
- end
120
-
121
- test("Arrow::ChunkedArray") do
122
- chunks = [
123
- Arrow::BooleanArray.new([nil, true]),
124
- Arrow::BooleanArray.new([true, false, true]),
125
- ]
126
- filter = Arrow::ChunkedArray.new(chunks)
127
- filtered_chunks = [
128
- Arrow::BooleanArray.new([nil, true]),
129
- Arrow::BooleanArray.new([false, false]),
130
- ]
131
- filtered_chunked_array = Arrow::ChunkedArray.new(filtered_chunks)
132
- assert_equal(filtered_chunked_array,
133
- @chunked_array.filter(filter, @options))
134
- end
135
- end
136
-
137
- sub_test_case("#take") do
138
- def setup
139
- chunks = [
140
- Arrow::Int16Array.new([1, 0]),
141
- Arrow::Int16Array.new([2]),
142
- ]
143
- @chunked_array = Arrow::ChunkedArray.new(chunks)
144
- end
145
-
146
- test("Arrow: boolean") do
147
- chunks = [
148
- Arrow::Int16Array.new([0, 1]),
149
- Arrow::Int16Array.new([2])
150
- ]
151
- taken_chunked_array = Arrow::ChunkedArray.new(chunks)
152
- indices = [1, 0, 2]
153
- assert_equal(taken_chunked_array,
154
- @chunked_array.take(indices))
155
- end
156
-
157
- test("Arrow::Array") do
158
- chunks = [
159
- Arrow::Int16Array.new([0, 1]),
160
- Arrow::Int16Array.new([2])
161
- ]
162
- taken_chunked_array = Arrow::ChunkedArray.new(chunks)
163
- indices = Arrow::Int16Array.new([1, 0, 2])
164
- assert_equal(taken_chunked_array,
165
- @chunked_array.take(indices))
166
- end
167
-
168
- test("Arrow::ChunkedArray") do
169
- taken_chunks = [
170
- Arrow::Int16Array.new([0, 1]),
171
- Arrow::Int16Array.new([2])
172
- ]
173
- taken_chunked_array = Arrow::ChunkedArray.new(taken_chunks)
174
- indices_chunks = [
175
- Arrow::Int16Array.new([1, 0]),
176
- Arrow::Int16Array.new([2])
177
- ]
178
- indices = Arrow::ChunkedArray.new(indices_chunks)
179
- assert_equal(taken_chunked_array,
180
- @chunked_array.take(indices))
181
- end
182
- end
183
-
184
- test("#cast") do
185
- chunked_array = Arrow::ChunkedArray.new([[1, nil, 3]])
186
- assert_equal(Arrow::ChunkedArray.new([["1", nil, "3"]]),
187
- chunked_array.cast(:string))
188
- end
189
-
190
- test("#index") do
191
- arrays = [
192
- Arrow::Int32Array.new([1, 2]),
193
- Arrow::Int32Array.new([3, 4, 5]),
194
- ]
195
- chunked_array = Arrow::ChunkedArray.new(arrays)
196
- assert_equal(2, chunked_array.index(3))
197
- end
198
- end
data/test/test-column.rb DELETED
@@ -1,123 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class ColumnTest < Test::Unit::TestCase
19
- def setup
20
- table = Arrow::Table.new("visible" => [true, nil, false])
21
- @column = table.visible
22
- end
23
-
24
- test("#name") do
25
- assert_equal("visible", @column.name)
26
- end
27
-
28
- test("#data_type") do
29
- assert_equal(Arrow::BooleanDataType.new, @column.data_type)
30
- end
31
-
32
- test("#null?") do
33
- assert do
34
- @column.null?(1)
35
- end
36
- end
37
-
38
- test("#valid?") do
39
- assert do
40
- @column.valid?(0)
41
- end
42
- end
43
-
44
- test("#each") do
45
- assert_equal([true, nil, false], @column.each.to_a)
46
- end
47
-
48
- test("#reverse_each") do
49
- assert_equal([false, nil, true], @column.reverse_each.to_a)
50
- end
51
-
52
- test("#n_rows") do
53
- assert_equal(3, @column.n_rows)
54
- end
55
-
56
- test("#n_nulls") do
57
- assert_equal(1, @column.n_nulls)
58
- end
59
-
60
- sub_test_case("#==") do
61
- test("same value") do
62
- table1 = Arrow::Table.new("visible" => [true, false])
63
- table2 = Arrow::Table.new("visible" => [true, false])
64
- assert do
65
- table1.visible == table2.visible
66
- end
67
- end
68
-
69
- test("different name") do
70
- table1 = Arrow::Table.new("visible" => [true, false])
71
- table2 = Arrow::Table.new("invisible" => [true, false])
72
- assert do
73
- not table1.visible == table2.invisible
74
- end
75
- end
76
-
77
- test("different value") do
78
- table1 = Arrow::Table.new("visible" => [true, false])
79
- table2 = Arrow::Table.new("visible" => [true, true])
80
- assert do
81
- not table1.visible == table2.visible
82
- end
83
- end
84
-
85
- test("not Arrow::Column") do
86
- table = Arrow::Table.new("visible" => [true, false])
87
- assert do
88
- not table.visible == 29
89
- end
90
- end
91
- end
92
-
93
- test("#count") do
94
- table = Arrow::Table.new("revenue" => [1, nil, 3])
95
- assert_equal(2, table["revenue"].count)
96
- end
97
-
98
- test("#min") do
99
- table = Arrow::Table.new("revenue" => [1, 2, 3])
100
- assert_equal(1, table["revenue"].min)
101
- end
102
-
103
- test("#max") do
104
- table = Arrow::Table.new("revenue" => [1, 2, 3])
105
- assert_equal(3, table["revenue"].max)
106
- end
107
-
108
- test("#sum") do
109
- table = Arrow::Table.new("revenue" => [1, 2, 3])
110
- assert_equal(6, table["revenue"].sum)
111
- end
112
-
113
- test("#uniq") do
114
- table = Arrow::Table.new("revenue" => [1, 2, 2])
115
- assert_equal([1, 2], table["revenue"].uniq)
116
- end
117
-
118
- test("#cast") do
119
- table = Arrow::Table.new("revenue" => [1, nil, 3])
120
- assert_equal(Arrow::ChunkedArray.new([["1", nil, "3"]]),
121
- table["revenue"].cast(:string))
122
- end
123
- end
data/test/test-csv-loader.rb DELETED
@@ -1,297 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class CSVLoaderTest < Test::Unit::TestCase
19
- include Helper::Fixture
20
- include Helper::Omittable
21
-
22
- def load_csv(input)
23
- Arrow::CSVLoader.load(input, skip_lines: /^#/)
24
- end
25
-
26
- sub_test_case(".load") do
27
- test("String: data: with header") do
28
- data = fixture_path("with-header-float.csv").read
29
- assert_equal(<<-TABLE, load_csv(data).to_s)
30
- name score
31
- (utf8) (double)
32
- 0 alice 10.100000
33
- 1 bob 29.200000
34
- 2 chris -1.300000
35
- TABLE
36
- end
37
-
38
- test("String: data: without header") do
39
- data = fixture_path("without-header-float.csv").read
40
- assert_equal(<<-TABLE, load_csv(data).to_s)
41
- 0 1
42
- (utf8) (double)
43
- 0 alice 10.100000
44
- 1 bob 29.200000
45
- 2 chris -1.300000
46
- TABLE
47
- end
48
-
49
- test("String: path: with header") do
50
- path = fixture_path("with-header-float.csv").to_s
51
- assert_equal(<<-TABLE, load_csv(path).to_s)
52
- name score
53
- (utf8) (double)
54
- 0 alice 10.100000
55
- 1 bob 29.200000
56
- 2 chris -1.300000
57
- TABLE
58
- end
59
-
60
- test("String: path: without header") do
61
- path = fixture_path("without-header-float.csv").to_s
62
- assert_equal(<<-TABLE, load_csv(path).to_s)
63
- 0 1
64
- (utf8) (double)
65
- 0 alice 10.100000
66
- 1 bob 29.200000
67
- 2 chris -1.300000
68
- TABLE
69
- end
70
-
71
- test("Pathname: with header") do
72
- path = fixture_path("with-header-float.csv")
73
- assert_equal(<<-TABLE, load_csv(path).to_s)
74
- name score
75
- (utf8) (double)
76
- 0 alice 10.100000
77
- 1 bob 29.200000
78
- 2 chris -1.300000
79
- TABLE
80
- end
81
-
82
- test("Pathname: without header") do
83
- path = fixture_path("without-header-float.csv")
84
- assert_equal(<<-TABLE, load_csv(path).to_s)
85
- 0 1
86
- (utf8) (double)
87
- 0 alice 10.100000
88
- 1 bob 29.200000
89
- 2 chris -1.300000
90
- TABLE
91
- end
92
-
93
- test("null: with double quote") do
94
- path = fixture_path("null-with-double-quote.csv").to_s
95
- assert_equal(<<-TABLE, load_csv(path).to_s)
96
- name score
97
- (utf8) (int8)
98
- 0 alice 10
99
- 1 bob (null)
100
- 2 chris -1
101
- TABLE
102
- end
103
-
104
- test("null: without double quote") do
105
- path = fixture_path("null-without-double-quote.csv").to_s
106
- assert_equal(<<-TABLE, load_csv(path).to_s)
107
- name score
108
- (utf8) (int8)
109
- 0 alice 10
110
- 1 bob (null)
111
- 2 chris -1
112
- TABLE
113
- end
114
-
115
- test("number: float, integer") do
116
- path = fixture_path("float-integer.csv").to_s
117
- assert_equal([2.9, 10, -1.1],
118
- load_csv(path)[:score].to_a)
119
- end
120
-
121
- test("number: integer, float") do
122
- path = fixture_path("integer-float.csv").to_s
123
- assert_equal([10.0, 2.9, -1.1],
124
- load_csv(path)[:score].to_a)
125
- end
126
- end
127
-
128
- sub_test_case("CSVReader") do
129
- def load_csv(data, **options)
130
- Arrow::CSVLoader.load(data, **options)
131
- end
132
-
133
- sub_test_case(":headers") do
134
- test("true") do
135
- values = Arrow::StringArray.new(["a", "b", "c"])
136
- assert_equal(Arrow::Table.new(value: values),
137
- load_csv(<<-CSV, headers: true))
138
- value
139
- a
140
- b
141
- c
142
- CSV
143
- end
144
-
145
- test(":first_line") do
146
- values = Arrow::StringArray.new(["a", "b", "c"])
147
- assert_equal(Arrow::Table.new(value: values),
148
- load_csv(<<-CSV, headers: :first_line))
149
- value
150
- a
151
- b
152
- c
153
- CSV
154
- end
155
-
156
- test("truthy") do
157
- values = Arrow::StringArray.new(["a", "b", "c"])
158
- assert_equal(Arrow::Table.new(value: values),
159
- load_csv(<<-CSV, headers: 0))
160
- value
161
- a
162
- b
163
- c
164
- CSV
165
- end
166
-
167
- test("Array of column names") do
168
- values = Arrow::StringArray.new(["a", "b", "c"])
169
- assert_equal(Arrow::Table.new(column: values),
170
- load_csv(<<-CSV, headers: ["column"]))
171
- a
172
- b
173
- c
174
- CSV
175
- end
176
-
177
- test("false") do
178
- values = Arrow::StringArray.new(["a", "b", "c"])
179
- assert_equal(Arrow::Table.new(f0: values),
180
- load_csv(<<-CSV, headers: false))
181
- a
182
- b
183
- c
184
- CSV
185
- end
186
-
187
- test("nil") do
188
- values = Arrow::StringArray.new(["a", "b", "c"])
189
- assert_equal(Arrow::Table.new(f0: values),
190
- load_csv(<<-CSV, headers: nil))
191
- a
192
- b
193
- c
194
- CSV
195
- end
196
-
197
- test("string") do
198
- values = Arrow::StringArray.new(["a", "b", "c"])
199
- assert_equal(Arrow::Table.new(column: values),
200
- load_csv(<<-CSV, headers: "column"))
201
- a
202
- b
203
- c
204
- CSV
205
- end
206
- end
207
-
208
- test(":column_types") do
209
- assert_equal(Arrow::Table.new(:count => Arrow::UInt16Array.new([1, 2, 4])),
210
- load_csv(<<-CSV, column_types: {count: :uint16}))
211
- count
212
- 1
213
- 2
214
- 4
215
- CSV
216
- end
217
-
218
- test(":schema") do
219
- table = Arrow::Table.new(:count => Arrow::UInt16Array.new([1, 2, 4]))
220
- assert_equal(table,
221
- load_csv(<<-CSV, schema: table.schema))
222
- count
223
- 1
224
- 2
225
- 4
226
- CSV
227
- end
228
-
229
- test(":encoding") do
230
- messages = [
231
- "\u3042", # U+3042 HIRAGANA LETTER A
232
- "\u3044", # U+3044 HIRAGANA LETTER I
233
- "\u3046", # U+3046 HIRAGANA LETTER U
234
- ]
235
- table = Arrow::Table.new(:message => Arrow::StringArray.new(messages))
236
- encoding = "cp932"
237
- assert_equal(table,
238
- load_csv((["message"] + messages).join("\n").encode(encoding),
239
- schema: table.schema,
240
- encoding: encoding))
241
- end
242
-
243
- test(":encoding and :compression") do
244
- messages = [
245
- "\u3042", # U+3042 HIRAGANA LETTER A
246
- "\u3044", # U+3044 HIRAGANA LETTER I
247
- "\u3046", # U+3046 HIRAGANA LETTER U
248
- ]
249
- table = Arrow::Table.new(:message => Arrow::StringArray.new(messages))
250
- encoding = "cp932"
251
- csv = (["message"] + messages).join("\n").encode(encoding)
252
- assert_equal(table,
253
- load_csv(Zlib::Deflate.deflate(csv),
254
- schema: table.schema,
255
- encoding: encoding,
256
- compression: :gzip))
257
- end
258
-
259
- sub_test_case(":timestamp_parsers") do
260
- test(":iso8601") do
261
- require_glib(2, 58, 0)
262
- data_type = Arrow::TimestampDataType.new(:second,
263
- GLib::TimeZone.new("UTC"))
264
- timestamps = [
265
- Time.iso8601("2024-03-16T23:54:12Z"),
266
- Time.iso8601("2024-03-16T23:54:13Z"),
267
- Time.iso8601("2024-03-16T23:54:14Z"),
268
- ]
269
- values = Arrow::TimestampArray.new(data_type, timestamps)
270
- assert_equal(Arrow::Table.new(value: values),
271
- load_csv(<<-CSV, headers: true, timestamp_parsers: [:iso8601]))
272
- value
273
- #{timestamps[0].iso8601}
274
- #{timestamps[1].iso8601}
275
- #{timestamps[2].iso8601}
276
- CSV
277
- end
278
-
279
- test("String") do
280
- timestamps = [
281
- Time.iso8601("2024-03-16T23:54:12Z"),
282
- Time.iso8601("2024-03-16T23:54:13Z"),
283
- Time.iso8601("2024-03-16T23:54:14Z"),
284
- ]
285
- values = Arrow::TimestampArray.new(:second, timestamps)
286
- format = "%Y-%m-%dT%H:%M:%S"
287
- assert_equal(Arrow::Table.new(value: values).schema,
288
- load_csv(<<-CSV, headers: true, timestamp_parsers: [format]).schema)
289
- value
290
- #{timestamps[0].iso8601.chomp("Z")}
291
- #{timestamps[1].iso8601.chomp("Z")}
292
- #{timestamps[2].iso8601.chomp("Z")}
293
- CSV
294
- end
295
- end
296
- end
297
- end