red-arrow 8.0.0 → 24.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +15 -7
  3. data/ext/arrow/arrow.cpp +67 -0
  4. data/ext/arrow/converters.cpp +10 -0
  5. data/ext/arrow/converters.hpp +310 -46
  6. data/ext/arrow/extconf.rb +41 -22
  7. data/ext/arrow/raw-records.cpp +165 -2
  8. data/ext/arrow/red-arrow.hpp +2 -0
  9. data/ext/arrow/values.cpp +6 -2
  10. data/lib/arrow/array-builder.rb +89 -14
  11. data/{test/test-time32-data-type.rb → lib/arrow/array-computable.rb} +24 -16
  12. data/{test/test-buffer.rb → lib/arrow/array-statistics.rb} +19 -24
  13. data/lib/arrow/array.rb +40 -4
  14. data/lib/arrow/chunked-array.rb +56 -1
  15. data/lib/arrow/column-containable.rb +9 -0
  16. data/lib/arrow/column.rb +49 -4
  17. data/{test/test-tensor.rb → lib/arrow/csv-write-options.rb} +28 -31
  18. data/lib/arrow/data-type.rb +17 -3
  19. data/lib/arrow/decimal128-array-builder.rb +16 -6
  20. data/lib/arrow/decimal128.rb +14 -0
  21. data/lib/arrow/decimal256-array-builder.rb +16 -6
  22. data/lib/arrow/decimal256.rb +14 -0
  23. data/{test/test-float-scalar.rb → lib/arrow/dense-union-array-builder.rb} +27 -24
  24. data/{test/test-boolean-scalar.rb → lib/arrow/dense-union-array.rb} +7 -7
  25. data/lib/arrow/duration-array-builder.rb +27 -0
  26. data/lib/arrow/duration-array.rb +24 -0
  27. data/lib/arrow/duration-data-type.rb +32 -0
  28. data/lib/arrow/expression.rb +6 -2
  29. data/lib/arrow/field-containable.rb +1 -1
  30. data/lib/arrow/field.rb +44 -3
  31. data/lib/arrow/fixed-size-list-array-builder.rb +29 -0
  32. data/lib/arrow/fixed-size-list-data-type.rb +118 -0
  33. data/lib/arrow/function.rb +0 -1
  34. data/lib/arrow/half-float-array-builder.rb +32 -0
  35. data/lib/arrow/half-float-array.rb +24 -0
  36. data/lib/arrow/half-float.rb +118 -0
  37. data/{test/helper/fixture.rb → lib/arrow/input-referable.rb} +7 -6
  38. data/lib/arrow/jruby/array-builder.rb +114 -0
  39. data/lib/arrow/jruby/array.rb +109 -0
  40. data/lib/arrow/jruby/chunked-array.rb +36 -0
  41. data/lib/arrow/jruby/compression-type.rb +26 -0
  42. data/lib/arrow/jruby/csv-read-options.rb +32 -0
  43. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  44. data/lib/arrow/jruby/decimal128.rb +28 -0
  45. data/lib/arrow/jruby/decimal256.rb +28 -0
  46. data/{test/fixture/float-integer.csv → lib/arrow/jruby/error.rb} +7 -4
  47. data/lib/arrow/jruby/file-system.rb +24 -0
  48. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  49. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  50. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  51. data/{test/fixture/integer-float.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  52. data/lib/arrow/jruby/sort-options.rb +24 -0
  53. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  54. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  55. data/lib/arrow/jruby/writable.rb +24 -0
  56. data/lib/arrow/jruby.rb +52 -0
  57. data/{test/test-date32-array.rb → lib/arrow/large-list-array-builder.rb} +10 -5
  58. data/lib/arrow/large-list-data-type.rb +83 -0
  59. data/lib/arrow/libraries.rb +140 -0
  60. data/lib/arrow/list-array-builder.rb +1 -68
  61. data/lib/arrow/list-data-type.rb +3 -38
  62. data/{test/test-dictionary-array.rb → lib/arrow/list-field-resolvable.rb} +26 -17
  63. data/lib/arrow/list-slice-options.rb +76 -0
  64. data/lib/arrow/list-values-appendable.rb +88 -0
  65. data/lib/arrow/loader.rb +15 -96
  66. data/{test/test-decimal128-array.rb → lib/arrow/make-struct-options.rb} +18 -18
  67. data/lib/arrow/raw-table-converter.rb +10 -3
  68. data/lib/arrow/raw-tensor-converter.rb +89 -0
  69. data/lib/arrow/record-batch-file-reader.rb +2 -0
  70. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  71. data/lib/arrow/record-batch.rb +6 -2
  72. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +5 -4
  73. data/lib/arrow/scalar.rb +67 -0
  74. data/lib/arrow/slicer.rb +61 -0
  75. data/lib/arrow/sort-key.rb +3 -3
  76. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  77. data/lib/arrow/sparse-union-array.rb +26 -0
  78. data/lib/arrow/stream-decoder.rb +29 -0
  79. data/{test/test-decimal256-data-type.rb → lib/arrow/stream-listener.rb} +25 -9
  80. data/lib/arrow/string-array-builder.rb +30 -0
  81. data/lib/arrow/struct-array-builder.rb +0 -5
  82. data/lib/arrow/table-formatter.rb +38 -8
  83. data/lib/arrow/table-list-formatter.rb +3 -3
  84. data/lib/arrow/table-loader.rb +11 -5
  85. data/lib/arrow/table-saver.rb +4 -3
  86. data/lib/arrow/table-table-formatter.rb +7 -0
  87. data/lib/arrow/table.rb +180 -33
  88. data/lib/arrow/tensor.rb +144 -0
  89. data/lib/arrow/time-unit.rb +31 -0
  90. data/lib/arrow/time32-array-builder.rb +2 -14
  91. data/lib/arrow/time32-data-type.rb +9 -38
  92. data/lib/arrow/time64-array-builder.rb +2 -14
  93. data/lib/arrow/time64-data-type.rb +9 -38
  94. data/lib/arrow/timestamp-array-builder.rb +3 -15
  95. data/lib/arrow/timestamp-data-type.rb +9 -34
  96. data/{test/test-date64-array.rb → lib/arrow/timestamp-parser.rb} +14 -6
  97. data/lib/arrow/union-array-builder.rb +59 -0
  98. data/lib/arrow/union-array.rb +26 -0
  99. data/lib/arrow/version.rb +1 -1
  100. data/lib/arrow.rb +2 -7
  101. data/red-arrow.gemspec +74 -11
  102. metadata +85 -210
  103. data/test/fixture/TestOrcFile.test1.orc +0 -0
  104. data/test/fixture/with-header-float.csv +0 -20
  105. data/test/fixture/with-header.csv +0 -20
  106. data/test/fixture/without-header-float.csv +0 -19
  107. data/test/fixture/without-header.csv +0 -19
  108. data/test/helper/omittable.rb +0 -36
  109. data/test/helper.rb +0 -30
  110. data/test/raw-records/test-basic-arrays.rb +0 -395
  111. data/test/raw-records/test-dense-union-array.rb +0 -521
  112. data/test/raw-records/test-list-array.rb +0 -610
  113. data/test/raw-records/test-map-array.rb +0 -478
  114. data/test/raw-records/test-multiple-columns.rb +0 -65
  115. data/test/raw-records/test-sparse-union-array.rb +0 -511
  116. data/test/raw-records/test-struct-array.rb +0 -515
  117. data/test/raw-records/test-table.rb +0 -47
  118. data/test/run-test.rb +0 -71
  119. data/test/test-array-builder.rb +0 -136
  120. data/test/test-array.rb +0 -325
  121. data/test/test-bigdecimal.rb +0 -40
  122. data/test/test-binary-dictionary-array-builder.rb +0 -103
  123. data/test/test-chunked-array.rb +0 -183
  124. data/test/test-column.rb +0 -92
  125. data/test/test-csv-loader.rb +0 -250
  126. data/test/test-data-type.rb +0 -83
  127. data/test/test-decimal128-array-builder.rb +0 -112
  128. data/test/test-decimal128-data-type.rb +0 -31
  129. data/test/test-decimal128.rb +0 -102
  130. data/test/test-decimal256-array-builder.rb +0 -112
  131. data/test/test-decimal256-array.rb +0 -38
  132. data/test/test-decimal256.rb +0 -102
  133. data/test/test-dense-union-data-type.rb +0 -41
  134. data/test/test-dictionary-data-type.rb +0 -40
  135. data/test/test-expression.rb +0 -40
  136. data/test/test-feather.rb +0 -49
  137. data/test/test-field.rb +0 -91
  138. data/test/test-file-output-stream.rb +0 -54
  139. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  140. data/test/test-fixed-size-binary-array.rb +0 -36
  141. data/test/test-function.rb +0 -210
  142. data/test/test-group.rb +0 -180
  143. data/test/test-list-array-builder.rb +0 -79
  144. data/test/test-list-array.rb +0 -32
  145. data/test/test-list-data-type.rb +0 -69
  146. data/test/test-map-array-builder.rb +0 -110
  147. data/test/test-map-array.rb +0 -33
  148. data/test/test-memory-view.rb +0 -434
  149. data/test/test-orc.rb +0 -173
  150. data/test/test-record-batch-builder.rb +0 -125
  151. data/test/test-record-batch-file-reader.rb +0 -115
  152. data/test/test-record-batch-iterator.rb +0 -37
  153. data/test/test-record-batch-reader.rb +0 -46
  154. data/test/test-record-batch.rb +0 -182
  155. data/test/test-schema.rb +0 -134
  156. data/test/test-slicer.rb +0 -487
  157. data/test/test-sort-indices.rb +0 -40
  158. data/test/test-sort-key.rb +0 -81
  159. data/test/test-sort-options.rb +0 -58
  160. data/test/test-sparse-union-data-type.rb +0 -41
  161. data/test/test-string-dictionary-array-builder.rb +0 -103
  162. data/test/test-struct-array-builder.rb +0 -184
  163. data/test/test-struct-array.rb +0 -94
  164. data/test/test-struct-data-type.rb +0 -112
  165. data/test/test-table.rb +0 -1123
  166. data/test/test-time.rb +0 -288
  167. data/test/test-time32-array.rb +0 -81
  168. data/test/test-time64-array.rb +0 -81
  169. data/test/test-time64-data-type.rb +0 -42
  170. data/test/test-timestamp-array.rb +0 -45
  171. data/test/test-timestamp-data-type.rb +0 -42
  172. data/test/values/test-basic-arrays.rb +0 -325
  173. data/test/values/test-dense-union-array.rb +0 -509
  174. data/test/values/test-dictionary-array.rb +0 -295
  175. data/test/values/test-list-array.rb +0 -571
  176. data/test/values/test-map-array.rb +0 -466
  177. data/test/values/test-sparse-union-array.rb +0 -500
  178. data/test/values/test-struct-array.rb +0 -512
data/test/test-table.rb DELETED
@@ -1,1123 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class TableTest < Test::Unit::TestCase
19
- include Helper::Fixture
20
-
21
- def setup
22
- @count_field = Arrow::Field.new("count", :uint8)
23
- @visible_field = Arrow::Field.new("visible", :boolean)
24
- schema = Arrow::Schema.new([@count_field, @visible_field])
25
- count_arrays = [
26
- Arrow::UInt8Array.new([1, 2]),
27
- Arrow::UInt8Array.new([4, 8, 16]),
28
- Arrow::UInt8Array.new([32, 64]),
29
- Arrow::UInt8Array.new([128]),
30
- ]
31
- visible_arrays = [
32
- Arrow::BooleanArray.new([true, false, nil]),
33
- Arrow::BooleanArray.new([true]),
34
- Arrow::BooleanArray.new([true, false]),
35
- Arrow::BooleanArray.new([nil]),
36
- Arrow::BooleanArray.new([nil]),
37
- ]
38
- @count_array = Arrow::ChunkedArray.new(count_arrays)
39
- @visible_array = Arrow::ChunkedArray.new(visible_arrays)
40
- @table = Arrow::Table.new(schema, [@count_array, @visible_array])
41
- end
42
-
43
- test("#columns") do
44
- assert_equal([
45
- Arrow::Column.new(@table, 0),
46
- Arrow::Column.new(@table, 1),
47
- ],
48
- @table.columns)
49
- end
50
-
51
- sub_test_case("#slice") do
52
- test("Arrow::BooleanArray") do
53
- target_rows_raw = [nil, true, true, false, true, false, true, true]
54
- target_rows = Arrow::BooleanArray.new(target_rows_raw)
55
- assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
56
- count visible
57
- 0 (null) (null)
58
- 1 2 false
59
- 2 4 (null)
60
- 3 16 true
61
- 4 64 (null)
62
- 5 128 (null)
63
- TABLE
64
- end
65
-
66
- test("Array: boolean") do
67
- target_rows_raw = [nil, true, true, false, true, false, true, true]
68
- assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
69
- count visible
70
- 0 (null) (null)
71
- 1 2 false
72
- 2 4 (null)
73
- 3 16 true
74
- 4 64 (null)
75
- 5 128 (null)
76
- TABLE
77
- end
78
-
79
- test("Integer: positive") do
80
- assert_equal({"count" => 128, "visible" => nil},
81
- @table.slice(@table.n_rows - 1).to_h)
82
- end
83
-
84
- test("Integer: negative") do
85
- assert_equal({"count" => 1, "visible" => true},
86
- @table.slice(-@table.n_rows).to_h)
87
- end
88
-
89
- test("Integer: out of index") do
90
- assert_equal([
91
- nil,
92
- nil,
93
- ],
94
- [
95
- @table.slice(@table.n_rows),
96
- @table.slice(-(@table.n_rows + 1)),
97
- ])
98
- end
99
-
100
- test("Range: positive: include end") do
101
- assert_equal(<<-TABLE, @table.slice(2..4).to_s)
102
- count visible
103
- 0 4 (null)
104
- 1 8 true
105
- 2 16 true
106
- TABLE
107
- end
108
-
109
- test("Range: positive: exclude end") do
110
- assert_equal(<<-TABLE, @table.slice(2...4).to_s)
111
- count visible
112
- 0 4 (null)
113
- 1 8 true
114
- TABLE
115
- end
116
-
117
- test("Range: negative: include end") do
118
- assert_equal(<<-TABLE, @table.slice(-4..-2).to_s)
119
- count visible
120
- 0 16 true
121
- 1 32 false
122
- 2 64 (null)
123
- TABLE
124
- end
125
-
126
- test("Range: negative: exclude end") do
127
- assert_equal(<<-TABLE, @table.slice(-4...-2).to_s)
128
- count visible
129
- 0 16 true
130
- 1 32 false
131
- TABLE
132
- end
133
-
134
- test("[from, to]: positive") do
135
- assert_equal(<<-TABLE, @table.slice(0, 2).to_s)
136
- count visible
137
- 0 1 true
138
- 1 2 false
139
- TABLE
140
- end
141
-
142
- test("[from, to]: negative") do
143
- assert_equal(<<-TABLE, @table.slice(-4, 2).to_s)
144
- count visible
145
- 0 16 true
146
- 1 32 false
147
- TABLE
148
- end
149
-
150
- test("{key: Number}") do
151
- assert_equal(<<-TABLE, @table.slice(count: 16).to_s)
152
- count visible
153
- 0 16 true
154
- TABLE
155
- end
156
-
157
- test("{key: String}") do
158
- table = Arrow::Table.new(name: Arrow::StringArray.new(["a", "b", "c"]))
159
- assert_equal(<<-TABLE, table.slice(name: 'b').to_s)
160
- name
161
- 0 b
162
- TABLE
163
- end
164
-
165
- test("{key: true}") do
166
- assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
167
- count visible
168
- 0 1 true
169
- 1 (null) (null)
170
- 2 8 true
171
- 3 16 true
172
- 4 (null) (null)
173
- 5 (null) (null)
174
- TABLE
175
- end
176
-
177
- test("{key: false}") do
178
- assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
179
- count visible
180
- 0 2 false
181
- 1 (null) (null)
182
- 2 32 false
183
- 3 (null) (null)
184
- 4 (null) (null)
185
- TABLE
186
- end
187
-
188
- test("{key: Range}: beginless include end") do
189
- begin
190
- range = eval("..8")
191
- rescue SyntaxError
192
- omit("beginless range isn't supported")
193
- end
194
- assert_equal(<<-TABLE, @table.slice(count: range).to_s)
195
- count visible
196
- 0 1 true
197
- 1 2 false
198
- 2 4 (null)
199
- 3 8 true
200
- TABLE
201
- end
202
-
203
- test("{key: Range}: beginless exclude end") do
204
- begin
205
- range = eval("...8")
206
- rescue SyntaxError
207
- omit("beginless range isn't supported")
208
- end
209
- assert_equal(<<-TABLE, @table.slice(count: range).to_s)
210
- count visible
211
- 0 1 true
212
- 1 2 false
213
- 2 4 (null)
214
- TABLE
215
- end
216
-
217
- test("{key: Range}: endless") do
218
- begin
219
- range = eval("16..")
220
- rescue SyntaxError
221
- omit("endless range isn't supported")
222
- end
223
- assert_equal(<<-TABLE, @table.slice(count: range).to_s)
224
- count visible
225
- 0 16 true
226
- 1 32 false
227
- 2 64 (null)
228
- 3 128 (null)
229
- TABLE
230
- end
231
-
232
- test("{key: Range}: include end") do
233
- assert_equal(<<-TABLE, @table.slice(count: 1..16).to_s)
234
- count visible
235
- 0 1 true
236
- 1 2 false
237
- 2 4 (null)
238
- 3 8 true
239
- 4 16 true
240
- TABLE
241
- end
242
-
243
- test("{key: Range}: exclude end") do
244
- assert_equal(<<-TABLE, @table.slice(count: 1...16).to_s)
245
- count visible
246
- 0 1 true
247
- 1 2 false
248
- 2 4 (null)
249
- 3 8 true
250
- TABLE
251
- end
252
-
253
- test("{key1: Range, key2: true}") do
254
- assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
255
- count visible
256
- 0 2 false
257
- 1 (null) (null)
258
- 2 (null) (null)
259
- 3 (null) (null)
260
- TABLE
261
- end
262
-
263
- sub_test_case("wrong argument") do
264
- test("no arguments") do
265
- message = "wrong number of arguments (given 0, expected 1..2)"
266
- assert_raise(ArgumentError.new(message)) do
267
- @table.slice
268
- end
269
- end
270
-
271
- test("too many arguments") do
272
- message = "wrong number of arguments (given 3, expected 1..2)"
273
- assert_raise(ArgumentError.new(message)) do
274
- @table.slice(1, 2, 3)
275
- end
276
- end
277
-
278
- test("arguments: with block") do
279
- message = "must not specify both arguments and block"
280
- assert_raise(ArgumentError.new(message)) do
281
- @table.slice(1, 2) {}
282
- end
283
- end
284
-
285
- test("offset: too small") do
286
- n_rows = @table.n_rows
287
- offset = -(n_rows + 1)
288
- message = "offset is out of range (-#{n_rows + 1},#{n_rows}): #{offset}"
289
- assert_raise(ArgumentError.new(message)) do
290
- @table.slice(offset, 1)
291
- end
292
- end
293
-
294
- test("offset: too large") do
295
- n_rows = @table.n_rows
296
- offset = n_rows
297
- message = "offset is out of range (-#{n_rows + 1},#{n_rows}): #{offset}"
298
- assert_raise(ArgumentError.new(message)) do
299
- @table.slice(offset, 1)
300
- end
301
- end
302
- end
303
- end
304
-
305
- sub_test_case("#[]") do
306
- def setup
307
- @table = Arrow::Table.new(a: [true],
308
- b: [true],
309
- c: [true],
310
- d: [true],
311
- e: [true],
312
- f: [true],
313
- g: [true])
314
- end
315
-
316
- test("[String]") do
317
- assert_equal(Arrow::Column.new(@table, 0),
318
- @table["a"])
319
- end
320
-
321
- test("[Symbol]") do
322
- assert_equal(Arrow::Column.new(@table, 1),
323
- @table[:b])
324
- end
325
-
326
- test("[Integer]") do
327
- assert_equal(Arrow::Column.new(@table, 6),
328
- @table[-1])
329
- end
330
-
331
- test("[Range]") do
332
- assert_equal(Arrow::Table.new(d: [true],
333
- e: [true]),
334
- @table[3..4])
335
- end
336
-
337
- test("[[Symbol, String, Integer, Range]]") do
338
- assert_equal(Arrow::Table.new(c: [true],
339
- a: [true],
340
- g: [true],
341
- d: [true],
342
- e: [true]),
343
- @table[[:c, "a", -1, 3..4]])
344
- end
345
- end
346
-
347
- sub_test_case("#merge") do
348
- sub_test_case("Hash") do
349
- test("add") do
350
- name_array = Arrow::StringArray.new(["a", "b", "c", "d", "e", "f", "g", "h"])
351
- assert_equal(<<-TABLE, @table.merge(:name => name_array).to_s)
352
- count visible name
353
- 0 1 true a
354
- 1 2 false b
355
- 2 4 (null) c
356
- 3 8 true d
357
- 4 16 true e
358
- 5 32 false f
359
- 6 64 (null) g
360
- 7 128 (null) h
361
- TABLE
362
- end
363
-
364
- test("remove") do
365
- assert_equal(<<-TABLE, @table.merge(:visible => nil).to_s)
366
- count
367
- 0 1
368
- 1 2
369
- 2 4
370
- 3 8
371
- 4 16
372
- 5 32
373
- 6 64
374
- 7 128
375
- TABLE
376
- end
377
-
378
- test("replace") do
379
- visible_array = Arrow::Int32Array.new([1] * @visible_array.length)
380
- assert_equal(<<-TABLE, @table.merge(:visible => visible_array).to_s)
381
- count visible
382
- 0 1 1
383
- 1 2 1
384
- 2 4 1
385
- 3 8 1
386
- 4 16 1
387
- 5 32 1
388
- 6 64 1
389
- 7 128 1
390
- TABLE
391
- end
392
- end
393
-
394
- sub_test_case("Arrow::Table") do
395
- test("add") do
396
- name_array = Arrow::StringArray.new(["a", "b", "c", "d", "e", "f", "g", "h"])
397
- table = Arrow::Table.new("name" => name_array)
398
- assert_equal(<<-TABLE, @table.merge(table).to_s)
399
- count visible name
400
- 0 1 true a
401
- 1 2 false b
402
- 2 4 (null) c
403
- 3 8 true d
404
- 4 16 true e
405
- 5 32 false f
406
- 6 64 (null) g
407
- 7 128 (null) h
408
- TABLE
409
- end
410
-
411
- test("replace") do
412
- visible_array = Arrow::Int32Array.new([1] * @visible_array.length)
413
- table = Arrow::Table.new("visible" => visible_array)
414
- assert_equal(<<-TABLE, @table.merge(table).to_s)
415
- count visible
416
- 0 1 1
417
- 1 2 1
418
- 2 4 1
419
- 3 8 1
420
- 4 16 1
421
- 5 32 1
422
- 6 64 1
423
- 7 128 1
424
- TABLE
425
- end
426
- end
427
- end
428
-
429
- test("column name getter") do
430
- assert_equal(Arrow::Column.new(@table, 1),
431
- @table.visible)
432
- end
433
-
434
- sub_test_case("#remove_column") do
435
- test("String") do
436
- assert_equal(<<-TABLE, @table.remove_column("visible").to_s)
437
- count
438
- 0 1
439
- 1 2
440
- 2 4
441
- 3 8
442
- 4 16
443
- 5 32
444
- 6 64
445
- 7 128
446
- TABLE
447
- end
448
-
449
- test("Symbol") do
450
- assert_equal(<<-TABLE, @table.remove_column(:visible).to_s)
451
- count
452
- 0 1
453
- 1 2
454
- 2 4
455
- 3 8
456
- 4 16
457
- 5 32
458
- 6 64
459
- 7 128
460
- TABLE
461
- end
462
-
463
- test("unknown column name") do
464
- assert_raise(KeyError) do
465
- @table.remove_column(:nonexistent)
466
- end
467
- end
468
-
469
- test("Integer") do
470
- assert_equal(<<-TABLE, @table.remove_column(1).to_s)
471
- count
472
- 0 1
473
- 1 2
474
- 2 4
475
- 3 8
476
- 4 16
477
- 5 32
478
- 6 64
479
- 7 128
480
- TABLE
481
- end
482
-
483
- test("negative integer") do
484
- assert_equal(<<-TABLE, @table.remove_column(-1).to_s)
485
- count
486
- 0 1
487
- 1 2
488
- 2 4
489
- 3 8
490
- 4 16
491
- 5 32
492
- 6 64
493
- 7 128
494
- TABLE
495
- end
496
-
497
- test("too small index") do
498
- assert_raise(IndexError) do
499
- @table.remove_column(-3)
500
- end
501
- end
502
-
503
- test("too large index") do
504
- assert_raise(IndexError) do
505
- @table.remove_column(2)
506
- end
507
- end
508
- end
509
-
510
- sub_test_case("#select_columns") do
511
- def setup
512
- raw_table = {
513
- :a => Arrow::UInt8Array.new([1]),
514
- :b => Arrow::UInt8Array.new([1]),
515
- :c => Arrow::UInt8Array.new([1]),
516
- :d => Arrow::UInt8Array.new([1]),
517
- :e => Arrow::UInt8Array.new([1]),
518
- }
519
- @table = Arrow::Table.new(raw_table)
520
- end
521
-
522
- test("names") do
523
- assert_equal(<<-TABLE, @table.select_columns(:c, :a).to_s)
524
- c a
525
- 0 1 1
526
- TABLE
527
- end
528
-
529
- test("range") do
530
- assert_equal(<<-TABLE, @table.select_columns(2...4).to_s)
531
- c d
532
- 0 1 1
533
- TABLE
534
- end
535
-
536
- test("indexes") do
537
- assert_equal(<<-TABLE, @table.select_columns(0, -1, 2).to_s)
538
- a e c
539
- 0 1 1 1
540
- TABLE
541
- end
542
-
543
- test("mixed") do
544
- assert_equal(<<-TABLE, @table.select_columns(:a, -1, 2..3).to_s)
545
- a e c d
546
- 0 1 1 1 1
547
- TABLE
548
- end
549
-
550
- test("block") do
551
- selected_table = @table.select_columns.with_index do |column, i|
552
- column.name == "a" or i.odd?
553
- end
554
- assert_equal(<<-TABLE, selected_table.to_s)
555
- a b d
556
- 0 1 1 1
557
- TABLE
558
- end
559
-
560
- test("names, indexes and block") do
561
- selected_table = @table.select_columns(:a, -1) do |column|
562
- column.name == "a"
563
- end
564
- assert_equal(<<-TABLE, selected_table.to_s)
565
- a
566
- 0 1
567
- TABLE
568
- end
569
- end
570
-
571
- sub_test_case("#save and .load") do
572
- module SaveLoadFormatTests
573
- def test_default
574
- output = create_output(".arrow")
575
- @table.save(output)
576
- assert_equal(@table, Arrow::Table.load(output))
577
- end
578
-
579
- def test_arrow_file
580
- output = create_output(".arrow")
581
- @table.save(output, format: :arrow_file)
582
- assert_equal(@table, Arrow::Table.load(output, format: :arrow_file))
583
- end
584
-
585
- def test_batch
586
- output = create_output(".arrow")
587
- @table.save(output, format: :batch)
588
- assert_equal(@table, Arrow::Table.load(output, format: :batch))
589
- end
590
-
591
- def test_arrows
592
- output = create_output(".arrows")
593
- @table.save(output, format: :arrows)
594
- assert_equal(@table, Arrow::Table.load(output, format: :arrows))
595
- end
596
-
597
- def test_arrow_streaming
598
- output = create_output(".arrows")
599
- @table.save(output, format: :arrow_streaming)
600
- assert_equal(@table, Arrow::Table.load(output, format: :arrow_streaming))
601
- end
602
-
603
- def test_stream
604
- output = create_output(".arrows")
605
- @table.save(output, format: :stream)
606
- assert_equal(@table, Arrow::Table.load(output, format: :stream))
607
- end
608
-
609
- def test_csv
610
- output = create_output(".csv")
611
- @table.save(output, format: :csv)
612
- assert_equal(@table,
613
- Arrow::Table.load(output,
614
- format: :csv,
615
- schema: @table.schema))
616
- end
617
-
618
- def test_csv_gz
619
- output = create_output(".csv.gz")
620
- @table.save(output,
621
- format: :csv,
622
- compression: :gzip)
623
- assert_equal(@table,
624
- Arrow::Table.load(output,
625
- format: :csv,
626
- compression: :gzip,
627
- schema: @table.schema))
628
- end
629
-
630
- def test_tsv
631
- output = create_output(".tsv")
632
- @table.save(output, format: :tsv)
633
- assert_equal(@table,
634
- Arrow::Table.load(output,
635
- format: :tsv,
636
- schema: @table.schema))
637
- end
638
- end
639
-
640
- sub_test_case("path") do
641
- sub_test_case(":format") do
642
- include SaveLoadFormatTests
643
-
644
- def create_output(extension)
645
- @file = Tempfile.new(["red-arrow", extension])
646
- @file.path
647
- end
648
-
649
- sub_test_case("save: auto detect") do
650
- test("arrow") do
651
- output = create_output(".arrow")
652
- @table.save(output)
653
- assert_equal(@table,
654
- Arrow::Table.load(output,
655
- format: :arrow,
656
- schema: @table.schema))
657
- end
658
-
659
- test("arrows") do
660
- output = create_output(".arrows")
661
- @table.save(output)
662
- assert_equal(@table,
663
- Arrow::Table.load(output,
664
- format: :arrows,
665
- schema: @table.schema))
666
- end
667
-
668
- test("csv") do
669
- output = create_output(".csv")
670
- @table.save(output)
671
- assert_equal(@table,
672
- Arrow::Table.load(output,
673
- format: :csv,
674
- schema: @table.schema))
675
- end
676
-
677
- test("csv.gz") do
678
- output = create_output(".csv.gz")
679
- @table.save(output)
680
- assert_equal(@table,
681
- Arrow::Table.load(output,
682
- format: :csv,
683
- compression: :gzip,
684
- schema: @table.schema))
685
- end
686
-
687
- test("tsv") do
688
- output = create_output(".tsv")
689
- @table.save(output)
690
- assert_equal(@table,
691
- Arrow::Table.load(output,
692
- format: :tsv,
693
- schema: @table.schema))
694
- end
695
- end
696
-
697
- sub_test_case("load: auto detect") do
698
- test("arrow: file") do
699
- output = create_output(".arrow")
700
- @table.save(output, format: :arrow_file)
701
- assert_equal(@table, Arrow::Table.load(output))
702
- end
703
-
704
- test("arrow: streaming") do
705
- output = create_output(".arrow")
706
- @table.save(output, format: :arrows)
707
- assert_equal(@table, Arrow::Table.load(output))
708
- end
709
-
710
- test("arrows") do
711
- output = create_output(".arrows")
712
- @table.save(output, format: :arrows)
713
- assert_equal(@table, Arrow::Table.load(output))
714
- end
715
-
716
- test("csv") do
717
- path = fixture_path("with-header.csv")
718
- table = Arrow::Table.load(path, skip_lines: /^\#/)
719
- assert_equal(<<-TABLE, table.to_s)
720
- name score
721
- 0 alice 10
722
- 1 bob 29
723
- 2 chris -1
724
- TABLE
725
- end
726
-
727
- test("csv.gz") do
728
- file = Tempfile.new(["red-arrow", ".csv.gz"])
729
- file.close
730
- Zlib::GzipWriter.open(file.path) do |gz|
731
- gz.write(<<-CSV)
732
- name,score
733
- alice,10
734
- bob,29
735
- chris,-1
736
- CSV
737
- end
738
- assert_equal(<<-TABLE, Arrow::Table.load(file.path).to_s)
739
- name score
740
- 0 alice 10
741
- 1 bob 29
742
- 2 chris -1
743
- TABLE
744
- end
745
-
746
- test("tsv") do
747
- file = Tempfile.new(["red-arrow", ".tsv"])
748
- file.puts(<<-TSV)
749
- name\tscore
750
- alice\t10
751
- bob\t29
752
- chris\t-1
753
- TSV
754
- file.close
755
- table = Arrow::Table.load(file.path)
756
- assert_equal(<<-TABLE, table.to_s)
757
- name score
758
- 0 alice 10
759
- 1 bob 29
760
- 2 chris -1
761
- TABLE
762
- end
763
- end
764
- end
765
- end
766
-
767
- sub_test_case("Buffer") do
768
- sub_test_case(":format") do
769
- include SaveLoadFormatTests
770
-
771
- def create_output(extension)
772
- Arrow::ResizableBuffer.new(1024)
773
- end
774
- end
775
- end
776
-
777
- sub_test_case("URI") do
778
- def start_web_server(path, data, content_type)
779
- http_server = WEBrick::HTTPServer.new(:Port => 0)
780
- http_server.mount_proc(path) do |request, response|
781
- response.body = data
782
- response.content_type = content_type
783
- end
784
- http_server_thread = Thread.new do
785
- http_server.start
786
- end
787
- begin
788
- Timeout.timeout(1) do
789
- yield(http_server[:Port])
790
- end
791
- ensure
792
- http_server.shutdown
793
- http_server_thread.join
794
- end
795
- end
796
-
797
- data("Arrow File",
798
- ["arrow", "application/vnd.apache.arrow.file"])
799
- data("Arrow Stream",
800
- ["arrows", "application/vnd.apache.arrow.stream"])
801
- data("CSV",
802
- ["csv", "text/csv"])
803
- def test_http(data)
804
- extension, content_type = data
805
- output = Arrow::ResizableBuffer.new(1024)
806
- @table.save(output, format: extension.to_sym)
807
- path = "/data.#{extension}"
808
- start_web_server(path,
809
- output.data.to_s,
810
- content_type) do |port|
811
- input = URI("http://127.0.0.1:#{port}#{path}")
812
- loaded_table = Arrow::Table.load(input)
813
- assert_equal(@table.to_s, loaded_table.to_s)
814
- end
815
- end
816
- end
817
- end
818
-
819
- test("#pack") do
820
- packed_table = @table.pack
821
- column_n_chunks = packed_table.columns.collect {|c| c.data.n_chunks}
822
- assert_equal([[1, 1], <<-TABLE], [column_n_chunks, packed_table.to_s])
823
- count visible
824
- 0 1 true
825
- 1 2 false
826
- 2 4 (null)
827
- 3 8 true
828
- 4 16 true
829
- 5 32 false
830
- 6 64 (null)
831
- 7 128 (null)
832
- TABLE
833
- end
834
-
835
- sub_test_case("#to_s") do
836
- sub_test_case(":format") do
837
- def setup
838
- columns = {
839
- "count" => Arrow::UInt8Array.new([1, 2]),
840
- "visible" => Arrow::BooleanArray.new([true, false]),
841
- }
842
- @table = Arrow::Table.new(columns)
843
- end
844
-
845
- test(":column") do
846
- assert_equal(<<-TABLE, @table.to_s(format: :column))
847
- count: uint8
848
- visible: bool
849
- ----
850
- count:
851
- [
852
- [
853
- 1,
854
- 2
855
- ]
856
- ]
857
- visible:
858
- [
859
- [
860
- true,
861
- false
862
- ]
863
- ]
864
- TABLE
865
- end
866
-
867
- test(":list") do
868
- assert_equal(<<-TABLE, @table.to_s(format: :list))
869
- ==================== 0 ====================
870
- count: 1
871
- visible: true
872
- ==================== 1 ====================
873
- count: 2
874
- visible: false
875
- TABLE
876
- end
877
-
878
- test(":table") do
879
- assert_equal(<<-TABLE, @table.to_s(format: :table))
880
- count visible
881
- 0 1 true
882
- 1 2 false
883
- TABLE
884
- end
885
-
886
- test("invalid") do
887
- message = ":format must be :column, :list, :table or nil: <:invalid>"
888
- assert_raise(ArgumentError.new(message)) do
889
- @table.to_s(format: :invalid)
890
- end
891
- end
892
- end
893
-
894
- sub_test_case("#==") do
895
- test("Arrow::Table") do
896
- assert do
897
- @table == @table
898
- end
899
- end
900
-
901
- test("not Arrow::Table") do
902
- assert do
903
- not (@table == 29)
904
- end
905
- end
906
- end
907
- end
908
-
909
- sub_test_case("#filter") do
910
- def setup
911
- super
912
- @options = Arrow::FilterOptions.new
913
- @options.null_selection_behavior = :emit_null
914
- end
915
-
916
- test("Array: boolean") do
917
- filter = [nil, true, true, false, true, false, true, true]
918
- assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
919
- count visible
920
- 0 (null) (null)
921
- 1 2 false
922
- 2 4 (null)
923
- 3 16 true
924
- 4 64 (null)
925
- 5 128 (null)
926
- TABLE
927
- end
928
-
929
- test("Arrow::BooleanArray") do
930
- array = [nil, true, true, false, true, false, true, true]
931
- filter = Arrow::BooleanArray.new(array)
932
- assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
933
- count visible
934
- 0 (null) (null)
935
- 1 2 false
936
- 2 4 (null)
937
- 3 16 true
938
- 4 64 (null)
939
- 5 128 (null)
940
- TABLE
941
- end
942
-
943
- test("Arrow::ChunkedArray") do
944
- filter_chunks = [
945
- Arrow::BooleanArray.new([nil, true, true]),
946
- Arrow::BooleanArray.new([false, true, false]),
947
- Arrow::BooleanArray.new([true, true]),
948
- ]
949
- filter = Arrow::ChunkedArray.new(filter_chunks)
950
- assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
951
- count visible
952
- 0 (null) (null)
953
- 1 2 false
954
- 2 4 (null)
955
- 3 16 true
956
- 4 64 (null)
957
- 5 128 (null)
958
- TABLE
959
- end
960
- end
961
-
962
- sub_test_case("#take") do
963
- test("Arrow: boolean") do
964
- indices = [1, 0, 2]
965
- assert_equal(<<-TABLE, @table.take(indices).to_s)
966
- count visible
967
- 0 2 false
968
- 1 1 true
969
- 2 4 (null)
970
- TABLE
971
- end
972
-
973
- test("Arrow::Array") do
974
- indices = Arrow::Int16Array.new([1, 0, 2])
975
- assert_equal(<<-TABLE, @table.take(indices).to_s)
976
- count visible
977
- 0 2 false
978
- 1 1 true
979
- 2 4 (null)
980
- TABLE
981
- end
982
-
983
- test("Arrow::ChunkedArray") do
984
- chunks = [
985
- Arrow::Int16Array.new([1, 0]),
986
- Arrow::Int16Array.new([2])
987
- ]
988
- indices = Arrow::ChunkedArray.new(chunks)
989
- assert_equal(<<-TABLE, @table.take(indices).to_s)
990
- count visible
991
- 0 2 false
992
- 1 1 true
993
- 2 4 (null)
994
- TABLE
995
- end
996
- end
997
-
998
- sub_test_case("#concatenate") do
999
- test("options: :unify_schemas") do
1000
- table1 = Arrow::Table.new(a: [true],
1001
- b: [false])
1002
- table2 = Arrow::Table.new(b: [false])
1003
- concatenated = table1.concatenate([table2], unify_schemas: true)
1004
- assert_equal(<<-TABLE, concatenated.to_s)
1005
- a b
1006
- 0 true false
1007
- 1 (null) false
1008
- TABLE
1009
- end
1010
- end
1011
-
1012
- sub_test_case("#join") do
1013
- test("keys: String") do
1014
- table1 = Arrow::Table.new(key: [1, 2, 3],
1015
- number: [10, 20, 30])
1016
- table2 = Arrow::Table.new(key: [3, 1],
1017
- string: ["three", "one"])
1018
- assert_equal(Arrow::Table.new([
1019
- ["key", [1, 3]],
1020
- ["number", [10, 30]],
1021
- ["key", [1, 3]],
1022
- ["string", ["one", "three"]],
1023
- ]),
1024
- table1.join(table2, "key"))
1025
- end
1026
-
1027
- test("keys: Symbol") do
1028
- table1 = Arrow::Table.new(key: [1, 2, 3],
1029
- number: [10, 20, 30])
1030
- table2 = Arrow::Table.new(key: [3, 1],
1031
- string: ["three", "one"])
1032
- assert_equal(Arrow::Table.new([
1033
- ["key", [1, 3]],
1034
- ["number", [10, 30]],
1035
- ["key", [1, 3]],
1036
- ["string", ["one", "three"]],
1037
- ]),
1038
- table1.join(table2, :key))
1039
- end
1040
-
1041
- test("keys: [String, Symbol]") do
1042
- table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1043
- key2: [10, 100, 20, 200],
1044
- number: [1010, 1100, 2020, 2200])
1045
- table2 = Arrow::Table.new(key1: [1, 2, 2],
1046
- key2: [100, 20, 50],
1047
- string: ["1-100", "2-20", "2-50"])
1048
- assert_equal(Arrow::Table.new([
1049
- ["key1", [1, 2]],
1050
- ["key2", [100, 20]],
1051
- ["number", [1100, 2020]],
1052
- ["key1", [1, 2]],
1053
- ["key2", [100, 20]],
1054
- ["string", ["1-100", "2-20"]],
1055
- ]),
1056
- table1.join(table2, ["key1", :key2]))
1057
- end
1058
-
1059
- test("keys: {left: String, right: Symbol}") do
1060
- table1 = Arrow::Table.new(left_key: [1, 2, 3],
1061
- number: [10, 20, 30])
1062
- table2 = Arrow::Table.new(right_key: [3, 1],
1063
- string: ["three", "one"])
1064
- assert_equal(Arrow::Table.new([
1065
- ["left_key", [1, 3]],
1066
- ["number", [10, 30]],
1067
- ["right_key", [1, 3]],
1068
- ["string", ["one", "three"]],
1069
- ]),
1070
- table1.join(table2, {left: "left_key", right: :right_key}))
1071
- end
1072
-
1073
- test("keys: {left: [String, Symbol], right: [Symbol, String]}") do
1074
- table1 = Arrow::Table.new(left_key1: [1, 1, 2, 2],
1075
- left_key2: [10, 100, 20, 200],
1076
- number: [1010, 1100, 2020, 2200])
1077
- table2 = Arrow::Table.new(right_key1: [1, 2, 2],
1078
- right_key2: [100, 20, 50],
1079
- string: ["1-100", "2-20", "2-50"])
1080
- assert_equal(Arrow::Table.new([
1081
- ["left_key1", [1, 2]],
1082
- ["left_key2", [100, 20]],
1083
- ["number", [1100, 2020]],
1084
- ["right_key1", [1, 2]],
1085
- ["right_key2", [100, 20]],
1086
- ["string", ["1-100", "2-20"]],
1087
- ]),
1088
- table1.join(table2,
1089
- {
1090
- left: ["left_key1", :left_key2],
1091
- right: [:right_key1, "right_key2"],
1092
- }))
1093
- end
1094
-
1095
- test("type:") do
1096
- table1 = Arrow::Table.new(key: [1, 2, 3],
1097
- number: [10, 20, 30])
1098
- table2 = Arrow::Table.new(key: [3, 1],
1099
- string: ["three", "one"])
1100
- assert_equal(Arrow::Table.new([
1101
- ["key", [1, 3, 2]],
1102
- ["number", [10, 30, 20]],
1103
- ["key", [1, 3, nil]],
1104
- ["string", ["one", "three", nil]],
1105
- ]),
1106
- table1.join(table2, "key", type: :left_outer))
1107
- end
1108
-
1109
- test("left_outputs: & right_outputs:") do
1110
- table1 = Arrow::Table.new(key: [1, 2, 3],
1111
- number: [10, 20, 30])
1112
- table2 = Arrow::Table.new(key: [3, 1],
1113
- string: ["three", "one"])
1114
- assert_equal(Arrow::Table.new(key: [1, 3],
1115
- number: [10, 30],
1116
- string: ["one", "three"]),
1117
- table1.join(table2,
1118
- "key",
1119
- left_outputs: ["key", "number"],
1120
- right_outputs: ["string"]))
1121
- end
1122
- end
1123
- end