red-arrow 18.1.0 → 19.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
data/test/test-table.rb DELETED
@@ -1,1530 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class TableTest < Test::Unit::TestCase
19
- include Helper::Fixture
20
-
21
- def setup
22
- @count_field = Arrow::Field.new("count", :uint8)
23
- @visible_field = Arrow::Field.new("visible", :boolean)
24
- schema = Arrow::Schema.new([@count_field, @visible_field])
25
- count_arrays = [
26
- Arrow::UInt8Array.new([1, 2]),
27
- Arrow::UInt8Array.new([4, 8, 16]),
28
- Arrow::UInt8Array.new([32, 64]),
29
- Arrow::UInt8Array.new([128]),
30
- ]
31
- visible_arrays = [
32
- Arrow::BooleanArray.new([true, false, nil]),
33
- Arrow::BooleanArray.new([true]),
34
- Arrow::BooleanArray.new([true, false]),
35
- Arrow::BooleanArray.new([nil]),
36
- Arrow::BooleanArray.new([nil]),
37
- ]
38
- @count_array = Arrow::ChunkedArray.new(count_arrays)
39
- @visible_array = Arrow::ChunkedArray.new(visible_arrays)
40
- @table = Arrow::Table.new(schema, [@count_array, @visible_array])
41
- end
42
-
43
- sub_test_case(".new") do
44
- test("{Symbol: Arrow::Array}") do
45
- schema = Arrow::Schema.new(numbers: :int64)
46
- assert_equal(Arrow::Table.new(schema,
47
- [Arrow::Int64Array.new([1, 2, 3])]),
48
- Arrow::Table.new(numbers: Arrow::Int64Array.new([1, 2, 3])))
49
- end
50
-
51
- test("{Symbol: Arrow::ChunkedArray}") do
52
- chunked_array = Arrow::ChunkedArray.new([Arrow::Int64Array.new([1, 2, 3])])
53
- schema = Arrow::Schema.new(numbers: :int64)
54
- assert_equal(Arrow::Table.new(schema,
55
- [Arrow::Int64Array.new([1, 2, 3])]),
56
- Arrow::Table.new(numbers: chunked_array))
57
- end
58
-
59
- test("{Symbol: Arrow::Tensor}") do
60
- schema = Arrow::Schema.new(numbers: :uint8)
61
- assert_equal(Arrow::Table.new(schema,
62
- [Arrow::UInt8Array.new([1, 2, 3])]),
63
- Arrow::Table.new(numbers: Arrow::Tensor.new([1, 2, 3])))
64
- end
65
-
66
- test("{Symbol: #to_ary}") do
67
- array_like = Object.new
68
- def array_like.to_ary
69
- [1, 2, 3]
70
- end
71
- schema = Arrow::Schema.new(numbers: :uint8)
72
- assert_equal(Arrow::Table.new(schema, [Arrow::UInt8Array.new([1, 2, 3])]),
73
- Arrow::Table.new(numbers: array_like))
74
- end
75
- end
76
-
77
- test("#columns") do
78
- assert_equal([
79
- Arrow::Column.new(@table, 0),
80
- Arrow::Column.new(@table, 1),
81
- ],
82
- @table.columns)
83
- end
84
-
85
- sub_test_case("#slice") do
86
- test("Arrow::BooleanArray") do
87
- target_rows_raw = [nil, true, true, false, true, false, true, true]
88
- target_rows = Arrow::BooleanArray.new(target_rows_raw)
89
- assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
90
- count visible
91
- (uint8) (bool)
92
- 0 2 false
93
- 1 4 (null)
94
- 2 16 true
95
- 3 64 (null)
96
- 4 128 (null)
97
- TABLE
98
- end
99
-
100
- test("Array: boolean") do
101
- target_rows_raw = [nil, true, true, false, true, false, true, true]
102
- assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
103
- count visible
104
- (uint8) (bool)
105
- 0 2 false
106
- 1 4 (null)
107
- 2 16 true
108
- 3 64 (null)
109
- 4 128 (null)
110
- TABLE
111
- end
112
-
113
- test("Integer: positive") do
114
- assert_equal({"count" => 128, "visible" => nil},
115
- @table.slice(@table.n_rows - 1).to_h)
116
- end
117
-
118
- test("Integer: negative") do
119
- assert_equal({"count" => 1, "visible" => true},
120
- @table.slice(-@table.n_rows).to_h)
121
- end
122
-
123
- test("Integer: out of index") do
124
- assert_equal([
125
- nil,
126
- nil,
127
- ],
128
- [
129
- @table.slice(@table.n_rows),
130
- @table.slice(-(@table.n_rows + 1)),
131
- ])
132
- end
133
-
134
- test("Range: positive: include end") do
135
- assert_equal(<<-TABLE, @table.slice(2..4).to_s)
136
- count visible
137
- (uint8) (bool)
138
- 0 4 (null)
139
- 1 8 true
140
- 2 16 true
141
- TABLE
142
- end
143
-
144
- test("Range: positive: exclude end") do
145
- assert_equal(<<-TABLE, @table.slice(2...4).to_s)
146
- count visible
147
- (uint8) (bool)
148
- 0 4 (null)
149
- 1 8 true
150
- TABLE
151
- end
152
-
153
- test("Range: negative: include end") do
154
- assert_equal(<<-TABLE, @table.slice(-4..-2).to_s)
155
- count visible
156
- (uint8) (bool)
157
- 0 16 true
158
- 1 32 false
159
- 2 64 (null)
160
- TABLE
161
- end
162
-
163
- test("Range: negative: exclude end") do
164
- assert_equal(<<-TABLE, @table.slice(-4...-2).to_s)
165
- count visible
166
- (uint8) (bool)
167
- 0 16 true
168
- 1 32 false
169
- TABLE
170
- end
171
-
172
- test("[from, to]: positive") do
173
- assert_equal(<<-TABLE, @table.slice(0, 2).to_s)
174
- count visible
175
- (uint8) (bool)
176
- 0 1 true
177
- 1 2 false
178
- TABLE
179
- end
180
-
181
- test("[from, to]: negative") do
182
- assert_equal(<<-TABLE, @table.slice(-4, 2).to_s)
183
- count visible
184
- (uint8) (bool)
185
- 0 16 true
186
- 1 32 false
187
- TABLE
188
- end
189
-
190
- test("{key: Number}") do
191
- assert_equal(<<-TABLE, @table.slice(count: 16).to_s)
192
- count visible
193
- (uint8) (bool)
194
- 0 16 true
195
- TABLE
196
- end
197
-
198
- test("{key: String}") do
199
- table = Arrow::Table.new(name: Arrow::StringArray.new(["a", "b", "c"]))
200
- assert_equal(<<-TABLE, table.slice(name: 'b').to_s)
201
- name
202
- (utf8)
203
- 0 b
204
- TABLE
205
- end
206
-
207
- test("{key: true}") do
208
- assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
209
- count visible
210
- (uint8) (bool)
211
- 0 1 true
212
- 1 8 true
213
- 2 16 true
214
- TABLE
215
- end
216
-
217
- test("{key: false}") do
218
- assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
219
- count visible
220
- (uint8) (bool)
221
- 0 2 false
222
- 1 32 false
223
- TABLE
224
- end
225
-
226
- test("{key: Range}: beginless include end") do
227
- begin
228
- range = eval("..8")
229
- rescue SyntaxError
230
- omit("beginless range isn't supported")
231
- end
232
- assert_equal(<<-TABLE, @table.slice(count: range).to_s)
233
- count visible
234
- (uint8) (bool)
235
- 0 1 true
236
- 1 2 false
237
- 2 4 (null)
238
- 3 8 true
239
- TABLE
240
- end
241
-
242
- test("{key: Range}: beginless exclude end") do
243
- begin
244
- range = eval("...8")
245
- rescue SyntaxError
246
- omit("beginless range isn't supported")
247
- end
248
- assert_equal(<<-TABLE, @table.slice(count: range).to_s)
249
- count visible
250
- (uint8) (bool)
251
- 0 1 true
252
- 1 2 false
253
- 2 4 (null)
254
- TABLE
255
- end
256
-
257
- test("{key: Range}: endless") do
258
- begin
259
- range = eval("16..")
260
- rescue SyntaxError
261
- omit("endless range isn't supported")
262
- end
263
- assert_equal(<<-TABLE, @table.slice(count: range).to_s)
264
- count visible
265
- (uint8) (bool)
266
- 0 16 true
267
- 1 32 false
268
- 2 64 (null)
269
- 3 128 (null)
270
- TABLE
271
- end
272
-
273
- test("{key: Range}: include end") do
274
- assert_equal(<<-TABLE, @table.slice(count: 1..16).to_s)
275
- count visible
276
- (uint8) (bool)
277
- 0 1 true
278
- 1 2 false
279
- 2 4 (null)
280
- 3 8 true
281
- 4 16 true
282
- TABLE
283
- end
284
-
285
- test("{key: Range}: exclude end") do
286
- assert_equal(<<-TABLE, @table.slice(count: 1...16).to_s)
287
- count visible
288
- (uint8) (bool)
289
- 0 1 true
290
- 1 2 false
291
- 2 4 (null)
292
- 3 8 true
293
- TABLE
294
- end
295
-
296
- test("{key1: Range, key2: true}") do
297
- assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
298
- count visible
299
- (uint8) (bool)
300
- 0 2 false
301
- TABLE
302
- end
303
-
304
- sub_test_case("wrong argument") do
305
- test("no arguments") do
306
- message = "wrong number of arguments (given 0, expected 1..2)"
307
- assert_raise(ArgumentError.new(message)) do
308
- @table.slice
309
- end
310
- end
311
-
312
- test("too many arguments") do
313
- message = "wrong number of arguments (given 3, expected 1..2)"
314
- assert_raise(ArgumentError.new(message)) do
315
- @table.slice(1, 2, 3)
316
- end
317
- end
318
-
319
- test("arguments: with block") do
320
- message = "must not specify both arguments and block"
321
- assert_raise(ArgumentError.new(message)) do
322
- @table.slice(1, 2) {}
323
- end
324
- end
325
-
326
- test("offset: too small") do
327
- n_rows = @table.n_rows
328
- offset = -(n_rows + 1)
329
- message = "offset is out of range (-#{n_rows + 1},#{n_rows}): #{offset}"
330
- assert_raise(ArgumentError.new(message)) do
331
- @table.slice(offset, 1)
332
- end
333
- end
334
-
335
- test("offset: too large") do
336
- n_rows = @table.n_rows
337
- offset = n_rows
338
- message = "offset is out of range (-#{n_rows + 1},#{n_rows}): #{offset}"
339
- assert_raise(ArgumentError.new(message)) do
340
- @table.slice(offset, 1)
341
- end
342
- end
343
- end
344
- end
345
-
346
- sub_test_case("#[]") do
347
- def setup
348
- @table = Arrow::Table.new(a: [true],
349
- b: [true],
350
- c: [true],
351
- d: [true],
352
- e: [true],
353
- f: [true],
354
- g: [true])
355
- end
356
-
357
- test("[String]") do
358
- assert_equal(Arrow::Column.new(@table, 0),
359
- @table["a"])
360
- end
361
-
362
- test("[Symbol]") do
363
- assert_equal(Arrow::Column.new(@table, 1),
364
- @table[:b])
365
- end
366
-
367
- test("[Integer]") do
368
- assert_equal(Arrow::Column.new(@table, 6),
369
- @table[-1])
370
- end
371
-
372
- test("[Range]") do
373
- assert_equal(Arrow::Table.new(d: [true],
374
- e: [true]),
375
- @table[3..4])
376
- end
377
-
378
- test("[[Symbol, String, Integer, Range]]") do
379
- assert_equal(Arrow::Table.new(c: [true],
380
- a: [true],
381
- g: [true],
382
- d: [true],
383
- e: [true]),
384
- @table[[:c, "a", -1, 3..4]])
385
- end
386
- end
387
-
388
- sub_test_case("#merge") do
389
- sub_test_case("Hash") do
390
- test("add") do
391
- name_array = Arrow::StringArray.new(["a", "b", "c", "d", "e", "f", "g", "h"])
392
- assert_equal(<<-TABLE, @table.merge(:name => name_array).to_s)
393
- count visible name
394
- (uint8) (bool) (utf8)
395
- 0 1 true a
396
- 1 2 false b
397
- 2 4 (null) c
398
- 3 8 true d
399
- 4 16 true e
400
- 5 32 false f
401
- 6 64 (null) g
402
- 7 128 (null) h
403
- TABLE
404
- end
405
-
406
- test("remove") do
407
- assert_equal(<<-TABLE, @table.merge(:visible => nil).to_s)
408
- count
409
- (uint8)
410
- 0 1
411
- 1 2
412
- 2 4
413
- 3 8
414
- 4 16
415
- 5 32
416
- 6 64
417
- 7 128
418
- TABLE
419
- end
420
-
421
- test("replace") do
422
- visible_array = Arrow::Int32Array.new([1] * @visible_array.length)
423
- assert_equal(<<-TABLE, @table.merge(:visible => visible_array).to_s)
424
- count visible
425
- (uint8) (int32)
426
- 0 1 1
427
- 1 2 1
428
- 2 4 1
429
- 3 8 1
430
- 4 16 1
431
- 5 32 1
432
- 6 64 1
433
- 7 128 1
434
- TABLE
435
- end
436
- end
437
-
438
- sub_test_case("Arrow::Table") do
439
- test("add") do
440
- name_array = Arrow::StringArray.new(["a", "b", "c", "d", "e", "f", "g", "h"])
441
- table = Arrow::Table.new("name" => name_array)
442
- assert_equal(<<-TABLE, @table.merge(table).to_s)
443
- count visible name
444
- (uint8) (bool) (utf8)
445
- 0 1 true a
446
- 1 2 false b
447
- 2 4 (null) c
448
- 3 8 true d
449
- 4 16 true e
450
- 5 32 false f
451
- 6 64 (null) g
452
- 7 128 (null) h
453
- TABLE
454
- end
455
-
456
- test("replace") do
457
- visible_array = Arrow::Int32Array.new([1] * @visible_array.length)
458
- table = Arrow::Table.new("visible" => visible_array)
459
- assert_equal(<<-TABLE, @table.merge(table).to_s)
460
- count visible
461
- (uint8) (int32)
462
- 0 1 1
463
- 1 2 1
464
- 2 4 1
465
- 3 8 1
466
- 4 16 1
467
- 5 32 1
468
- 6 64 1
469
- 7 128 1
470
- TABLE
471
- end
472
- end
473
- end
474
-
475
- test("column name getter") do
476
- assert_equal(Arrow::Column.new(@table, 1),
477
- @table.visible)
478
- end
479
-
480
- sub_test_case("#remove_column") do
481
- test("String") do
482
- assert_equal(<<-TABLE, @table.remove_column("visible").to_s)
483
- count
484
- (uint8)
485
- 0 1
486
- 1 2
487
- 2 4
488
- 3 8
489
- 4 16
490
- 5 32
491
- 6 64
492
- 7 128
493
- TABLE
494
- end
495
-
496
- test("Symbol") do
497
- assert_equal(<<-TABLE, @table.remove_column(:visible).to_s)
498
- count
499
- (uint8)
500
- 0 1
501
- 1 2
502
- 2 4
503
- 3 8
504
- 4 16
505
- 5 32
506
- 6 64
507
- 7 128
508
- TABLE
509
- end
510
-
511
- test("unknown column name") do
512
- assert_raise(KeyError) do
513
- @table.remove_column(:nonexistent)
514
- end
515
- end
516
-
517
- test("Integer") do
518
- assert_equal(<<-TABLE, @table.remove_column(1).to_s)
519
- count
520
- (uint8)
521
- 0 1
522
- 1 2
523
- 2 4
524
- 3 8
525
- 4 16
526
- 5 32
527
- 6 64
528
- 7 128
529
- TABLE
530
- end
531
-
532
- test("negative integer") do
533
- assert_equal(<<-TABLE, @table.remove_column(-1).to_s)
534
- count
535
- (uint8)
536
- 0 1
537
- 1 2
538
- 2 4
539
- 3 8
540
- 4 16
541
- 5 32
542
- 6 64
543
- 7 128
544
- TABLE
545
- end
546
-
547
- test("too small index") do
548
- assert_raise(IndexError) do
549
- @table.remove_column(-3)
550
- end
551
- end
552
-
553
- test("too large index") do
554
- assert_raise(IndexError) do
555
- @table.remove_column(2)
556
- end
557
- end
558
- end
559
-
560
- sub_test_case("#select_columns") do
561
- def setup
562
- raw_table = {
563
- :a => Arrow::UInt8Array.new([1]),
564
- :b => Arrow::UInt8Array.new([1]),
565
- :c => Arrow::UInt8Array.new([1]),
566
- :d => Arrow::UInt8Array.new([1]),
567
- :e => Arrow::UInt8Array.new([1]),
568
- }
569
- @table = Arrow::Table.new(raw_table)
570
- end
571
-
572
- test("names") do
573
- assert_equal(<<-TABLE, @table.select_columns(:c, :a).to_s)
574
- c a
575
- (uint8) (uint8)
576
- 0 1 1
577
- TABLE
578
- end
579
-
580
- test("range") do
581
- assert_equal(<<-TABLE, @table.select_columns(2...4).to_s)
582
- c d
583
- (uint8) (uint8)
584
- 0 1 1
585
- TABLE
586
- end
587
-
588
- test("indexes") do
589
- assert_equal(<<-TABLE, @table.select_columns(0, -1, 2).to_s)
590
- a e c
591
- (uint8) (uint8) (uint8)
592
- 0 1 1 1
593
- TABLE
594
- end
595
-
596
- test("mixed") do
597
- assert_equal(<<-TABLE, @table.select_columns(:a, -1, 2..3).to_s)
598
- a e c d
599
- (uint8) (uint8) (uint8) (uint8)
600
- 0 1 1 1 1
601
- TABLE
602
- end
603
-
604
- test("block") do
605
- selected_table = @table.select_columns.with_index do |column, i|
606
- column.name == "a" or i.odd?
607
- end
608
- assert_equal(<<-TABLE, selected_table.to_s)
609
- a b d
610
- (uint8) (uint8) (uint8)
611
- 0 1 1 1
612
- TABLE
613
- end
614
-
615
- test("names, indexes and block") do
616
- selected_table = @table.select_columns(:a, -1) do |column|
617
- column.name == "a"
618
- end
619
- assert_equal(<<-TABLE, selected_table.to_s)
620
- a
621
- (uint8)
622
- 0 1
623
- TABLE
624
- end
625
-
626
- test("empty result") do
627
- selected_table = @table.filter([false] * @table.size).select_columns(:a)
628
- assert_equal(<<-TABLE, selected_table.to_s)
629
- a
630
- (uint8)
631
- TABLE
632
- end
633
- end
634
-
635
- sub_test_case("#column_names") do
636
- test("unique") do
637
- table = Arrow::Table.new(a: [1], b: [2], c: [3])
638
- assert_equal(%w[a b c], table.column_names)
639
- end
640
-
641
- test("duplicated") do
642
- table = Arrow::Table.new([["a", [1, 2, 3]], ["a", [4, 5, 6]]])
643
- assert_equal(%w[a a], table.column_names)
644
- end
645
- end
646
-
647
- sub_test_case("#save and .load") do
648
- module SaveLoadFormatTests
649
- def test_default
650
- output = create_output(".arrow")
651
- @table.save(output)
652
- assert_equal(@table, Arrow::Table.load(output))
653
- end
654
-
655
- def test_arrow_file
656
- output = create_output(".arrow")
657
- @table.save(output, format: :arrow_file)
658
- assert_equal(@table, Arrow::Table.load(output, format: :arrow_file))
659
- end
660
-
661
- def test_batch
662
- output = create_output(".arrow")
663
- @table.save(output, format: :batch)
664
- assert_equal(@table, Arrow::Table.load(output, format: :batch))
665
- end
666
-
667
- def test_arrows
668
- output = create_output(".arrows")
669
- @table.save(output, format: :arrows)
670
- assert_equal(@table, Arrow::Table.load(output, format: :arrows))
671
- end
672
-
673
- def test_arrow_streaming
674
- output = create_output(".arrows")
675
- @table.save(output, format: :arrow_streaming)
676
- assert_equal(@table, Arrow::Table.load(output, format: :arrow_streaming))
677
- end
678
-
679
- def test_stream
680
- output = create_output(".arrows")
681
- @table.save(output, format: :stream)
682
- assert_equal(@table, Arrow::Table.load(output, format: :stream))
683
- end
684
-
685
- def test_csv
686
- output = create_output(".csv")
687
- @table.save(output, format: :csv)
688
- assert_equal(@table,
689
- Arrow::Table.load(output,
690
- format: :csv,
691
- schema: @table.schema))
692
- end
693
-
694
- def test_csv_gz
695
- output = create_output(".csv.gz")
696
- @table.save(output,
697
- format: :csv,
698
- compression: :gzip)
699
- assert_equal(@table,
700
- Arrow::Table.load(output,
701
- format: :csv,
702
- compression: :gzip,
703
- schema: @table.schema))
704
- end
705
-
706
- def test_tsv
707
- output = create_output(".tsv")
708
- @table.save(output, format: :tsv)
709
- assert_equal(@table,
710
- Arrow::Table.load(output,
711
- format: :tsv,
712
- schema: @table.schema))
713
- end
714
-
715
- def test_json
716
- output = create_output(".json")
717
- # TODO: Implement this.
718
- # @table.save(output, format: :json)
719
- columns = +""
720
- @table.each_record.each do |record|
721
- column = {
722
- "count" => record.count,
723
- "visible" => record.visible,
724
- }
725
- columns << column.to_json
726
- columns << "\n"
727
- end
728
- if output.is_a?(String)
729
- File.write(output, columns)
730
- else
731
- output.resize(columns.bytesize)
732
- output.set_data(0, columns)
733
- end
734
- assert_equal(@table,
735
- Arrow::Table.load(output,
736
- format: :json,
737
- schema: @table.schema))
738
- end
739
- end
740
-
741
- sub_test_case("path") do
742
- sub_test_case(":format") do
743
- include SaveLoadFormatTests
744
-
745
- def create_output(extension)
746
- @file = Tempfile.new(["red-arrow", extension])
747
- @file.path
748
- end
749
-
750
- sub_test_case("save: auto detect") do
751
- test("arrow") do
752
- output = create_output(".arrow")
753
- @table.save(output)
754
- assert_equal(@table,
755
- Arrow::Table.load(output,
756
- format: :arrow,
757
- schema: @table.schema))
758
- end
759
-
760
- test("arrows") do
761
- output = create_output(".arrows")
762
- @table.save(output)
763
- assert_equal(@table,
764
- Arrow::Table.load(output,
765
- format: :arrows,
766
- schema: @table.schema))
767
- end
768
-
769
- test("csv") do
770
- output = create_output(".csv")
771
- @table.save(output)
772
- assert_equal(@table,
773
- Arrow::Table.load(output,
774
- format: :csv,
775
- schema: @table.schema))
776
- end
777
-
778
- test("csv, return value") do
779
- output = create_output(".csv")
780
- assert_equal(@table, @table.save(output))
781
- end
782
-
783
- test("csv.gz") do
784
- output = create_output(".csv.gz")
785
- @table.save(output)
786
- assert_equal(@table,
787
- Arrow::Table.load(output,
788
- format: :csv,
789
- compression: :gzip,
790
- schema: @table.schema))
791
- end
792
-
793
- test("tsv") do
794
- output = create_output(".tsv")
795
- @table.save(output)
796
- assert_equal(@table,
797
- Arrow::Table.load(output,
798
- format: :tsv,
799
- schema: @table.schema))
800
- end
801
- end
802
-
803
- sub_test_case("load: auto detect") do
804
- test("arrow: file") do
805
- output = create_output(".arrow")
806
- @table.save(output, format: :arrow_file)
807
- assert_equal(@table, Arrow::Table.load(output))
808
- end
809
-
810
- test("arrow: streaming") do
811
- output = create_output(".arrow")
812
- @table.save(output, format: :arrows)
813
- assert_equal(@table, Arrow::Table.load(output))
814
- end
815
-
816
- test("arrows") do
817
- output = create_output(".arrows")
818
- @table.save(output, format: :arrows)
819
- assert_equal(@table, Arrow::Table.load(output))
820
- end
821
-
822
- test("csv") do
823
- path = fixture_path("with-header.csv")
824
- table = Arrow::Table.load(path, skip_lines: /^\#/)
825
- assert_equal(<<-TABLE, table.to_s)
826
- name score
827
- (utf8) (int8)
828
- 0 alice 10
829
- 1 bob 29
830
- 2 chris -1
831
- TABLE
832
- end
833
-
834
- test("csv.gz") do
835
- file = Tempfile.new(["red-arrow", ".csv.gz"])
836
- file.close
837
- Zlib::GzipWriter.open(file.path) do |gz|
838
- gz.write(<<-CSV)
839
- name,score
840
- alice,10
841
- bob,29
842
- chris,-1
843
- CSV
844
- end
845
- assert_equal(<<-TABLE, Arrow::Table.load(file.path).to_s)
846
- name score
847
- (utf8) (int64)
848
- 0 alice 10
849
- 1 bob 29
850
- 2 chris -1
851
- TABLE
852
- end
853
-
854
- test("tsv") do
855
- file = Tempfile.new(["red-arrow", ".tsv"])
856
- file.puts(<<-TSV)
857
- name\tscore
858
- alice\t10
859
- bob\t29
860
- chris\t-1
861
- TSV
862
- file.close
863
- table = Arrow::Table.load(file.path)
864
- assert_equal(<<-TABLE, table.to_s)
865
- name score
866
- (utf8) (int64)
867
- 0 alice 10
868
- 1 bob 29
869
- 2 chris -1
870
- TABLE
871
- end
872
- end
873
- end
874
- end
875
-
876
- sub_test_case("Buffer") do
877
- sub_test_case(":format") do
878
- include SaveLoadFormatTests
879
-
880
- def create_output(extension)
881
- Arrow::ResizableBuffer.new(1024)
882
- end
883
- end
884
- end
885
-
886
- sub_test_case("URI") do
887
- def start_web_server(path, data, content_type)
888
- http_server = WEBrick::HTTPServer.new(:Port => 0)
889
- http_server.mount_proc(path) do |request, response|
890
- response.body = data
891
- response.content_type = content_type
892
- end
893
- http_server_thread = Thread.new do
894
- http_server.start
895
- end
896
- begin
897
- Timeout.timeout(1) do
898
- yield(http_server[:Port])
899
- end
900
- ensure
901
- http_server.shutdown
902
- http_server_thread.join
903
- end
904
- end
905
-
906
- data("Arrow File",
907
- ["arrow", "application/vnd.apache.arrow.file"])
908
- data("Arrow Stream",
909
- ["arrows", "application/vnd.apache.arrow.stream"])
910
- data("CSV",
911
- ["csv", "text/csv"])
912
- def test_http(data)
913
- extension, content_type = data
914
- output = Arrow::ResizableBuffer.new(1024)
915
- @table.save(output, format: extension.to_sym)
916
- path = "/data.#{extension}"
917
- start_web_server(path,
918
- output.data.to_s,
919
- content_type) do |port|
920
- input = URI("http://127.0.0.1:#{port}#{path}")
921
- loaded_table = Arrow::Table.load(input, schema: @table.schema)
922
- assert_equal(@table.to_s, loaded_table.to_s)
923
- end
924
- end
925
- end
926
-
927
- sub_test_case("GC") do
928
- def setup
929
- table = Arrow::Table.new(integer: [1, 2, 3],
930
- string: ["a", "b", "c"])
931
- @buffer = Arrow::ResizableBuffer.new(1024)
932
- table.save(@buffer, format: :arrow)
933
- @loaded_table = Arrow::Table.load(@buffer)
934
- end
935
-
936
- def test_chunked_array
937
- chunked_array = @loaded_table[0].data
938
- assert_equal(@buffer,
939
- chunked_array.instance_variable_get(:@input).buffer)
940
- end
941
-
942
- def test_array
943
- array = @loaded_table[0].data.chunks[0]
944
- assert_equal(@buffer,
945
- array.instance_variable_get(:@input).buffer)
946
- end
947
-
948
- def test_record_batch
949
- record_batch = @loaded_table.each_record_batch.first
950
- assert_equal(@buffer,
951
- record_batch.instance_variable_get(:@input).buffer)
952
- end
953
-
954
- def test_record_batch_array
955
- array = @loaded_table.each_record_batch.first[0].data
956
- assert_equal(@buffer,
957
- array.instance_variable_get(:@input).buffer)
958
- end
959
-
960
- def test_record_batch_table
961
- table = @loaded_table.each_record_batch.first.to_table
962
- assert_equal(@buffer,
963
- table.instance_variable_get(:@input).buffer)
964
- end
965
-
966
- def test_slice
967
- table = @loaded_table.slice(0..-1)
968
- assert_equal(@buffer,
969
- table.instance_variable_get(:@input).buffer)
970
- end
971
-
972
- def test_merge
973
- table = @loaded_table.merge({})
974
- assert_equal(@buffer,
975
- table.instance_variable_get(:@input).buffer)
976
- end
977
-
978
- def test_remove_column
979
- table = @loaded_table.remove_column(0)
980
- assert_equal(@buffer,
981
- table.instance_variable_get(:@input).buffer)
982
- end
983
-
984
- def test_pack
985
- table = @loaded_table.pack
986
- assert_equal(@buffer,
987
- table.instance_variable_get(:@input).buffer)
988
- end
989
-
990
- def test_join
991
- table = @loaded_table.join(@loaded_table, :integer)
992
- assert_equal(@buffer,
993
- table.instance_variable_get(:@input).buffer)
994
- end
995
- end
996
- end
997
-
998
- test("#pack") do
999
- packed_table = @table.pack
1000
- column_n_chunks = packed_table.columns.collect {|c| c.data.n_chunks}
1001
- assert_equal([[1, 1], <<-TABLE], [column_n_chunks, packed_table.to_s])
1002
- count visible
1003
- (uint8) (bool)
1004
- 0 1 true
1005
- 1 2 false
1006
- 2 4 (null)
1007
- 3 8 true
1008
- 4 16 true
1009
- 5 32 false
1010
- 6 64 (null)
1011
- 7 128 (null)
1012
- TABLE
1013
- end
1014
-
1015
- sub_test_case("#to_s") do
1016
- sub_test_case(":format") do
1017
- def setup
1018
- columns = {
1019
- "count" => Arrow::UInt8Array.new([1, 2]),
1020
- "visible" => Arrow::BooleanArray.new([true, false]),
1021
- }
1022
- @table = Arrow::Table.new(columns)
1023
- end
1024
-
1025
- test(":column") do
1026
- assert_equal(<<-TABLE, @table.to_s(format: :column))
1027
- count: uint8
1028
- visible: bool
1029
- ----
1030
- count:
1031
- [
1032
- [
1033
- 1,
1034
- 2
1035
- ]
1036
- ]
1037
- visible:
1038
- [
1039
- [
1040
- true,
1041
- false
1042
- ]
1043
- ]
1044
- TABLE
1045
- end
1046
-
1047
- test(":list") do
1048
- assert_equal(<<-TABLE, @table.to_s(format: :list))
1049
- ==================== 0 ====================
1050
- count(uint8): 1
1051
- visible(bool): true
1052
- ==================== 1 ====================
1053
- count(uint8): 2
1054
- visible(bool): false
1055
- TABLE
1056
- end
1057
-
1058
- test(":table") do
1059
- assert_equal(<<-TABLE, @table.to_s(format: :table))
1060
- count visible
1061
- (uint8) (bool)
1062
- 0 1 true
1063
- 1 2 false
1064
- TABLE
1065
- end
1066
-
1067
- test("invalid") do
1068
- message = ":format must be :column, :list, :table or nil: <:invalid>"
1069
- assert_raise(ArgumentError.new(message)) do
1070
- @table.to_s(format: :invalid)
1071
- end
1072
- end
1073
- end
1074
-
1075
- sub_test_case(":show_column_type") do
1076
- def setup
1077
- columns = {
1078
- "count" => Arrow::UInt8Array.new([1, 2]),
1079
- "visible" => Arrow::BooleanArray.new([true, false]),
1080
- }
1081
- @table = Arrow::Table.new(columns)
1082
- end
1083
-
1084
- test(":list") do
1085
- assert_equal(<<-TABLE, @table.to_s(format: :list, show_column_type: false))
1086
- ==================== 0 ====================
1087
- count: 1
1088
- visible: true
1089
- ==================== 1 ====================
1090
- count: 2
1091
- visible: false
1092
- TABLE
1093
- end
1094
-
1095
- test(":table") do
1096
- assert_equal(<<-TABLE, @table.to_s(format: :table, show_column_type: false))
1097
- count visible
1098
- 0 1 true
1099
- 1 2 false
1100
- TABLE
1101
- end
1102
- end
1103
-
1104
- sub_test_case("#==") do
1105
- test("Arrow::Table") do
1106
- assert do
1107
- @table == @table
1108
- end
1109
- end
1110
-
1111
- test("not Arrow::Table") do
1112
- assert do
1113
- not (@table == 29)
1114
- end
1115
- end
1116
- end
1117
- end
1118
-
1119
- sub_test_case("#filter") do
1120
- def setup
1121
- super
1122
- @options = Arrow::FilterOptions.new
1123
- @options.null_selection_behavior = :emit_null
1124
- end
1125
-
1126
- test("Array: boolean") do
1127
- filter = [nil, true, true, false, true, false, true, true]
1128
- assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
1129
- count visible
1130
- (uint8) (bool)
1131
- 0 (null) (null)
1132
- 1 2 false
1133
- 2 4 (null)
1134
- 3 16 true
1135
- 4 64 (null)
1136
- 5 128 (null)
1137
- TABLE
1138
- end
1139
-
1140
- test("Arrow::BooleanArray") do
1141
- array = [nil, true, true, false, true, false, true, true]
1142
- filter = Arrow::BooleanArray.new(array)
1143
- assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
1144
- count visible
1145
- (uint8) (bool)
1146
- 0 (null) (null)
1147
- 1 2 false
1148
- 2 4 (null)
1149
- 3 16 true
1150
- 4 64 (null)
1151
- 5 128 (null)
1152
- TABLE
1153
- end
1154
-
1155
- test("Arrow::ChunkedArray") do
1156
- filter_chunks = [
1157
- Arrow::BooleanArray.new([nil, true, true]),
1158
- Arrow::BooleanArray.new([false, true, false]),
1159
- Arrow::BooleanArray.new([true, true]),
1160
- ]
1161
- filter = Arrow::ChunkedArray.new(filter_chunks)
1162
- assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
1163
- count visible
1164
- (uint8) (bool)
1165
- 0 (null) (null)
1166
- 1 2 false
1167
- 2 4 (null)
1168
- 3 16 true
1169
- 4 64 (null)
1170
- 5 128 (null)
1171
- TABLE
1172
- end
1173
- end
1174
-
1175
- sub_test_case("#take") do
1176
- test("Arrow: boolean") do
1177
- indices = [1, 0, 2]
1178
- assert_equal(<<-TABLE, @table.take(indices).to_s)
1179
- count visible
1180
- (uint8) (bool)
1181
- 0 2 false
1182
- 1 1 true
1183
- 2 4 (null)
1184
- TABLE
1185
- end
1186
-
1187
- test("Arrow::Array") do
1188
- indices = Arrow::Int16Array.new([1, 0, 2])
1189
- assert_equal(<<-TABLE, @table.take(indices).to_s)
1190
- count visible
1191
- (uint8) (bool)
1192
- 0 2 false
1193
- 1 1 true
1194
- 2 4 (null)
1195
- TABLE
1196
- end
1197
-
1198
- test("Arrow::ChunkedArray") do
1199
- chunks = [
1200
- Arrow::Int16Array.new([1, 0]),
1201
- Arrow::Int16Array.new([2])
1202
- ]
1203
- indices = Arrow::ChunkedArray.new(chunks)
1204
- assert_equal(<<-TABLE, @table.take(indices).to_s)
1205
- count visible
1206
- (uint8) (bool)
1207
- 0 2 false
1208
- 1 1 true
1209
- 2 4 (null)
1210
- TABLE
1211
- end
1212
- end
1213
-
1214
- sub_test_case("#concatenate") do
1215
- test("options: :unify_schemas") do
1216
- table1 = Arrow::Table.new(a: [true],
1217
- b: [false])
1218
- table2 = Arrow::Table.new(b: [false])
1219
- concatenated = table1.concatenate([table2], unify_schemas: true)
1220
- assert_equal(<<-TABLE, concatenated.to_s)
1221
- a b
1222
- (bool) (bool)
1223
- 0 true false
1224
- 1 (null) false
1225
- TABLE
1226
- end
1227
- end
1228
-
1229
- sub_test_case("#join") do
1230
- test("keys: nil (natural join)") do
1231
- table1 = Arrow::Table.new(key: [1, 2, 3],
1232
- number: [10, 20, 30])
1233
- table2 = Arrow::Table.new(key: [3, 1],
1234
- string: ["three", "one"])
1235
- assert_equal(Arrow::Table.new([
1236
- ["key", [1, 3]],
1237
- ["number", [10, 30]],
1238
- ["string", ["one", "three"]],
1239
- ]),
1240
- table1.join(table2))
1241
- end
1242
-
1243
- test("keys: String") do
1244
- table1 = Arrow::Table.new(key: [1, 2, 3],
1245
- number: [10, 20, 30])
1246
- table2 = Arrow::Table.new(key: [3, 1],
1247
- string: ["three", "one"])
1248
- assert_equal(Arrow::Table.new([
1249
- ["key", [1, 3]],
1250
- ["number", [10, 30]],
1251
- ["string", ["one", "three"]],
1252
- ]),
1253
- table1.join(table2, "key"))
1254
- end
1255
-
1256
- test("keys: Symbol") do
1257
- table1 = Arrow::Table.new(key: [1, 2, 3],
1258
- number: [10, 20, 30])
1259
- table2 = Arrow::Table.new(key: [3, 1],
1260
- string: ["three", "one"])
1261
- assert_equal(Arrow::Table.new([
1262
- ["key", [1, 3]],
1263
- ["number", [10, 30]],
1264
- ["string", ["one", "three"]],
1265
- ]),
1266
- table1.join(table2, :key))
1267
- end
1268
-
1269
- test("keys: [String]") do
1270
- table1 = Arrow::Table.new(key: [1, 2, 3],
1271
- number: [10, 20, 30])
1272
- table2 = Arrow::Table.new(key: [3, 1],
1273
- string: ["three", "one"])
1274
- assert_equal(Arrow::Table.new([
1275
- ["key", [1, 3]],
1276
- ["number", [10, 30]],
1277
- ["key", [1, 3]],
1278
- ["string", ["one", "three"]],
1279
- ]),
1280
- table1.join(table2, ["key"]))
1281
- end
1282
-
1283
- test("keys: [String, Symbol]") do
1284
- table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1285
- key2: [10, 100, 20, 200],
1286
- number: [1010, 1100, 2020, 2200])
1287
- table2 = Arrow::Table.new(key1: [1, 2, 2],
1288
- key2: [100, 20, 50],
1289
- string: ["1-100", "2-20", "2-50"])
1290
- assert_equal(Arrow::Table.new([
1291
- ["key1", [1, 2]],
1292
- ["key2", [100, 20]],
1293
- ["number", [1100, 2020]],
1294
- ["key1", [1, 2]],
1295
- ["key2", [100, 20]],
1296
- ["string", ["1-100", "2-20"]],
1297
- ]),
1298
- table1.join(table2, ["key1", :key2]))
1299
- end
1300
-
1301
- test("keys: {left: String, right: Symbol}") do
1302
- table1 = Arrow::Table.new(left_key: [1, 2, 3],
1303
- number: [10, 20, 30])
1304
- table2 = Arrow::Table.new(right_key: [3, 1],
1305
- string: ["three", "one"])
1306
- assert_equal(Arrow::Table.new([
1307
- ["left_key", [1, 3]],
1308
- ["number", [10, 30]],
1309
- ["right_key", [1, 3]],
1310
- ["string", ["one", "three"]],
1311
- ]),
1312
- table1.join(table2,
1313
- {left: "left_key", right: :right_key},
1314
- type: :inner))
1315
- end
1316
-
1317
- test("keys: {left: [String, Symbol], right: [Symbol, String]}") do
1318
- table1 = Arrow::Table.new(left_key1: [1, 1, 2, 2],
1319
- left_key2: [10, 100, 20, 200],
1320
- number: [1010, 1100, 2020, 2200])
1321
- table2 = Arrow::Table.new(right_key1: [1, 2, 2],
1322
- right_key2: [100, 20, 50],
1323
- string: ["1-100", "2-20", "2-50"])
1324
- assert_equal(Arrow::Table.new([
1325
- ["left_key1", [1, 2]],
1326
- ["left_key2", [100, 20]],
1327
- ["number", [1100, 2020]],
1328
- ["right_key1", [1, 2]],
1329
- ["right_key2", [100, 20]],
1330
- ["string", ["1-100", "2-20"]],
1331
- ]),
1332
- table1.join(table2,
1333
- {
1334
- left: ["left_key1", :left_key2],
1335
- right: [:right_key1, "right_key2"],
1336
- },
1337
- type: :inner))
1338
- end
1339
-
1340
- test("type: :left_outer") do
1341
- table1 = Arrow::Table.new(key: [1, 2, 3],
1342
- number: [10, 20, 30])
1343
- table2 = Arrow::Table.new(key: [3, 1],
1344
- string: ["three", "one"])
1345
- assert_equal(Arrow::Table.new([
1346
- ["key", [1, 3, 2]],
1347
- ["number", [10, 30, 20]],
1348
- ["string", ["one", "three", nil]],
1349
- ]),
1350
- table1.join(table2, "key", type: :left_outer))
1351
- end
1352
-
1353
- test("type: :right_outer") do
1354
- table1 = Arrow::Table.new(key: [1, 2, 3],
1355
- number: [10, 20, 30])
1356
- table2 = Arrow::Table.new(key: [3, 1],
1357
- string: ["three", "one"])
1358
- assert_equal(Arrow::Table.new([
1359
- ["key", [1, 3]],
1360
- ["number", [10, 30]],
1361
- ["string", ["one", "three"]],
1362
- ]),
1363
- table1.join(table2, "key", type: :right_outer))
1364
- end
1365
-
1366
- test("type: :full_outer") do
1367
- table1 = Arrow::Table.new(key: [1, 2, 3],
1368
- number: [10, 20, 30])
1369
- table2 = Arrow::Table.new(key: [3, 1],
1370
- string: ["three", "one"])
1371
- assert_equal(Arrow::Table.new([
1372
- ["key", [1, 3, 2]],
1373
- ["number", [10, 30, 20]],
1374
- ["string", ["one", "three", nil]],
1375
- ]),
1376
- table1.join(table2, "key", type: :full_outer))
1377
- end
1378
-
1379
- test("type: :left_semi") do
1380
- table1 = Arrow::Table.new(key: [1, 2, 3],
1381
- number: [10, 20, 30])
1382
- table2 = Arrow::Table.new(key: [3, 1],
1383
- string: ["three", "one"])
1384
- assert_equal(Arrow::Table.new([
1385
- ["key", [1, 3]],
1386
- ["number", [10, 30]],
1387
- ]),
1388
- table1.join(table2, "key", type: :left_semi))
1389
- end
1390
-
1391
- test("type: :right_semi") do
1392
- table1 = Arrow::Table.new(key: [1, 2, 3],
1393
- number: [10, 20, 30])
1394
- table2 = Arrow::Table.new(key: [3, 1],
1395
- string: ["three", "one"])
1396
- assert_equal(Arrow::Table.new([
1397
- ["key", [3, 1]],
1398
- ["string", ["three", "one"]],
1399
- ]),
1400
- table1.join(table2, "key", type: :right_semi))
1401
- end
1402
-
1403
- test("type: :left_anti") do
1404
- table1 = Arrow::Table.new(key: [1, 2, 3],
1405
- number: [10, 20, 30])
1406
- table2 = Arrow::Table.new(key: [3, 1],
1407
- string: ["three", "one"])
1408
- assert_equal(Arrow::Table.new([
1409
- ["key", [2]],
1410
- ["number", [20]],
1411
- ]),
1412
- table1.join(table2, "key", type: :left_anti))
1413
- end
1414
-
1415
- test("type: :right_anti") do
1416
- table1 = Arrow::Table.new(key: [1, 2, 3],
1417
- number: [10, 20, 30])
1418
- table2 = Arrow::Table.new(key: [3, 1],
1419
- string: ["three", "one"])
1420
- assert_equal(Arrow::Table.new([
1421
- ["key", Arrow::ChunkedArray.new(:uint8)],
1422
- ["string", Arrow::ChunkedArray.new(:string)],
1423
- ]),
1424
- table1.join(table2, "key", type: :right_anti))
1425
- end
1426
-
1427
- test("left_outputs: & right_outputs:") do
1428
- table1 = Arrow::Table.new(key: [1, 2, 3],
1429
- number: [10, 20, 30])
1430
- table2 = Arrow::Table.new(key: [3, 1],
1431
- string: ["three", "one"])
1432
- assert_equal(Arrow::Table.new(key: [1, 3],
1433
- number: [10, 30],
1434
- string: ["one", "three"]),
1435
- table1.join(table2,
1436
- "key",
1437
- left_outputs: ["key", "number"],
1438
- right_outputs: ["string"]))
1439
- end
1440
-
1441
- test("left_outputs: & type: :inner") do
1442
- table1 = Arrow::Table.new(key: [1, 2, 3],
1443
- number: [10, 20, 30])
1444
- table2 = Arrow::Table.new(key: [3, 1],
1445
- string: ["three", "one"])
1446
- assert_equal(Arrow::Table.new([
1447
- ["key", [1, 3]],
1448
- ["number", [10, 30]],
1449
- ["key", [1, 3]],
1450
- ["string", ["one", "three"]]
1451
- ]),
1452
- table1.join(table2,
1453
- type: :inner,
1454
- left_outputs: table1.column_names,
1455
- right_outputs: table2.column_names))
1456
- end
1457
-
1458
- test("left_outputs: & type: :left_outer") do
1459
- table1 = Arrow::Table.new(key: [1, 2, 3],
1460
- number: [10, 20, 30])
1461
- table2 = Arrow::Table.new(key: [3, 1],
1462
- string: ["three", "one"])
1463
- assert_equal(Arrow::Table.new([
1464
- ["key", [1, 3, 2]],
1465
- ["number", [10, 30, 20]],
1466
- ["key", [1, 3, nil]],
1467
- ["string", ["one", "three", nil]],
1468
- ]),
1469
- table1.join(table2,
1470
- type: :left_outer,
1471
- left_outputs: table1.column_names,
1472
- right_outputs: table2.column_names))
1473
- end
1474
-
1475
- test("left_outputs: & type: :right_outer") do
1476
- table1 = Arrow::Table.new(key: [1, 2, 3],
1477
- number: [10, 20, 30])
1478
- table2 = Arrow::Table.new(key: [3, 1],
1479
- string: ["three", "one"])
1480
- assert_equal(Arrow::Table.new([
1481
- ["key", [1, 3]],
1482
- ["number", [10, 30]],
1483
- ["key", [1, 3]],
1484
- ["string", ["one", "three"]],
1485
- ]),
1486
- table1.join(table2,
1487
- type: :right_outer,
1488
- left_outputs: table1.column_names,
1489
- right_outputs: table2.column_names))
1490
- end
1491
-
1492
- test("left_outputs: & type: :full_outer") do
1493
- table1 = Arrow::Table.new(key: [1, 2, 3],
1494
- number: [10, 20, 30])
1495
- table2 = Arrow::Table.new(key: [3, 1],
1496
- string: ["three", "one"])
1497
- assert_equal(Arrow::Table.new([
1498
- ["key", [1, 3, 2]],
1499
- ["number", [10, 30, 20]],
1500
- ["key", [1, 3, nil]],
1501
- ["string", ["one", "three", nil]],
1502
- ]),
1503
- table1.join(table2,
1504
- type: :full_outer,
1505
- left_outputs: table1.column_names,
1506
- right_outputs: table2.column_names))
1507
- end
1508
-
1509
- test("left_suffix: & keys: [String]") do
1510
- table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1511
- key2: [10, 100, 20, 200],
1512
- number: [1010, 1100, 2020, 2200])
1513
- table2 = Arrow::Table.new(key1: [1, 2, 2],
1514
- key2: [100, 20, 50],
1515
- string: ["1-100", "2-20", "2-50"])
1516
- assert_equal(Arrow::Table.new([
1517
- ["key1_left", [1, 2]],
1518
- ["key2_left", [100, 20]],
1519
- ["number", [1100, 2020]],
1520
- ["key1_right", [1, 2]],
1521
- ["key2_right", [100, 20]],
1522
- ["string", ["1-100", "2-20"]],
1523
- ]),
1524
- table1.join(table2,
1525
- ["key1", "key2"],
1526
- left_suffix: "_left",
1527
- right_suffix: "_right"))
1528
- end
1529
- end
1530
- end