red-arrow 18.1.0 → 19.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
data/test/test-table.rb DELETED
@@ -1,1530 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class TableTest < Test::Unit::TestCase
19
- include Helper::Fixture
20
-
21
- def setup
22
- @count_field = Arrow::Field.new("count", :uint8)
23
- @visible_field = Arrow::Field.new("visible", :boolean)
24
- schema = Arrow::Schema.new([@count_field, @visible_field])
25
- count_arrays = [
26
- Arrow::UInt8Array.new([1, 2]),
27
- Arrow::UInt8Array.new([4, 8, 16]),
28
- Arrow::UInt8Array.new([32, 64]),
29
- Arrow::UInt8Array.new([128]),
30
- ]
31
- visible_arrays = [
32
- Arrow::BooleanArray.new([true, false, nil]),
33
- Arrow::BooleanArray.new([true]),
34
- Arrow::BooleanArray.new([true, false]),
35
- Arrow::BooleanArray.new([nil]),
36
- Arrow::BooleanArray.new([nil]),
37
- ]
38
- @count_array = Arrow::ChunkedArray.new(count_arrays)
39
- @visible_array = Arrow::ChunkedArray.new(visible_arrays)
40
- @table = Arrow::Table.new(schema, [@count_array, @visible_array])
41
- end
42
-
43
- sub_test_case(".new") do
44
- test("{Symbol: Arrow::Array}") do
45
- schema = Arrow::Schema.new(numbers: :int64)
46
- assert_equal(Arrow::Table.new(schema,
47
- [Arrow::Int64Array.new([1, 2, 3])]),
48
- Arrow::Table.new(numbers: Arrow::Int64Array.new([1, 2, 3])))
49
- end
50
-
51
- test("{Symbol: Arrow::ChunkedArray}") do
52
- chunked_array = Arrow::ChunkedArray.new([Arrow::Int64Array.new([1, 2, 3])])
53
- schema = Arrow::Schema.new(numbers: :int64)
54
- assert_equal(Arrow::Table.new(schema,
55
- [Arrow::Int64Array.new([1, 2, 3])]),
56
- Arrow::Table.new(numbers: chunked_array))
57
- end
58
-
59
- test("{Symbol: Arrow::Tensor}") do
60
- schema = Arrow::Schema.new(numbers: :uint8)
61
- assert_equal(Arrow::Table.new(schema,
62
- [Arrow::UInt8Array.new([1, 2, 3])]),
63
- Arrow::Table.new(numbers: Arrow::Tensor.new([1, 2, 3])))
64
- end
65
-
66
- test("{Symbol: #to_ary}") do
67
- array_like = Object.new
68
- def array_like.to_ary
69
- [1, 2, 3]
70
- end
71
- schema = Arrow::Schema.new(numbers: :uint8)
72
- assert_equal(Arrow::Table.new(schema, [Arrow::UInt8Array.new([1, 2, 3])]),
73
- Arrow::Table.new(numbers: array_like))
74
- end
75
- end
76
-
77
- test("#columns") do
78
- assert_equal([
79
- Arrow::Column.new(@table, 0),
80
- Arrow::Column.new(@table, 1),
81
- ],
82
- @table.columns)
83
- end
84
-
85
- sub_test_case("#slice") do
86
- test("Arrow::BooleanArray") do
87
- target_rows_raw = [nil, true, true, false, true, false, true, true]
88
- target_rows = Arrow::BooleanArray.new(target_rows_raw)
89
- assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
90
- count visible
91
- (uint8) (bool)
92
- 0 2 false
93
- 1 4 (null)
94
- 2 16 true
95
- 3 64 (null)
96
- 4 128 (null)
97
- TABLE
98
- end
99
-
100
- test("Array: boolean") do
101
- target_rows_raw = [nil, true, true, false, true, false, true, true]
102
- assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
103
- count visible
104
- (uint8) (bool)
105
- 0 2 false
106
- 1 4 (null)
107
- 2 16 true
108
- 3 64 (null)
109
- 4 128 (null)
110
- TABLE
111
- end
112
-
113
- test("Integer: positive") do
114
- assert_equal({"count" => 128, "visible" => nil},
115
- @table.slice(@table.n_rows - 1).to_h)
116
- end
117
-
118
- test("Integer: negative") do
119
- assert_equal({"count" => 1, "visible" => true},
120
- @table.slice(-@table.n_rows).to_h)
121
- end
122
-
123
- test("Integer: out of index") do
124
- assert_equal([
125
- nil,
126
- nil,
127
- ],
128
- [
129
- @table.slice(@table.n_rows),
130
- @table.slice(-(@table.n_rows + 1)),
131
- ])
132
- end
133
-
134
- test("Range: positive: include end") do
135
- assert_equal(<<-TABLE, @table.slice(2..4).to_s)
136
- count visible
137
- (uint8) (bool)
138
- 0 4 (null)
139
- 1 8 true
140
- 2 16 true
141
- TABLE
142
- end
143
-
144
- test("Range: positive: exclude end") do
145
- assert_equal(<<-TABLE, @table.slice(2...4).to_s)
146
- count visible
147
- (uint8) (bool)
148
- 0 4 (null)
149
- 1 8 true
150
- TABLE
151
- end
152
-
153
- test("Range: negative: include end") do
154
- assert_equal(<<-TABLE, @table.slice(-4..-2).to_s)
155
- count visible
156
- (uint8) (bool)
157
- 0 16 true
158
- 1 32 false
159
- 2 64 (null)
160
- TABLE
161
- end
162
-
163
- test("Range: negative: exclude end") do
164
- assert_equal(<<-TABLE, @table.slice(-4...-2).to_s)
165
- count visible
166
- (uint8) (bool)
167
- 0 16 true
168
- 1 32 false
169
- TABLE
170
- end
171
-
172
- test("[from, to]: positive") do
173
- assert_equal(<<-TABLE, @table.slice(0, 2).to_s)
174
- count visible
175
- (uint8) (bool)
176
- 0 1 true
177
- 1 2 false
178
- TABLE
179
- end
180
-
181
- test("[from, to]: negative") do
182
- assert_equal(<<-TABLE, @table.slice(-4, 2).to_s)
183
- count visible
184
- (uint8) (bool)
185
- 0 16 true
186
- 1 32 false
187
- TABLE
188
- end
189
-
190
- test("{key: Number}") do
191
- assert_equal(<<-TABLE, @table.slice(count: 16).to_s)
192
- count visible
193
- (uint8) (bool)
194
- 0 16 true
195
- TABLE
196
- end
197
-
198
- test("{key: String}") do
199
- table = Arrow::Table.new(name: Arrow::StringArray.new(["a", "b", "c"]))
200
- assert_equal(<<-TABLE, table.slice(name: 'b').to_s)
201
- name
202
- (utf8)
203
- 0 b
204
- TABLE
205
- end
206
-
207
- test("{key: true}") do
208
- assert_equal(<<-TABLE, @table.slice(visible: true).to_s)
209
- count visible
210
- (uint8) (bool)
211
- 0 1 true
212
- 1 8 true
213
- 2 16 true
214
- TABLE
215
- end
216
-
217
- test("{key: false}") do
218
- assert_equal(<<-TABLE, @table.slice(visible: false).to_s)
219
- count visible
220
- (uint8) (bool)
221
- 0 2 false
222
- 1 32 false
223
- TABLE
224
- end
225
-
226
- test("{key: Range}: beginless include end") do
227
- begin
228
- range = eval("..8")
229
- rescue SyntaxError
230
- omit("beginless range isn't supported")
231
- end
232
- assert_equal(<<-TABLE, @table.slice(count: range).to_s)
233
- count visible
234
- (uint8) (bool)
235
- 0 1 true
236
- 1 2 false
237
- 2 4 (null)
238
- 3 8 true
239
- TABLE
240
- end
241
-
242
- test("{key: Range}: beginless exclude end") do
243
- begin
244
- range = eval("...8")
245
- rescue SyntaxError
246
- omit("beginless range isn't supported")
247
- end
248
- assert_equal(<<-TABLE, @table.slice(count: range).to_s)
249
- count visible
250
- (uint8) (bool)
251
- 0 1 true
252
- 1 2 false
253
- 2 4 (null)
254
- TABLE
255
- end
256
-
257
- test("{key: Range}: endless") do
258
- begin
259
- range = eval("16..")
260
- rescue SyntaxError
261
- omit("endless range isn't supported")
262
- end
263
- assert_equal(<<-TABLE, @table.slice(count: range).to_s)
264
- count visible
265
- (uint8) (bool)
266
- 0 16 true
267
- 1 32 false
268
- 2 64 (null)
269
- 3 128 (null)
270
- TABLE
271
- end
272
-
273
- test("{key: Range}: include end") do
274
- assert_equal(<<-TABLE, @table.slice(count: 1..16).to_s)
275
- count visible
276
- (uint8) (bool)
277
- 0 1 true
278
- 1 2 false
279
- 2 4 (null)
280
- 3 8 true
281
- 4 16 true
282
- TABLE
283
- end
284
-
285
- test("{key: Range}: exclude end") do
286
- assert_equal(<<-TABLE, @table.slice(count: 1...16).to_s)
287
- count visible
288
- (uint8) (bool)
289
- 0 1 true
290
- 1 2 false
291
- 2 4 (null)
292
- 3 8 true
293
- TABLE
294
- end
295
-
296
- test("{key1: Range, key2: true}") do
297
- assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s)
298
- count visible
299
- (uint8) (bool)
300
- 0 2 false
301
- TABLE
302
- end
303
-
304
- sub_test_case("wrong argument") do
305
- test("no arguments") do
306
- message = "wrong number of arguments (given 0, expected 1..2)"
307
- assert_raise(ArgumentError.new(message)) do
308
- @table.slice
309
- end
310
- end
311
-
312
- test("too many arguments") do
313
- message = "wrong number of arguments (given 3, expected 1..2)"
314
- assert_raise(ArgumentError.new(message)) do
315
- @table.slice(1, 2, 3)
316
- end
317
- end
318
-
319
- test("arguments: with block") do
320
- message = "must not specify both arguments and block"
321
- assert_raise(ArgumentError.new(message)) do
322
- @table.slice(1, 2) {}
323
- end
324
- end
325
-
326
- test("offset: too small") do
327
- n_rows = @table.n_rows
328
- offset = -(n_rows + 1)
329
- message = "offset is out of range (-#{n_rows + 1},#{n_rows}): #{offset}"
330
- assert_raise(ArgumentError.new(message)) do
331
- @table.slice(offset, 1)
332
- end
333
- end
334
-
335
- test("offset: too large") do
336
- n_rows = @table.n_rows
337
- offset = n_rows
338
- message = "offset is out of range (-#{n_rows + 1},#{n_rows}): #{offset}"
339
- assert_raise(ArgumentError.new(message)) do
340
- @table.slice(offset, 1)
341
- end
342
- end
343
- end
344
- end
345
-
346
- sub_test_case("#[]") do
347
- def setup
348
- @table = Arrow::Table.new(a: [true],
349
- b: [true],
350
- c: [true],
351
- d: [true],
352
- e: [true],
353
- f: [true],
354
- g: [true])
355
- end
356
-
357
- test("[String]") do
358
- assert_equal(Arrow::Column.new(@table, 0),
359
- @table["a"])
360
- end
361
-
362
- test("[Symbol]") do
363
- assert_equal(Arrow::Column.new(@table, 1),
364
- @table[:b])
365
- end
366
-
367
- test("[Integer]") do
368
- assert_equal(Arrow::Column.new(@table, 6),
369
- @table[-1])
370
- end
371
-
372
- test("[Range]") do
373
- assert_equal(Arrow::Table.new(d: [true],
374
- e: [true]),
375
- @table[3..4])
376
- end
377
-
378
- test("[[Symbol, String, Integer, Range]]") do
379
- assert_equal(Arrow::Table.new(c: [true],
380
- a: [true],
381
- g: [true],
382
- d: [true],
383
- e: [true]),
384
- @table[[:c, "a", -1, 3..4]])
385
- end
386
- end
387
-
388
- sub_test_case("#merge") do
389
- sub_test_case("Hash") do
390
- test("add") do
391
- name_array = Arrow::StringArray.new(["a", "b", "c", "d", "e", "f", "g", "h"])
392
- assert_equal(<<-TABLE, @table.merge(:name => name_array).to_s)
393
- count visible name
394
- (uint8) (bool) (utf8)
395
- 0 1 true a
396
- 1 2 false b
397
- 2 4 (null) c
398
- 3 8 true d
399
- 4 16 true e
400
- 5 32 false f
401
- 6 64 (null) g
402
- 7 128 (null) h
403
- TABLE
404
- end
405
-
406
- test("remove") do
407
- assert_equal(<<-TABLE, @table.merge(:visible => nil).to_s)
408
- count
409
- (uint8)
410
- 0 1
411
- 1 2
412
- 2 4
413
- 3 8
414
- 4 16
415
- 5 32
416
- 6 64
417
- 7 128
418
- TABLE
419
- end
420
-
421
- test("replace") do
422
- visible_array = Arrow::Int32Array.new([1] * @visible_array.length)
423
- assert_equal(<<-TABLE, @table.merge(:visible => visible_array).to_s)
424
- count visible
425
- (uint8) (int32)
426
- 0 1 1
427
- 1 2 1
428
- 2 4 1
429
- 3 8 1
430
- 4 16 1
431
- 5 32 1
432
- 6 64 1
433
- 7 128 1
434
- TABLE
435
- end
436
- end
437
-
438
- sub_test_case("Arrow::Table") do
439
- test("add") do
440
- name_array = Arrow::StringArray.new(["a", "b", "c", "d", "e", "f", "g", "h"])
441
- table = Arrow::Table.new("name" => name_array)
442
- assert_equal(<<-TABLE, @table.merge(table).to_s)
443
- count visible name
444
- (uint8) (bool) (utf8)
445
- 0 1 true a
446
- 1 2 false b
447
- 2 4 (null) c
448
- 3 8 true d
449
- 4 16 true e
450
- 5 32 false f
451
- 6 64 (null) g
452
- 7 128 (null) h
453
- TABLE
454
- end
455
-
456
- test("replace") do
457
- visible_array = Arrow::Int32Array.new([1] * @visible_array.length)
458
- table = Arrow::Table.new("visible" => visible_array)
459
- assert_equal(<<-TABLE, @table.merge(table).to_s)
460
- count visible
461
- (uint8) (int32)
462
- 0 1 1
463
- 1 2 1
464
- 2 4 1
465
- 3 8 1
466
- 4 16 1
467
- 5 32 1
468
- 6 64 1
469
- 7 128 1
470
- TABLE
471
- end
472
- end
473
- end
474
-
475
- test("column name getter") do
476
- assert_equal(Arrow::Column.new(@table, 1),
477
- @table.visible)
478
- end
479
-
480
- sub_test_case("#remove_column") do
481
- test("String") do
482
- assert_equal(<<-TABLE, @table.remove_column("visible").to_s)
483
- count
484
- (uint8)
485
- 0 1
486
- 1 2
487
- 2 4
488
- 3 8
489
- 4 16
490
- 5 32
491
- 6 64
492
- 7 128
493
- TABLE
494
- end
495
-
496
- test("Symbol") do
497
- assert_equal(<<-TABLE, @table.remove_column(:visible).to_s)
498
- count
499
- (uint8)
500
- 0 1
501
- 1 2
502
- 2 4
503
- 3 8
504
- 4 16
505
- 5 32
506
- 6 64
507
- 7 128
508
- TABLE
509
- end
510
-
511
- test("unknown column name") do
512
- assert_raise(KeyError) do
513
- @table.remove_column(:nonexistent)
514
- end
515
- end
516
-
517
- test("Integer") do
518
- assert_equal(<<-TABLE, @table.remove_column(1).to_s)
519
- count
520
- (uint8)
521
- 0 1
522
- 1 2
523
- 2 4
524
- 3 8
525
- 4 16
526
- 5 32
527
- 6 64
528
- 7 128
529
- TABLE
530
- end
531
-
532
- test("negative integer") do
533
- assert_equal(<<-TABLE, @table.remove_column(-1).to_s)
534
- count
535
- (uint8)
536
- 0 1
537
- 1 2
538
- 2 4
539
- 3 8
540
- 4 16
541
- 5 32
542
- 6 64
543
- 7 128
544
- TABLE
545
- end
546
-
547
- test("too small index") do
548
- assert_raise(IndexError) do
549
- @table.remove_column(-3)
550
- end
551
- end
552
-
553
- test("too large index") do
554
- assert_raise(IndexError) do
555
- @table.remove_column(2)
556
- end
557
- end
558
- end
559
-
560
- sub_test_case("#select_columns") do
561
- def setup
562
- raw_table = {
563
- :a => Arrow::UInt8Array.new([1]),
564
- :b => Arrow::UInt8Array.new([1]),
565
- :c => Arrow::UInt8Array.new([1]),
566
- :d => Arrow::UInt8Array.new([1]),
567
- :e => Arrow::UInt8Array.new([1]),
568
- }
569
- @table = Arrow::Table.new(raw_table)
570
- end
571
-
572
- test("names") do
573
- assert_equal(<<-TABLE, @table.select_columns(:c, :a).to_s)
574
- c a
575
- (uint8) (uint8)
576
- 0 1 1
577
- TABLE
578
- end
579
-
580
- test("range") do
581
- assert_equal(<<-TABLE, @table.select_columns(2...4).to_s)
582
- c d
583
- (uint8) (uint8)
584
- 0 1 1
585
- TABLE
586
- end
587
-
588
- test("indexes") do
589
- assert_equal(<<-TABLE, @table.select_columns(0, -1, 2).to_s)
590
- a e c
591
- (uint8) (uint8) (uint8)
592
- 0 1 1 1
593
- TABLE
594
- end
595
-
596
- test("mixed") do
597
- assert_equal(<<-TABLE, @table.select_columns(:a, -1, 2..3).to_s)
598
- a e c d
599
- (uint8) (uint8) (uint8) (uint8)
600
- 0 1 1 1 1
601
- TABLE
602
- end
603
-
604
- test("block") do
605
- selected_table = @table.select_columns.with_index do |column, i|
606
- column.name == "a" or i.odd?
607
- end
608
- assert_equal(<<-TABLE, selected_table.to_s)
609
- a b d
610
- (uint8) (uint8) (uint8)
611
- 0 1 1 1
612
- TABLE
613
- end
614
-
615
- test("names, indexes and block") do
616
- selected_table = @table.select_columns(:a, -1) do |column|
617
- column.name == "a"
618
- end
619
- assert_equal(<<-TABLE, selected_table.to_s)
620
- a
621
- (uint8)
622
- 0 1
623
- TABLE
624
- end
625
-
626
- test("empty result") do
627
- selected_table = @table.filter([false] * @table.size).select_columns(:a)
628
- assert_equal(<<-TABLE, selected_table.to_s)
629
- a
630
- (uint8)
631
- TABLE
632
- end
633
- end
634
-
635
- sub_test_case("#column_names") do
636
- test("unique") do
637
- table = Arrow::Table.new(a: [1], b: [2], c: [3])
638
- assert_equal(%w[a b c], table.column_names)
639
- end
640
-
641
- test("duplicated") do
642
- table = Arrow::Table.new([["a", [1, 2, 3]], ["a", [4, 5, 6]]])
643
- assert_equal(%w[a a], table.column_names)
644
- end
645
- end
646
-
647
- sub_test_case("#save and .load") do
648
- module SaveLoadFormatTests
649
- def test_default
650
- output = create_output(".arrow")
651
- @table.save(output)
652
- assert_equal(@table, Arrow::Table.load(output))
653
- end
654
-
655
- def test_arrow_file
656
- output = create_output(".arrow")
657
- @table.save(output, format: :arrow_file)
658
- assert_equal(@table, Arrow::Table.load(output, format: :arrow_file))
659
- end
660
-
661
- def test_batch
662
- output = create_output(".arrow")
663
- @table.save(output, format: :batch)
664
- assert_equal(@table, Arrow::Table.load(output, format: :batch))
665
- end
666
-
667
- def test_arrows
668
- output = create_output(".arrows")
669
- @table.save(output, format: :arrows)
670
- assert_equal(@table, Arrow::Table.load(output, format: :arrows))
671
- end
672
-
673
- def test_arrow_streaming
674
- output = create_output(".arrows")
675
- @table.save(output, format: :arrow_streaming)
676
- assert_equal(@table, Arrow::Table.load(output, format: :arrow_streaming))
677
- end
678
-
679
- def test_stream
680
- output = create_output(".arrows")
681
- @table.save(output, format: :stream)
682
- assert_equal(@table, Arrow::Table.load(output, format: :stream))
683
- end
684
-
685
- def test_csv
686
- output = create_output(".csv")
687
- @table.save(output, format: :csv)
688
- assert_equal(@table,
689
- Arrow::Table.load(output,
690
- format: :csv,
691
- schema: @table.schema))
692
- end
693
-
694
- def test_csv_gz
695
- output = create_output(".csv.gz")
696
- @table.save(output,
697
- format: :csv,
698
- compression: :gzip)
699
- assert_equal(@table,
700
- Arrow::Table.load(output,
701
- format: :csv,
702
- compression: :gzip,
703
- schema: @table.schema))
704
- end
705
-
706
- def test_tsv
707
- output = create_output(".tsv")
708
- @table.save(output, format: :tsv)
709
- assert_equal(@table,
710
- Arrow::Table.load(output,
711
- format: :tsv,
712
- schema: @table.schema))
713
- end
714
-
715
- def test_json
716
- output = create_output(".json")
717
- # TODO: Implement this.
718
- # @table.save(output, format: :json)
719
- columns = +""
720
- @table.each_record.each do |record|
721
- column = {
722
- "count" => record.count,
723
- "visible" => record.visible,
724
- }
725
- columns << column.to_json
726
- columns << "\n"
727
- end
728
- if output.is_a?(String)
729
- File.write(output, columns)
730
- else
731
- output.resize(columns.bytesize)
732
- output.set_data(0, columns)
733
- end
734
- assert_equal(@table,
735
- Arrow::Table.load(output,
736
- format: :json,
737
- schema: @table.schema))
738
- end
739
- end
740
-
741
- sub_test_case("path") do
742
- sub_test_case(":format") do
743
- include SaveLoadFormatTests
744
-
745
- def create_output(extension)
746
- @file = Tempfile.new(["red-arrow", extension])
747
- @file.path
748
- end
749
-
750
- sub_test_case("save: auto detect") do
751
- test("arrow") do
752
- output = create_output(".arrow")
753
- @table.save(output)
754
- assert_equal(@table,
755
- Arrow::Table.load(output,
756
- format: :arrow,
757
- schema: @table.schema))
758
- end
759
-
760
- test("arrows") do
761
- output = create_output(".arrows")
762
- @table.save(output)
763
- assert_equal(@table,
764
- Arrow::Table.load(output,
765
- format: :arrows,
766
- schema: @table.schema))
767
- end
768
-
769
- test("csv") do
770
- output = create_output(".csv")
771
- @table.save(output)
772
- assert_equal(@table,
773
- Arrow::Table.load(output,
774
- format: :csv,
775
- schema: @table.schema))
776
- end
777
-
778
- test("csv, return value") do
779
- output = create_output(".csv")
780
- assert_equal(@table, @table.save(output))
781
- end
782
-
783
- test("csv.gz") do
784
- output = create_output(".csv.gz")
785
- @table.save(output)
786
- assert_equal(@table,
787
- Arrow::Table.load(output,
788
- format: :csv,
789
- compression: :gzip,
790
- schema: @table.schema))
791
- end
792
-
793
- test("tsv") do
794
- output = create_output(".tsv")
795
- @table.save(output)
796
- assert_equal(@table,
797
- Arrow::Table.load(output,
798
- format: :tsv,
799
- schema: @table.schema))
800
- end
801
- end
802
-
803
- sub_test_case("load: auto detect") do
804
- test("arrow: file") do
805
- output = create_output(".arrow")
806
- @table.save(output, format: :arrow_file)
807
- assert_equal(@table, Arrow::Table.load(output))
808
- end
809
-
810
- test("arrow: streaming") do
811
- output = create_output(".arrow")
812
- @table.save(output, format: :arrows)
813
- assert_equal(@table, Arrow::Table.load(output))
814
- end
815
-
816
- test("arrows") do
817
- output = create_output(".arrows")
818
- @table.save(output, format: :arrows)
819
- assert_equal(@table, Arrow::Table.load(output))
820
- end
821
-
822
- test("csv") do
823
- path = fixture_path("with-header.csv")
824
- table = Arrow::Table.load(path, skip_lines: /^\#/)
825
- assert_equal(<<-TABLE, table.to_s)
826
- name score
827
- (utf8) (int8)
828
- 0 alice 10
829
- 1 bob 29
830
- 2 chris -1
831
- TABLE
832
- end
833
-
834
- test("csv.gz") do
835
- file = Tempfile.new(["red-arrow", ".csv.gz"])
836
- file.close
837
- Zlib::GzipWriter.open(file.path) do |gz|
838
- gz.write(<<-CSV)
839
- name,score
840
- alice,10
841
- bob,29
842
- chris,-1
843
- CSV
844
- end
845
- assert_equal(<<-TABLE, Arrow::Table.load(file.path).to_s)
846
- name score
847
- (utf8) (int64)
848
- 0 alice 10
849
- 1 bob 29
850
- 2 chris -1
851
- TABLE
852
- end
853
-
854
- test("tsv") do
855
- file = Tempfile.new(["red-arrow", ".tsv"])
856
- file.puts(<<-TSV)
857
- name\tscore
858
- alice\t10
859
- bob\t29
860
- chris\t-1
861
- TSV
862
- file.close
863
- table = Arrow::Table.load(file.path)
864
- assert_equal(<<-TABLE, table.to_s)
865
- name score
866
- (utf8) (int64)
867
- 0 alice 10
868
- 1 bob 29
869
- 2 chris -1
870
- TABLE
871
- end
872
- end
873
- end
874
- end
875
-
876
- sub_test_case("Buffer") do
877
- sub_test_case(":format") do
878
- include SaveLoadFormatTests
879
-
880
- def create_output(extension)
881
- Arrow::ResizableBuffer.new(1024)
882
- end
883
- end
884
- end
885
-
886
- sub_test_case("URI") do
887
- def start_web_server(path, data, content_type)
888
- http_server = WEBrick::HTTPServer.new(:Port => 0)
889
- http_server.mount_proc(path) do |request, response|
890
- response.body = data
891
- response.content_type = content_type
892
- end
893
- http_server_thread = Thread.new do
894
- http_server.start
895
- end
896
- begin
897
- Timeout.timeout(1) do
898
- yield(http_server[:Port])
899
- end
900
- ensure
901
- http_server.shutdown
902
- http_server_thread.join
903
- end
904
- end
905
-
906
- data("Arrow File",
907
- ["arrow", "application/vnd.apache.arrow.file"])
908
- data("Arrow Stream",
909
- ["arrows", "application/vnd.apache.arrow.stream"])
910
- data("CSV",
911
- ["csv", "text/csv"])
912
- def test_http(data)
913
- extension, content_type = data
914
- output = Arrow::ResizableBuffer.new(1024)
915
- @table.save(output, format: extension.to_sym)
916
- path = "/data.#{extension}"
917
- start_web_server(path,
918
- output.data.to_s,
919
- content_type) do |port|
920
- input = URI("http://127.0.0.1:#{port}#{path}")
921
- loaded_table = Arrow::Table.load(input, schema: @table.schema)
922
- assert_equal(@table.to_s, loaded_table.to_s)
923
- end
924
- end
925
- end
926
-
927
- sub_test_case("GC") do
928
- def setup
929
- table = Arrow::Table.new(integer: [1, 2, 3],
930
- string: ["a", "b", "c"])
931
- @buffer = Arrow::ResizableBuffer.new(1024)
932
- table.save(@buffer, format: :arrow)
933
- @loaded_table = Arrow::Table.load(@buffer)
934
- end
935
-
936
- def test_chunked_array
937
- chunked_array = @loaded_table[0].data
938
- assert_equal(@buffer,
939
- chunked_array.instance_variable_get(:@input).buffer)
940
- end
941
-
942
- def test_array
943
- array = @loaded_table[0].data.chunks[0]
944
- assert_equal(@buffer,
945
- array.instance_variable_get(:@input).buffer)
946
- end
947
-
948
- def test_record_batch
949
- record_batch = @loaded_table.each_record_batch.first
950
- assert_equal(@buffer,
951
- record_batch.instance_variable_get(:@input).buffer)
952
- end
953
-
954
- def test_record_batch_array
955
- array = @loaded_table.each_record_batch.first[0].data
956
- assert_equal(@buffer,
957
- array.instance_variable_get(:@input).buffer)
958
- end
959
-
960
- def test_record_batch_table
961
- table = @loaded_table.each_record_batch.first.to_table
962
- assert_equal(@buffer,
963
- table.instance_variable_get(:@input).buffer)
964
- end
965
-
966
- def test_slice
967
- table = @loaded_table.slice(0..-1)
968
- assert_equal(@buffer,
969
- table.instance_variable_get(:@input).buffer)
970
- end
971
-
972
- def test_merge
973
- table = @loaded_table.merge({})
974
- assert_equal(@buffer,
975
- table.instance_variable_get(:@input).buffer)
976
- end
977
-
978
- def test_remove_column
979
- table = @loaded_table.remove_column(0)
980
- assert_equal(@buffer,
981
- table.instance_variable_get(:@input).buffer)
982
- end
983
-
984
- def test_pack
985
- table = @loaded_table.pack
986
- assert_equal(@buffer,
987
- table.instance_variable_get(:@input).buffer)
988
- end
989
-
990
- def test_join
991
- table = @loaded_table.join(@loaded_table, :integer)
992
- assert_equal(@buffer,
993
- table.instance_variable_get(:@input).buffer)
994
- end
995
- end
996
- end
997
-
998
- test("#pack") do
999
- packed_table = @table.pack
1000
- column_n_chunks = packed_table.columns.collect {|c| c.data.n_chunks}
1001
- assert_equal([[1, 1], <<-TABLE], [column_n_chunks, packed_table.to_s])
1002
- count visible
1003
- (uint8) (bool)
1004
- 0 1 true
1005
- 1 2 false
1006
- 2 4 (null)
1007
- 3 8 true
1008
- 4 16 true
1009
- 5 32 false
1010
- 6 64 (null)
1011
- 7 128 (null)
1012
- TABLE
1013
- end
1014
-
1015
- sub_test_case("#to_s") do
1016
- sub_test_case(":format") do
1017
- def setup
1018
- columns = {
1019
- "count" => Arrow::UInt8Array.new([1, 2]),
1020
- "visible" => Arrow::BooleanArray.new([true, false]),
1021
- }
1022
- @table = Arrow::Table.new(columns)
1023
- end
1024
-
1025
- test(":column") do
1026
- assert_equal(<<-TABLE, @table.to_s(format: :column))
1027
- count: uint8
1028
- visible: bool
1029
- ----
1030
- count:
1031
- [
1032
- [
1033
- 1,
1034
- 2
1035
- ]
1036
- ]
1037
- visible:
1038
- [
1039
- [
1040
- true,
1041
- false
1042
- ]
1043
- ]
1044
- TABLE
1045
- end
1046
-
1047
- test(":list") do
1048
- assert_equal(<<-TABLE, @table.to_s(format: :list))
1049
- ==================== 0 ====================
1050
- count(uint8): 1
1051
- visible(bool): true
1052
- ==================== 1 ====================
1053
- count(uint8): 2
1054
- visible(bool): false
1055
- TABLE
1056
- end
1057
-
1058
- test(":table") do
1059
- assert_equal(<<-TABLE, @table.to_s(format: :table))
1060
- count visible
1061
- (uint8) (bool)
1062
- 0 1 true
1063
- 1 2 false
1064
- TABLE
1065
- end
1066
-
1067
- test("invalid") do
1068
- message = ":format must be :column, :list, :table or nil: <:invalid>"
1069
- assert_raise(ArgumentError.new(message)) do
1070
- @table.to_s(format: :invalid)
1071
- end
1072
- end
1073
- end
1074
-
1075
- sub_test_case(":show_column_type") do
1076
- def setup
1077
- columns = {
1078
- "count" => Arrow::UInt8Array.new([1, 2]),
1079
- "visible" => Arrow::BooleanArray.new([true, false]),
1080
- }
1081
- @table = Arrow::Table.new(columns)
1082
- end
1083
-
1084
- test(":list") do
1085
- assert_equal(<<-TABLE, @table.to_s(format: :list, show_column_type: false))
1086
- ==================== 0 ====================
1087
- count: 1
1088
- visible: true
1089
- ==================== 1 ====================
1090
- count: 2
1091
- visible: false
1092
- TABLE
1093
- end
1094
-
1095
- test(":table") do
1096
- assert_equal(<<-TABLE, @table.to_s(format: :table, show_column_type: false))
1097
- count visible
1098
- 0 1 true
1099
- 1 2 false
1100
- TABLE
1101
- end
1102
- end
1103
-
1104
- sub_test_case("#==") do
1105
- test("Arrow::Table") do
1106
- assert do
1107
- @table == @table
1108
- end
1109
- end
1110
-
1111
- test("not Arrow::Table") do
1112
- assert do
1113
- not (@table == 29)
1114
- end
1115
- end
1116
- end
1117
- end
1118
-
1119
- sub_test_case("#filter") do
1120
- def setup
1121
- super
1122
- @options = Arrow::FilterOptions.new
1123
- @options.null_selection_behavior = :emit_null
1124
- end
1125
-
1126
- test("Array: boolean") do
1127
- filter = [nil, true, true, false, true, false, true, true]
1128
- assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
1129
- count visible
1130
- (uint8) (bool)
1131
- 0 (null) (null)
1132
- 1 2 false
1133
- 2 4 (null)
1134
- 3 16 true
1135
- 4 64 (null)
1136
- 5 128 (null)
1137
- TABLE
1138
- end
1139
-
1140
- test("Arrow::BooleanArray") do
1141
- array = [nil, true, true, false, true, false, true, true]
1142
- filter = Arrow::BooleanArray.new(array)
1143
- assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
1144
- count visible
1145
- (uint8) (bool)
1146
- 0 (null) (null)
1147
- 1 2 false
1148
- 2 4 (null)
1149
- 3 16 true
1150
- 4 64 (null)
1151
- 5 128 (null)
1152
- TABLE
1153
- end
1154
-
1155
- test("Arrow::ChunkedArray") do
1156
- filter_chunks = [
1157
- Arrow::BooleanArray.new([nil, true, true]),
1158
- Arrow::BooleanArray.new([false, true, false]),
1159
- Arrow::BooleanArray.new([true, true]),
1160
- ]
1161
- filter = Arrow::ChunkedArray.new(filter_chunks)
1162
- assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
1163
- count visible
1164
- (uint8) (bool)
1165
- 0 (null) (null)
1166
- 1 2 false
1167
- 2 4 (null)
1168
- 3 16 true
1169
- 4 64 (null)
1170
- 5 128 (null)
1171
- TABLE
1172
- end
1173
- end
1174
-
1175
- sub_test_case("#take") do
1176
- test("Arrow: boolean") do
1177
- indices = [1, 0, 2]
1178
- assert_equal(<<-TABLE, @table.take(indices).to_s)
1179
- count visible
1180
- (uint8) (bool)
1181
- 0 2 false
1182
- 1 1 true
1183
- 2 4 (null)
1184
- TABLE
1185
- end
1186
-
1187
- test("Arrow::Array") do
1188
- indices = Arrow::Int16Array.new([1, 0, 2])
1189
- assert_equal(<<-TABLE, @table.take(indices).to_s)
1190
- count visible
1191
- (uint8) (bool)
1192
- 0 2 false
1193
- 1 1 true
1194
- 2 4 (null)
1195
- TABLE
1196
- end
1197
-
1198
- test("Arrow::ChunkedArray") do
1199
- chunks = [
1200
- Arrow::Int16Array.new([1, 0]),
1201
- Arrow::Int16Array.new([2])
1202
- ]
1203
- indices = Arrow::ChunkedArray.new(chunks)
1204
- assert_equal(<<-TABLE, @table.take(indices).to_s)
1205
- count visible
1206
- (uint8) (bool)
1207
- 0 2 false
1208
- 1 1 true
1209
- 2 4 (null)
1210
- TABLE
1211
- end
1212
- end
1213
-
1214
- sub_test_case("#concatenate") do
1215
- test("options: :unify_schemas") do
1216
- table1 = Arrow::Table.new(a: [true],
1217
- b: [false])
1218
- table2 = Arrow::Table.new(b: [false])
1219
- concatenated = table1.concatenate([table2], unify_schemas: true)
1220
- assert_equal(<<-TABLE, concatenated.to_s)
1221
- a b
1222
- (bool) (bool)
1223
- 0 true false
1224
- 1 (null) false
1225
- TABLE
1226
- end
1227
- end
1228
-
1229
- sub_test_case("#join") do
1230
- test("keys: nil (natural join)") do
1231
- table1 = Arrow::Table.new(key: [1, 2, 3],
1232
- number: [10, 20, 30])
1233
- table2 = Arrow::Table.new(key: [3, 1],
1234
- string: ["three", "one"])
1235
- assert_equal(Arrow::Table.new([
1236
- ["key", [1, 3]],
1237
- ["number", [10, 30]],
1238
- ["string", ["one", "three"]],
1239
- ]),
1240
- table1.join(table2))
1241
- end
1242
-
1243
- test("keys: String") do
1244
- table1 = Arrow::Table.new(key: [1, 2, 3],
1245
- number: [10, 20, 30])
1246
- table2 = Arrow::Table.new(key: [3, 1],
1247
- string: ["three", "one"])
1248
- assert_equal(Arrow::Table.new([
1249
- ["key", [1, 3]],
1250
- ["number", [10, 30]],
1251
- ["string", ["one", "three"]],
1252
- ]),
1253
- table1.join(table2, "key"))
1254
- end
1255
-
1256
- test("keys: Symbol") do
1257
- table1 = Arrow::Table.new(key: [1, 2, 3],
1258
- number: [10, 20, 30])
1259
- table2 = Arrow::Table.new(key: [3, 1],
1260
- string: ["three", "one"])
1261
- assert_equal(Arrow::Table.new([
1262
- ["key", [1, 3]],
1263
- ["number", [10, 30]],
1264
- ["string", ["one", "three"]],
1265
- ]),
1266
- table1.join(table2, :key))
1267
- end
1268
-
1269
- test("keys: [String]") do
1270
- table1 = Arrow::Table.new(key: [1, 2, 3],
1271
- number: [10, 20, 30])
1272
- table2 = Arrow::Table.new(key: [3, 1],
1273
- string: ["three", "one"])
1274
- assert_equal(Arrow::Table.new([
1275
- ["key", [1, 3]],
1276
- ["number", [10, 30]],
1277
- ["key", [1, 3]],
1278
- ["string", ["one", "three"]],
1279
- ]),
1280
- table1.join(table2, ["key"]))
1281
- end
1282
-
1283
- test("keys: [String, Symbol]") do
1284
- table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1285
- key2: [10, 100, 20, 200],
1286
- number: [1010, 1100, 2020, 2200])
1287
- table2 = Arrow::Table.new(key1: [1, 2, 2],
1288
- key2: [100, 20, 50],
1289
- string: ["1-100", "2-20", "2-50"])
1290
- assert_equal(Arrow::Table.new([
1291
- ["key1", [1, 2]],
1292
- ["key2", [100, 20]],
1293
- ["number", [1100, 2020]],
1294
- ["key1", [1, 2]],
1295
- ["key2", [100, 20]],
1296
- ["string", ["1-100", "2-20"]],
1297
- ]),
1298
- table1.join(table2, ["key1", :key2]))
1299
- end
1300
-
1301
- test("keys: {left: String, right: Symbol}") do
1302
- table1 = Arrow::Table.new(left_key: [1, 2, 3],
1303
- number: [10, 20, 30])
1304
- table2 = Arrow::Table.new(right_key: [3, 1],
1305
- string: ["three", "one"])
1306
- assert_equal(Arrow::Table.new([
1307
- ["left_key", [1, 3]],
1308
- ["number", [10, 30]],
1309
- ["right_key", [1, 3]],
1310
- ["string", ["one", "three"]],
1311
- ]),
1312
- table1.join(table2,
1313
- {left: "left_key", right: :right_key},
1314
- type: :inner))
1315
- end
1316
-
1317
- test("keys: {left: [String, Symbol], right: [Symbol, String]}") do
1318
- table1 = Arrow::Table.new(left_key1: [1, 1, 2, 2],
1319
- left_key2: [10, 100, 20, 200],
1320
- number: [1010, 1100, 2020, 2200])
1321
- table2 = Arrow::Table.new(right_key1: [1, 2, 2],
1322
- right_key2: [100, 20, 50],
1323
- string: ["1-100", "2-20", "2-50"])
1324
- assert_equal(Arrow::Table.new([
1325
- ["left_key1", [1, 2]],
1326
- ["left_key2", [100, 20]],
1327
- ["number", [1100, 2020]],
1328
- ["right_key1", [1, 2]],
1329
- ["right_key2", [100, 20]],
1330
- ["string", ["1-100", "2-20"]],
1331
- ]),
1332
- table1.join(table2,
1333
- {
1334
- left: ["left_key1", :left_key2],
1335
- right: [:right_key1, "right_key2"],
1336
- },
1337
- type: :inner))
1338
- end
1339
-
1340
- test("type: :left_outer") do
1341
- table1 = Arrow::Table.new(key: [1, 2, 3],
1342
- number: [10, 20, 30])
1343
- table2 = Arrow::Table.new(key: [3, 1],
1344
- string: ["three", "one"])
1345
- assert_equal(Arrow::Table.new([
1346
- ["key", [1, 3, 2]],
1347
- ["number", [10, 30, 20]],
1348
- ["string", ["one", "three", nil]],
1349
- ]),
1350
- table1.join(table2, "key", type: :left_outer))
1351
- end
1352
-
1353
- test("type: :right_outer") do
1354
- table1 = Arrow::Table.new(key: [1, 2, 3],
1355
- number: [10, 20, 30])
1356
- table2 = Arrow::Table.new(key: [3, 1],
1357
- string: ["three", "one"])
1358
- assert_equal(Arrow::Table.new([
1359
- ["key", [1, 3]],
1360
- ["number", [10, 30]],
1361
- ["string", ["one", "three"]],
1362
- ]),
1363
- table1.join(table2, "key", type: :right_outer))
1364
- end
1365
-
1366
- test("type: :full_outer") do
1367
- table1 = Arrow::Table.new(key: [1, 2, 3],
1368
- number: [10, 20, 30])
1369
- table2 = Arrow::Table.new(key: [3, 1],
1370
- string: ["three", "one"])
1371
- assert_equal(Arrow::Table.new([
1372
- ["key", [1, 3, 2]],
1373
- ["number", [10, 30, 20]],
1374
- ["string", ["one", "three", nil]],
1375
- ]),
1376
- table1.join(table2, "key", type: :full_outer))
1377
- end
1378
-
1379
- test("type: :left_semi") do
1380
- table1 = Arrow::Table.new(key: [1, 2, 3],
1381
- number: [10, 20, 30])
1382
- table2 = Arrow::Table.new(key: [3, 1],
1383
- string: ["three", "one"])
1384
- assert_equal(Arrow::Table.new([
1385
- ["key", [1, 3]],
1386
- ["number", [10, 30]],
1387
- ]),
1388
- table1.join(table2, "key", type: :left_semi))
1389
- end
1390
-
1391
- test("type: :right_semi") do
1392
- table1 = Arrow::Table.new(key: [1, 2, 3],
1393
- number: [10, 20, 30])
1394
- table2 = Arrow::Table.new(key: [3, 1],
1395
- string: ["three", "one"])
1396
- assert_equal(Arrow::Table.new([
1397
- ["key", [3, 1]],
1398
- ["string", ["three", "one"]],
1399
- ]),
1400
- table1.join(table2, "key", type: :right_semi))
1401
- end
1402
-
1403
- test("type: :left_anti") do
1404
- table1 = Arrow::Table.new(key: [1, 2, 3],
1405
- number: [10, 20, 30])
1406
- table2 = Arrow::Table.new(key: [3, 1],
1407
- string: ["three", "one"])
1408
- assert_equal(Arrow::Table.new([
1409
- ["key", [2]],
1410
- ["number", [20]],
1411
- ]),
1412
- table1.join(table2, "key", type: :left_anti))
1413
- end
1414
-
1415
- test("type: :right_anti") do
1416
- table1 = Arrow::Table.new(key: [1, 2, 3],
1417
- number: [10, 20, 30])
1418
- table2 = Arrow::Table.new(key: [3, 1],
1419
- string: ["three", "one"])
1420
- assert_equal(Arrow::Table.new([
1421
- ["key", Arrow::ChunkedArray.new(:uint8)],
1422
- ["string", Arrow::ChunkedArray.new(:string)],
1423
- ]),
1424
- table1.join(table2, "key", type: :right_anti))
1425
- end
1426
-
1427
- test("left_outputs: & right_outputs:") do
1428
- table1 = Arrow::Table.new(key: [1, 2, 3],
1429
- number: [10, 20, 30])
1430
- table2 = Arrow::Table.new(key: [3, 1],
1431
- string: ["three", "one"])
1432
- assert_equal(Arrow::Table.new(key: [1, 3],
1433
- number: [10, 30],
1434
- string: ["one", "three"]),
1435
- table1.join(table2,
1436
- "key",
1437
- left_outputs: ["key", "number"],
1438
- right_outputs: ["string"]))
1439
- end
1440
-
1441
- test("left_outputs: & type: :inner") do
1442
- table1 = Arrow::Table.new(key: [1, 2, 3],
1443
- number: [10, 20, 30])
1444
- table2 = Arrow::Table.new(key: [3, 1],
1445
- string: ["three", "one"])
1446
- assert_equal(Arrow::Table.new([
1447
- ["key", [1, 3]],
1448
- ["number", [10, 30]],
1449
- ["key", [1, 3]],
1450
- ["string", ["one", "three"]]
1451
- ]),
1452
- table1.join(table2,
1453
- type: :inner,
1454
- left_outputs: table1.column_names,
1455
- right_outputs: table2.column_names))
1456
- end
1457
-
1458
- test("left_outputs: & type: :left_outer") do
1459
- table1 = Arrow::Table.new(key: [1, 2, 3],
1460
- number: [10, 20, 30])
1461
- table2 = Arrow::Table.new(key: [3, 1],
1462
- string: ["three", "one"])
1463
- assert_equal(Arrow::Table.new([
1464
- ["key", [1, 3, 2]],
1465
- ["number", [10, 30, 20]],
1466
- ["key", [1, 3, nil]],
1467
- ["string", ["one", "three", nil]],
1468
- ]),
1469
- table1.join(table2,
1470
- type: :left_outer,
1471
- left_outputs: table1.column_names,
1472
- right_outputs: table2.column_names))
1473
- end
1474
-
1475
- test("left_outputs: & type: :right_outer") do
1476
- table1 = Arrow::Table.new(key: [1, 2, 3],
1477
- number: [10, 20, 30])
1478
- table2 = Arrow::Table.new(key: [3, 1],
1479
- string: ["three", "one"])
1480
- assert_equal(Arrow::Table.new([
1481
- ["key", [1, 3]],
1482
- ["number", [10, 30]],
1483
- ["key", [1, 3]],
1484
- ["string", ["one", "three"]],
1485
- ]),
1486
- table1.join(table2,
1487
- type: :right_outer,
1488
- left_outputs: table1.column_names,
1489
- right_outputs: table2.column_names))
1490
- end
1491
-
1492
- test("left_outputs: & type: :full_outer") do
1493
- table1 = Arrow::Table.new(key: [1, 2, 3],
1494
- number: [10, 20, 30])
1495
- table2 = Arrow::Table.new(key: [3, 1],
1496
- string: ["three", "one"])
1497
- assert_equal(Arrow::Table.new([
1498
- ["key", [1, 3, 2]],
1499
- ["number", [10, 30, 20]],
1500
- ["key", [1, 3, nil]],
1501
- ["string", ["one", "three", nil]],
1502
- ]),
1503
- table1.join(table2,
1504
- type: :full_outer,
1505
- left_outputs: table1.column_names,
1506
- right_outputs: table2.column_names))
1507
- end
1508
-
1509
- test("left_suffix: & keys: [String]") do
1510
- table1 = Arrow::Table.new(key1: [1, 1, 2, 2],
1511
- key2: [10, 100, 20, 200],
1512
- number: [1010, 1100, 2020, 2200])
1513
- table2 = Arrow::Table.new(key1: [1, 2, 2],
1514
- key2: [100, 20, 50],
1515
- string: ["1-100", "2-20", "2-50"])
1516
- assert_equal(Arrow::Table.new([
1517
- ["key1_left", [1, 2]],
1518
- ["key2_left", [100, 20]],
1519
- ["number", [1100, 2020]],
1520
- ["key1_right", [1, 2]],
1521
- ["key2_right", [100, 20]],
1522
- ["string", ["1-100", "2-20"]],
1523
- ]),
1524
- table1.join(table2,
1525
- ["key1", "key2"],
1526
- left_suffix: "_left",
1527
- right_suffix: "_right"))
1528
- end
1529
- end
1530
- end