red-arrow 0.15.1 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +63 -33
  4. data/ext/arrow/raw-records.cpp +2 -1
  5. data/ext/arrow/values.cpp +2 -1
  6. data/lib/arrow/array-builder.rb +101 -52
  7. data/lib/arrow/array.rb +28 -10
  8. data/lib/arrow/{binary-array-builder.rb → buffer.rb} +7 -15
  9. data/lib/arrow/chunked-array.rb +2 -0
  10. data/lib/arrow/csv-loader.rb +5 -0
  11. data/lib/arrow/csv-read-options.rb +18 -0
  12. data/lib/arrow/data-type.rb +35 -2
  13. data/lib/arrow/decimal128-array-builder.rb +0 -2
  14. data/lib/arrow/dictionary-array.rb +24 -0
  15. data/lib/arrow/field.rb +1 -1
  16. data/lib/arrow/generic-filterable.rb +43 -0
  17. data/lib/arrow/generic-takeable.rb +38 -0
  18. data/lib/arrow/list-data-type.rb +58 -8
  19. data/lib/arrow/loader.rb +12 -1
  20. data/lib/arrow/null-array-builder.rb +1 -1
  21. data/lib/arrow/null-array.rb +24 -0
  22. data/lib/arrow/raw-table-converter.rb +47 -0
  23. data/lib/arrow/record-batch-iterator.rb +22 -0
  24. data/lib/arrow/record-batch.rb +8 -3
  25. data/lib/arrow/schema.rb +5 -2
  26. data/lib/arrow/struct-array-builder.rb +13 -7
  27. data/lib/arrow/struct-data-type.rb +0 -2
  28. data/lib/arrow/table-loader.rb +29 -6
  29. data/lib/arrow/table-saver.rb +37 -13
  30. data/lib/arrow/table.rb +20 -73
  31. data/lib/arrow/version.rb +1 -1
  32. data/red-arrow.gemspec +3 -1
  33. data/test/helper.rb +1 -0
  34. data/test/helper/omittable.rb +36 -0
  35. data/test/raw-records/test-dense-union-array.rb +1 -34
  36. data/test/raw-records/test-sparse-union-array.rb +1 -33
  37. data/test/run-test.rb +14 -3
  38. data/test/test-array-builder.rb +17 -0
  39. data/test/test-array.rb +104 -0
  40. data/test/test-buffer.rb +11 -0
  41. data/test/test-chunked-array.rb +96 -0
  42. data/test/test-csv-loader.rb +2 -2
  43. data/test/test-data-type.rb +11 -0
  44. data/test/test-dense-union-data-type.rb +2 -2
  45. data/test/test-dictionary-array.rb +41 -0
  46. data/test/test-feather.rb +21 -6
  47. data/test/test-list-data-type.rb +27 -1
  48. data/test/test-null-array.rb +23 -0
  49. data/test/test-record-batch-iterator.rb +37 -0
  50. data/test/test-record-batch.rb +14 -0
  51. data/test/test-schema.rb +16 -0
  52. data/test/test-slicer.rb +74 -30
  53. data/test/test-sparse-union-data-type.rb +2 -2
  54. data/test/test-struct-array-builder.rb +8 -4
  55. data/test/test-table.rb +153 -14
  56. data/test/test-timestamp-array.rb +19 -0
  57. data/test/values/test-dense-union-array.rb +1 -34
  58. data/test/values/test-sparse-union-array.rb +1 -33
  59. metadata +22 -8
@@ -0,0 +1,23 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class NullArrayTest < Test::Unit::TestCase
19
+ test("#[]") do
20
+ array = Arrow::NullArray.new(1)
21
+ assert_nil(array[0])
22
+ end
23
+ end
@@ -0,0 +1,37 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class RecordBatchIteratorTest < Test::Unit::TestCase
19
+ def setup
20
+ @schema = Arrow::Schema.new(visible: :boolean,
21
+ count: :uint32)
22
+ @record_batches = [
23
+ Arrow::RecordBatch.new(@schema,
24
+ visible: [true],
25
+ count: [1]),
26
+ Arrow::RecordBatch.new(@schema,
27
+ visible: [false, nil],
28
+ count: [nil, 3]),
29
+ ]
30
+ @iterator = Arrow::RecordBatchIterator.new(@record_batches)
31
+ end
32
+
33
+ def test_to_a
34
+ assert_equal(@record_batches,
35
+ @iterator.to_a)
36
+ end
37
+ end
@@ -22,6 +22,20 @@ class RecordBatchTest < Test::Unit::TestCase
22
22
  count: :uint32)
23
23
  end
24
24
 
25
+ test("[raw_table]") do
26
+ raw_table = {
27
+ visible: [true, nil, false],
28
+ count: [1, nil, 3],
29
+ }
30
+ record_batch = Arrow::RecordBatch.new(raw_table)
31
+ assert_equal([
32
+ {"visible" => true, "count" => 1},
33
+ {"visible" => nil, "count" => nil},
34
+ {"visible" => false, "count" => 3},
35
+ ],
36
+ record_batch.each_record.collect(&:to_h))
37
+ end
38
+
25
39
  test("[Schema, records]") do
26
40
  records = [
27
41
  {visible: true, count: 1},
@@ -16,6 +16,8 @@
16
16
  # under the License.
17
17
 
18
18
  class SchemaTest < Test::Unit::TestCase
19
+ include Helper::Omittable
20
+
19
21
  def setup
20
22
  @count_field = Arrow::Field.new("count", :uint32)
21
23
  @visible_field = Arrow::Field.new("visible", :boolean)
@@ -114,5 +116,19 @@ class SchemaTest < Test::Unit::TestCase
114
116
  end
115
117
  end
116
118
  end
119
+
120
+ sub_test_case("#to_s") do
121
+ test("show_metadata") do
122
+ require_gi_bindings(3, 4, 2)
123
+
124
+ schema = @schema.with_metadata("key" => "value")
125
+ assert_equal(<<-SCHEMA.chomp, schema.to_s(show_metadata: true))
126
+ count: uint32
127
+ visible: bool
128
+ -- metadata --
129
+ key: value
130
+ SCHEMA
131
+ end
132
+ end
117
133
  end
118
134
  end
@@ -46,10 +46,14 @@ class SlicerTest < Test::Unit::TestCase
46
46
  end
47
47
  assert_equal(<<-TABLE, sliced_table.to_s)
48
48
  count visible
49
- 0 1 true
50
- 1 8 true
51
- 2 16 true
52
- 3 256 true
49
+ 0
50
+ 1 1 true
51
+ 2
52
+ 3 8 true
53
+ 4 16 true
54
+ 5
55
+ 6
56
+ 7 256 true
53
57
  TABLE
54
58
  end
55
59
 
@@ -66,7 +70,8 @@ class SlicerTest < Test::Unit::TestCase
66
70
  4 16 true
67
71
  5 32 false
68
72
  6 64
69
- 7 256 true
73
+ 7
74
+ 8 256 true
70
75
  TABLE
71
76
  end
72
77
  end
@@ -78,8 +83,12 @@ class SlicerTest < Test::Unit::TestCase
78
83
  end
79
84
  assert_equal(<<-TABLE, sliced_table.to_s)
80
85
  count visible
81
- 0 2 false
82
- 1 32 false
86
+ 0
87
+ 1 2 false
88
+ 2
89
+ 3 32 false
90
+ 4
91
+ 5
83
92
  TABLE
84
93
  end
85
94
 
@@ -90,6 +99,7 @@ class SlicerTest < Test::Unit::TestCase
90
99
  assert_equal(<<-TABLE, sliced_table.to_s)
91
100
  count visible
92
101
  0 0
102
+ 1
93
103
  TABLE
94
104
  end
95
105
  end
@@ -142,10 +152,14 @@ class SlicerTest < Test::Unit::TestCase
142
152
  end
143
153
  assert_equal(<<-TABLE, sliced_table.to_s)
144
154
  count visible
145
- 0 1 true
146
- 1 8 true
147
- 2 16 true
148
- 3 256 true
155
+ 0
156
+ 1 1 true
157
+ 2
158
+ 3 8 true
159
+ 4 16 true
160
+ 5
161
+ 6
162
+ 7 256 true
149
163
  TABLE
150
164
  end
151
165
  end
@@ -172,8 +186,12 @@ class SlicerTest < Test::Unit::TestCase
172
186
  end
173
187
  assert_equal(<<-TABLE, sliced_table.to_s)
174
188
  count visible
175
- 0 2 false
176
- 1 32 false
189
+ 0
190
+ 1 2 false
191
+ 2
192
+ 3 32 false
193
+ 4
194
+ 5
177
195
  TABLE
178
196
  end
179
197
  end
@@ -200,8 +218,12 @@ class SlicerTest < Test::Unit::TestCase
200
218
  end
201
219
  assert_equal(<<-TABLE, sliced_table.to_s)
202
220
  count visible
203
- 0 2 false
204
- 1 32 false
221
+ 0
222
+ 1 2 false
223
+ 2
224
+ 3 32 false
225
+ 4
226
+ 5
205
227
  TABLE
206
228
  end
207
229
  end
@@ -217,6 +239,7 @@ class SlicerTest < Test::Unit::TestCase
217
239
  2 2 false
218
240
  3 4
219
241
  4 8 true
242
+ 5
220
243
  TABLE
221
244
  end
222
245
 
@@ -229,7 +252,8 @@ class SlicerTest < Test::Unit::TestCase
229
252
  0 16 true
230
253
  1 32 false
231
254
  2 64
232
- 3 256 true
255
+ 3
256
+ 4 256 true
233
257
  TABLE
234
258
  end
235
259
 
@@ -245,6 +269,7 @@ class SlicerTest < Test::Unit::TestCase
245
269
  3 4
246
270
  4 8 true
247
271
  5 16 true
272
+ 6
248
273
  TABLE
249
274
  end
250
275
 
@@ -256,7 +281,8 @@ class SlicerTest < Test::Unit::TestCase
256
281
  count visible
257
282
  0 32 false
258
283
  1 64
259
- 2 256 true
284
+ 2
285
+ 3 256 true
260
286
  TABLE
261
287
  end
262
288
 
@@ -268,7 +294,8 @@ class SlicerTest < Test::Unit::TestCase
268
294
  count visible
269
295
  0 32 false
270
296
  1 64
271
- 2 256 true
297
+ 2
298
+ 3 256 true
272
299
  TABLE
273
300
  end
274
301
 
@@ -284,6 +311,7 @@ class SlicerTest < Test::Unit::TestCase
284
311
  3 4
285
312
  4 8 true
286
313
  5 16 true
314
+ 6
287
315
  TABLE
288
316
  end
289
317
 
@@ -296,7 +324,8 @@ class SlicerTest < Test::Unit::TestCase
296
324
  0 16 true
297
325
  1 32 false
298
326
  2 64
299
- 3 256 true
327
+ 3
328
+ 4 256 true
300
329
  TABLE
301
330
  end
302
331
 
@@ -311,6 +340,7 @@ class SlicerTest < Test::Unit::TestCase
311
340
  2 2 false
312
341
  3 4
313
342
  4 8 true
343
+ 5
314
344
  TABLE
315
345
  end
316
346
 
@@ -324,6 +354,7 @@ class SlicerTest < Test::Unit::TestCase
324
354
  1 4
325
355
  2 16 true
326
356
  3 64
357
+ 4
327
358
  TABLE
328
359
  end
329
360
 
@@ -337,7 +368,8 @@ class SlicerTest < Test::Unit::TestCase
337
368
  1 2 false
338
369
  2 8 true
339
370
  3 32 false
340
- 4 256 true
371
+ 4
372
+ 5 256 true
341
373
  TABLE
342
374
  end
343
375
 
@@ -347,8 +379,12 @@ class SlicerTest < Test::Unit::TestCase
347
379
  end
348
380
  assert_equal(<<-TABLE, sliced_table.to_s)
349
381
  count visible
350
- 0 16 true
351
- 1 256 true
382
+ 0
383
+ 1
384
+ 2 16 true
385
+ 3
386
+ 4
387
+ 5 256 true
352
388
  TABLE
353
389
  end
354
390
 
@@ -358,11 +394,15 @@ class SlicerTest < Test::Unit::TestCase
358
394
  end
359
395
  assert_equal(<<-TABLE, sliced_table.to_s)
360
396
  count visible
361
- 0 1 true
362
- 1 8 true
363
- 2 16 true
364
- 3 32 false
365
- 4 256 true
397
+ 0
398
+ 1 1 true
399
+ 2
400
+ 3 8 true
401
+ 4 16 true
402
+ 5 32 false
403
+ 6
404
+ 7
405
+ 8 256 true
366
406
  TABLE
367
407
  end
368
408
 
@@ -372,9 +412,13 @@ class SlicerTest < Test::Unit::TestCase
372
412
  end
373
413
  assert_equal(<<-TABLE, sliced_table.to_s)
374
414
  count visible
375
- 0 1 true
376
- 1 8 true
377
- 2 32 false
415
+ 0
416
+ 1 1 true
417
+ 2
418
+ 3 8 true
419
+ 4 32 false
420
+ 5
421
+ 6
378
422
  TABLE
379
423
  end
380
424
 
@@ -28,12 +28,12 @@ class SparseUnionDataTypeTest < Test::Unit::TestCase
28
28
  end
29
29
 
30
30
  test("ordered arguments") do
31
- assert_equal("union[sparse]<visible: bool=2, count: int32=9>",
31
+ assert_equal("sparse_union<visible: bool=2, count: int32=9>",
32
32
  Arrow::SparseUnionDataType.new(@fields, [2, 9]).to_s)
33
33
  end
34
34
 
35
35
  test("description") do
36
- assert_equal("union[sparse]<visible: bool=2, count: int32=9>",
36
+ assert_equal("sparse_union<visible: bool=2, count: int32=9>",
37
37
  Arrow::SparseUnionDataType.new(fields: @fields,
38
38
  type_codes: [2, 9]).to_s)
39
39
  end
@@ -38,10 +38,12 @@ class StructArrayBuilderTest < Test::Unit::TestCase
38
38
 
39
39
  test("Array") do
40
40
  @builder.append_value([true, 1])
41
+ @builder.append_value([])
42
+ @builder.append_value([false])
41
43
  array = @builder.finish
42
44
  assert_equal([
43
- [true],
44
- [1],
45
+ [true, nil, false],
46
+ [1, nil, nil],
45
47
  ],
46
48
  [
47
49
  array.find_field(0).to_a,
@@ -66,10 +68,12 @@ class StructArrayBuilderTest < Test::Unit::TestCase
66
68
 
67
69
  test("Hash") do
68
70
  @builder.append_value(count: 1, visible: true)
71
+ @builder.append_value(visible: false)
72
+ @builder.append_value(count: 2)
69
73
  array = @builder.finish
70
74
  assert_equal([
71
- [true],
72
- [1],
75
+ [true, false, nil],
76
+ [1, nil, 2],
73
77
  ],
74
78
  [
75
79
  array.find_field(0).to_a,
@@ -54,11 +54,12 @@ class TableTest < Test::Unit::TestCase
54
54
  target_rows = Arrow::BooleanArray.new(target_rows_raw)
55
55
  assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
56
56
  count visible
57
- 0 2 false
58
- 1 4
59
- 2 16 true
60
- 3 64
61
- 4 128
57
+ 0
58
+ 1 2 false
59
+ 2 4
60
+ 3 16 true
61
+ 4 64
62
+ 5 128
62
63
  TABLE
63
64
  end
64
65
 
@@ -66,11 +67,12 @@ class TableTest < Test::Unit::TestCase
66
67
  target_rows_raw = [nil, true, true, false, true, false, true, true]
67
68
  assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
68
69
  count visible
69
- 0 2 false
70
- 1 4
71
- 2 16 true
72
- 3 64
73
- 4 128
70
+ 0
71
+ 1 2 false
72
+ 2 4
73
+ 3 16 true
74
+ 4 64
75
+ 5 128
74
76
  TABLE
75
77
  end
76
78
 
@@ -436,12 +438,24 @@ class TableTest < Test::Unit::TestCase
436
438
  assert_equal(@table, Arrow::Table.load(output))
437
439
  end
438
440
 
441
+ def test_arrow_file
442
+ output = create_output(".arrow")
443
+ @table.save(output, format: :arrow_file)
444
+ assert_equal(@table, Arrow::Table.load(output, format: :arrow_file))
445
+ end
446
+
439
447
  def test_batch
440
448
  output = create_output(".arrow")
441
449
  @table.save(output, format: :batch)
442
450
  assert_equal(@table, Arrow::Table.load(output, format: :batch))
443
451
  end
444
452
 
453
+ def test_arrow_streaming
454
+ output = create_output(".arrow")
455
+ @table.save(output, format: :arrow_streaming)
456
+ assert_equal(@table, Arrow::Table.load(output, format: :arrow_streaming))
457
+ end
458
+
445
459
  def test_stream
446
460
  output = create_output(".arrow")
447
461
  @table.save(output, format: :stream)
@@ -468,6 +482,15 @@ class TableTest < Test::Unit::TestCase
468
482
  compression: :gzip,
469
483
  schema: @table.schema))
470
484
  end
485
+
486
+ def test_tsv
487
+ output = create_output(".tsv")
488
+ @table.save(output, format: :tsv)
489
+ assert_equal(@table,
490
+ Arrow::Table.load(output,
491
+ format: :tsv,
492
+ schema: @table.schema))
493
+ end
471
494
  end
472
495
 
473
496
  sub_test_case("path") do
@@ -498,18 +521,27 @@ class TableTest < Test::Unit::TestCase
498
521
  compression: :gzip,
499
522
  schema: @table.schema))
500
523
  end
524
+
525
+ test("tsv") do
526
+ output = create_output(".tsv")
527
+ @table.save(output)
528
+ assert_equal(@table,
529
+ Arrow::Table.load(output,
530
+ format: :tsv,
531
+ schema: @table.schema))
532
+ end
501
533
  end
502
534
 
503
535
  sub_test_case("load: auto detect") do
504
- test("batch") do
536
+ test("arrow: file") do
505
537
  output = create_output(".arrow")
506
- @table.save(output, format: :batch)
538
+ @table.save(output, format: :arrow_file)
507
539
  assert_equal(@table, Arrow::Table.load(output))
508
540
  end
509
541
 
510
- test("stream") do
542
+ test("arrow: streaming") do
511
543
  output = create_output(".arrow")
512
- @table.save(output, format: :stream)
544
+ @table.save(output, format: :arrow_streaming)
513
545
  assert_equal(@table, Arrow::Table.load(output))
514
546
  end
515
547
 
@@ -539,6 +571,24 @@ chris,-1
539
571
  name score
540
572
  0 alice 10
541
573
  1 bob 29
574
+ 2 chris -1
575
+ TABLE
576
+ end
577
+
578
+ test("tsv") do
579
+ file = Tempfile.new(["red-arrow", ".tsv"])
580
+ file.puts(<<-TSV)
581
+ name\tscore
582
+ alice\t10
583
+ bob\t29
584
+ chris\t-1
585
+ TSV
586
+ file.close
587
+ table = Arrow::Table.load(file.path)
588
+ assert_equal(<<-TABLE, table.to_s)
589
+ name score
590
+ 0 alice 10
591
+ 1 bob 29
542
592
  2 chris -1
543
593
  TABLE
544
594
  end
@@ -646,4 +696,93 @@ visible: false
646
696
  end
647
697
  end
648
698
  end
699
+
700
+ sub_test_case("#filter") do
701
+ def setup
702
+ super
703
+ @options = Arrow::FilterOptions.new
704
+ @options.null_selection_behavior = :emit_null
705
+ end
706
+
707
+ test("Array: boolean") do
708
+ filter = [nil, true, true, false, true, false, true, true]
709
+ assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
710
+ count visible
711
+ 0
712
+ 1 2 false
713
+ 2 4
714
+ 3 16 true
715
+ 4 64
716
+ 5 128
717
+ TABLE
718
+ end
719
+
720
+ test("Arrow::BooleanArray") do
721
+ array = [nil, true, true, false, true, false, true, true]
722
+ filter = Arrow::BooleanArray.new(array)
723
+ assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
724
+ count visible
725
+ 0
726
+ 1 2 false
727
+ 2 4
728
+ 3 16 true
729
+ 4 64
730
+ 5 128
731
+ TABLE
732
+ end
733
+
734
+ test("Arrow::ChunkedArray") do
735
+ filter_chunks = [
736
+ Arrow::BooleanArray.new([nil, true, true]),
737
+ Arrow::BooleanArray.new([false, true, false]),
738
+ Arrow::BooleanArray.new([true, true]),
739
+ ]
740
+ filter = Arrow::ChunkedArray.new(filter_chunks)
741
+ assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
742
+ count visible
743
+ 0
744
+ 1 2 false
745
+ 2 4
746
+ 3 16 true
747
+ 4 64
748
+ 5 128
749
+ TABLE
750
+ end
751
+ end
752
+
753
+ sub_test_case("#take") do
754
+ test("Arrow: boolean") do
755
+ indices = [1, 0, 2]
756
+ assert_equal(<<-TABLE, @table.take(indices).to_s)
757
+ count visible
758
+ 0 2 false
759
+ 1 1 true
760
+ 2 4
761
+ TABLE
762
+ end
763
+
764
+ test("Arrow::Array") do
765
+ indices = Arrow::Int16Array.new([1, 0, 2])
766
+ assert_equal(<<-TABLE, @table.take(indices).to_s)
767
+ count visible
768
+ 0 2 false
769
+ 1 1 true
770
+ 2 4
771
+ TABLE
772
+ end
773
+
774
+ test("Arrow::ChunkedArray") do
775
+ chunks = [
776
+ Arrow::Int16Array.new([1, 0]),
777
+ Arrow::Int16Array.new([2])
778
+ ]
779
+ indices = Arrow::ChunkedArray.new(chunks)
780
+ assert_equal(<<-TABLE, @table.take(indices).to_s)
781
+ count visible
782
+ 0 2 false
783
+ 1 1 true
784
+ 2 4
785
+ TABLE
786
+ end
787
+ end
649
788
  end