red-arrow 0.15.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +63 -33
  4. data/ext/arrow/raw-records.cpp +2 -1
  5. data/ext/arrow/values.cpp +2 -1
  6. data/lib/arrow/array-builder.rb +101 -52
  7. data/lib/arrow/array.rb +28 -10
  8. data/lib/arrow/{binary-array-builder.rb → buffer.rb} +7 -15
  9. data/lib/arrow/chunked-array.rb +2 -0
  10. data/lib/arrow/csv-loader.rb +15 -3
  11. data/lib/arrow/csv-read-options.rb +18 -0
  12. data/lib/arrow/data-type.rb +35 -2
  13. data/lib/arrow/decimal128-array-builder.rb +0 -2
  14. data/lib/arrow/dictionary-array.rb +24 -0
  15. data/lib/arrow/field.rb +1 -1
  16. data/lib/arrow/generic-filterable.rb +43 -0
  17. data/lib/arrow/generic-takeable.rb +38 -0
  18. data/lib/arrow/list-data-type.rb +58 -8
  19. data/lib/arrow/loader.rb +12 -1
  20. data/lib/arrow/null-array-builder.rb +1 -1
  21. data/lib/arrow/null-array.rb +24 -0
  22. data/lib/arrow/raw-table-converter.rb +47 -0
  23. data/lib/arrow/record-batch-iterator.rb +22 -0
  24. data/lib/arrow/record-batch.rb +8 -3
  25. data/lib/arrow/schema.rb +5 -2
  26. data/lib/arrow/struct-array-builder.rb +13 -7
  27. data/lib/arrow/struct-data-type.rb +0 -2
  28. data/lib/arrow/table-loader.rb +29 -6
  29. data/lib/arrow/table-saver.rb +37 -13
  30. data/lib/arrow/table.rb +20 -73
  31. data/lib/arrow/version.rb +1 -1
  32. data/red-arrow.gemspec +4 -2
  33. data/test/helper.rb +1 -0
  34. data/test/helper/omittable.rb +36 -0
  35. data/test/raw-records/test-dense-union-array.rb +1 -34
  36. data/test/raw-records/test-sparse-union-array.rb +1 -33
  37. data/test/run-test.rb +14 -3
  38. data/test/test-array-builder.rb +17 -0
  39. data/test/test-array.rb +104 -0
  40. data/test/test-buffer.rb +11 -0
  41. data/test/test-chunked-array.rb +96 -0
  42. data/test/test-csv-loader.rb +77 -2
  43. data/test/test-data-type.rb +11 -0
  44. data/test/test-dense-union-data-type.rb +2 -2
  45. data/test/test-dictionary-array.rb +41 -0
  46. data/test/test-feather.rb +21 -6
  47. data/test/test-list-data-type.rb +27 -1
  48. data/test/test-null-array.rb +23 -0
  49. data/test/test-record-batch-iterator.rb +37 -0
  50. data/test/test-record-batch.rb +14 -0
  51. data/test/test-schema.rb +16 -0
  52. data/test/test-slicer.rb +74 -30
  53. data/test/test-sparse-union-data-type.rb +2 -2
  54. data/test/test-struct-array-builder.rb +8 -4
  55. data/test/test-table.rb +153 -14
  56. data/test/test-timestamp-array.rb +19 -0
  57. data/test/values/test-dense-union-array.rb +1 -34
  58. data/test/values/test-sparse-union-array.rb +1 -33
  59. metadata +76 -63
@@ -54,11 +54,12 @@ class TableTest < Test::Unit::TestCase
54
54
  target_rows = Arrow::BooleanArray.new(target_rows_raw)
55
55
  assert_equal(<<-TABLE, @table.slice(target_rows).to_s)
56
56
  count visible
57
- 0 2 false
58
- 1 4
59
- 2 16 true
60
- 3 64
61
- 4 128
57
+ 0
58
+ 1 2 false
59
+ 2 4
60
+ 3 16 true
61
+ 4 64
62
+ 5 128
62
63
  TABLE
63
64
  end
64
65
 
@@ -66,11 +67,12 @@ class TableTest < Test::Unit::TestCase
66
67
  target_rows_raw = [nil, true, true, false, true, false, true, true]
67
68
  assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s)
68
69
  count visible
69
- 0 2 false
70
- 1 4
71
- 2 16 true
72
- 3 64
73
- 4 128
70
+ 0
71
+ 1 2 false
72
+ 2 4
73
+ 3 16 true
74
+ 4 64
75
+ 5 128
74
76
  TABLE
75
77
  end
76
78
 
@@ -436,12 +438,24 @@ class TableTest < Test::Unit::TestCase
436
438
  assert_equal(@table, Arrow::Table.load(output))
437
439
  end
438
440
 
441
+ def test_arrow_file
442
+ output = create_output(".arrow")
443
+ @table.save(output, format: :arrow_file)
444
+ assert_equal(@table, Arrow::Table.load(output, format: :arrow_file))
445
+ end
446
+
439
447
  def test_batch
440
448
  output = create_output(".arrow")
441
449
  @table.save(output, format: :batch)
442
450
  assert_equal(@table, Arrow::Table.load(output, format: :batch))
443
451
  end
444
452
 
453
+ def test_arrow_streaming
454
+ output = create_output(".arrow")
455
+ @table.save(output, format: :arrow_streaming)
456
+ assert_equal(@table, Arrow::Table.load(output, format: :arrow_streaming))
457
+ end
458
+
445
459
  def test_stream
446
460
  output = create_output(".arrow")
447
461
  @table.save(output, format: :stream)
@@ -468,6 +482,15 @@ class TableTest < Test::Unit::TestCase
468
482
  compression: :gzip,
469
483
  schema: @table.schema))
470
484
  end
485
+
486
+ def test_tsv
487
+ output = create_output(".tsv")
488
+ @table.save(output, format: :tsv)
489
+ assert_equal(@table,
490
+ Arrow::Table.load(output,
491
+ format: :tsv,
492
+ schema: @table.schema))
493
+ end
471
494
  end
472
495
 
473
496
  sub_test_case("path") do
@@ -498,18 +521,27 @@ class TableTest < Test::Unit::TestCase
498
521
  compression: :gzip,
499
522
  schema: @table.schema))
500
523
  end
524
+
525
+ test("tsv") do
526
+ output = create_output(".tsv")
527
+ @table.save(output)
528
+ assert_equal(@table,
529
+ Arrow::Table.load(output,
530
+ format: :tsv,
531
+ schema: @table.schema))
532
+ end
501
533
  end
502
534
 
503
535
  sub_test_case("load: auto detect") do
504
- test("batch") do
536
+ test("arrow: file") do
505
537
  output = create_output(".arrow")
506
- @table.save(output, format: :batch)
538
+ @table.save(output, format: :arrow_file)
507
539
  assert_equal(@table, Arrow::Table.load(output))
508
540
  end
509
541
 
510
- test("stream") do
542
+ test("arrow: streaming") do
511
543
  output = create_output(".arrow")
512
- @table.save(output, format: :stream)
544
+ @table.save(output, format: :arrow_streaming)
513
545
  assert_equal(@table, Arrow::Table.load(output))
514
546
  end
515
547
 
@@ -539,6 +571,24 @@ chris,-1
539
571
  name score
540
572
  0 alice 10
541
573
  1 bob 29
574
+ 2 chris -1
575
+ TABLE
576
+ end
577
+
578
+ test("tsv") do
579
+ file = Tempfile.new(["red-arrow", ".tsv"])
580
+ file.puts(<<-TSV)
581
+ name\tscore
582
+ alice\t10
583
+ bob\t29
584
+ chris\t-1
585
+ TSV
586
+ file.close
587
+ table = Arrow::Table.load(file.path)
588
+ assert_equal(<<-TABLE, table.to_s)
589
+ name score
590
+ 0 alice 10
591
+ 1 bob 29
542
592
  2 chris -1
543
593
  TABLE
544
594
  end
@@ -646,4 +696,93 @@ visible: false
646
696
  end
647
697
  end
648
698
  end
699
+
700
+ sub_test_case("#filter") do
701
+ def setup
702
+ super
703
+ @options = Arrow::FilterOptions.new
704
+ @options.null_selection_behavior = :emit_null
705
+ end
706
+
707
+ test("Array: boolean") do
708
+ filter = [nil, true, true, false, true, false, true, true]
709
+ assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
710
+ count visible
711
+ 0
712
+ 1 2 false
713
+ 2 4
714
+ 3 16 true
715
+ 4 64
716
+ 5 128
717
+ TABLE
718
+ end
719
+
720
+ test("Arrow::BooleanArray") do
721
+ array = [nil, true, true, false, true, false, true, true]
722
+ filter = Arrow::BooleanArray.new(array)
723
+ assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
724
+ count visible
725
+ 0
726
+ 1 2 false
727
+ 2 4
728
+ 3 16 true
729
+ 4 64
730
+ 5 128
731
+ TABLE
732
+ end
733
+
734
+ test("Arrow::ChunkedArray") do
735
+ filter_chunks = [
736
+ Arrow::BooleanArray.new([nil, true, true]),
737
+ Arrow::BooleanArray.new([false, true, false]),
738
+ Arrow::BooleanArray.new([true, true]),
739
+ ]
740
+ filter = Arrow::ChunkedArray.new(filter_chunks)
741
+ assert_equal(<<-TABLE, @table.filter(filter, @options).to_s)
742
+ count visible
743
+ 0
744
+ 1 2 false
745
+ 2 4
746
+ 3 16 true
747
+ 4 64
748
+ 5 128
749
+ TABLE
750
+ end
751
+ end
752
+
753
+ sub_test_case("#take") do
754
+ test("Arrow: boolean") do
755
+ indices = [1, 0, 2]
756
+ assert_equal(<<-TABLE, @table.take(indices).to_s)
757
+ count visible
758
+ 0 2 false
759
+ 1 1 true
760
+ 2 4
761
+ TABLE
762
+ end
763
+
764
+ test("Arrow::Array") do
765
+ indices = Arrow::Int16Array.new([1, 0, 2])
766
+ assert_equal(<<-TABLE, @table.take(indices).to_s)
767
+ count visible
768
+ 0 2 false
769
+ 1 1 true
770
+ 2 4
771
+ TABLE
772
+ end
773
+
774
+ test("Arrow::ChunkedArray") do
775
+ chunks = [
776
+ Arrow::Int16Array.new([1, 0]),
777
+ Arrow::Int16Array.new([2])
778
+ ]
779
+ indices = Arrow::ChunkedArray.new(chunks)
780
+ assert_equal(<<-TABLE, @table.take(indices).to_s)
781
+ count visible
782
+ 0 2 false
783
+ 1 1 true
784
+ 2 4
785
+ TABLE
786
+ end
787
+ end
649
788
  end
@@ -23,4 +23,23 @@ class TimestampArrayTest < Test::Unit::TestCase
23
23
  time = Time.at(sec, usec)
24
24
  assert_equal(time, array[0])
25
25
  end
26
+
27
+ sub_test_case("#is_in") do
28
+ def setup
29
+ values = [
30
+ Time.parse("2019-11-18T00:09:11"),
31
+ Time.parse("2019-11-18T00:09:12"),
32
+ Time.parse("2019-11-18T00:09:13"),
33
+ ]
34
+ @array = Arrow::TimestampArray.new(:micro, values)
35
+ end
36
+
37
+ test("Arrow: Array") do
38
+ right = [
39
+ Time.parse("2019-11-18T00:09:12"),
40
+ ]
41
+ assert_equal(Arrow::BooleanArray.new([false, true, false]),
42
+ @array.is_in(right))
43
+ end
44
+ end
26
45
  end
@@ -48,10 +48,7 @@ module ValuesDenseUnionArrayTests
48
48
  sub_record_batch.columns[0].data
49
49
  end
50
50
  values.each do |value|
51
- if value.nil?
52
- type_ids << nil
53
- offsets << 0
54
- elsif value.key?("0")
51
+ if value.key?("0")
55
52
  type_id = type_codes[0]
56
53
  type_ids << type_id
57
54
  offsets << (type_ids.count(type_id) - 1)
@@ -70,7 +67,6 @@ module ValuesDenseUnionArrayTests
70
67
  def test_null
71
68
  values = [
72
69
  {"0" => nil},
73
- nil,
74
70
  ]
75
71
  target = build(:null, values)
76
72
  assert_equal(values, target.values)
@@ -79,7 +75,6 @@ module ValuesDenseUnionArrayTests
79
75
  def test_boolean
80
76
  values = [
81
77
  {"0" => true},
82
- nil,
83
78
  {"1" => nil},
84
79
  ]
85
80
  target = build(:boolean, values)
@@ -89,7 +84,6 @@ module ValuesDenseUnionArrayTests
89
84
  def test_int8
90
85
  values = [
91
86
  {"0" => -(2 ** 7)},
92
- nil,
93
87
  {"1" => nil},
94
88
  ]
95
89
  target = build(:int8, values)
@@ -99,7 +93,6 @@ module ValuesDenseUnionArrayTests
99
93
  def test_uint8
100
94
  values = [
101
95
  {"0" => (2 ** 8) - 1},
102
- nil,
103
96
  {"1" => nil},
104
97
  ]
105
98
  target = build(:uint8, values)
@@ -109,7 +102,6 @@ module ValuesDenseUnionArrayTests
109
102
  def test_int16
110
103
  values = [
111
104
  {"0" => -(2 ** 15)},
112
- nil,
113
105
  {"1" => nil},
114
106
  ]
115
107
  target = build(:int16, values)
@@ -119,7 +111,6 @@ module ValuesDenseUnionArrayTests
119
111
  def test_uint16
120
112
  values = [
121
113
  {"0" => (2 ** 16) - 1},
122
- nil,
123
114
  {"1" => nil},
124
115
  ]
125
116
  target = build(:uint16, values)
@@ -129,7 +120,6 @@ module ValuesDenseUnionArrayTests
129
120
  def test_int32
130
121
  values = [
131
122
  {"0" => -(2 ** 31)},
132
- nil,
133
123
  {"1" => nil},
134
124
  ]
135
125
  target = build(:int32, values)
@@ -139,7 +129,6 @@ module ValuesDenseUnionArrayTests
139
129
  def test_uint32
140
130
  values = [
141
131
  {"0" => (2 ** 32) - 1},
142
- nil,
143
132
  {"1" => nil},
144
133
  ]
145
134
  target = build(:uint32, values)
@@ -149,7 +138,6 @@ module ValuesDenseUnionArrayTests
149
138
  def test_int64
150
139
  values = [
151
140
  {"0" => -(2 ** 63)},
152
- nil,
153
141
  {"1" => nil},
154
142
  ]
155
143
  target = build(:int64, values)
@@ -159,7 +147,6 @@ module ValuesDenseUnionArrayTests
159
147
  def test_uint64
160
148
  values = [
161
149
  {"0" => (2 ** 64) - 1},
162
- nil,
163
150
  {"1" => nil},
164
151
  ]
165
152
  target = build(:uint64, values)
@@ -169,7 +156,6 @@ module ValuesDenseUnionArrayTests
169
156
  def test_float
170
157
  values = [
171
158
  {"0" => -1.0},
172
- nil,
173
159
  {"1" => nil},
174
160
  ]
175
161
  target = build(:float, values)
@@ -179,7 +165,6 @@ module ValuesDenseUnionArrayTests
179
165
  def test_double
180
166
  values = [
181
167
  {"0" => -1.0},
182
- nil,
183
168
  {"1" => nil},
184
169
  ]
185
170
  target = build(:double, values)
@@ -189,7 +174,6 @@ module ValuesDenseUnionArrayTests
189
174
  def test_binary
190
175
  values = [
191
176
  {"0" => "\xff".b},
192
- nil,
193
177
  {"1" => nil},
194
178
  ]
195
179
  target = build(:binary, values)
@@ -199,7 +183,6 @@ module ValuesDenseUnionArrayTests
199
183
  def test_string
200
184
  values = [
201
185
  {"0" => "Ruby"},
202
- nil,
203
186
  {"1" => nil},
204
187
  ]
205
188
  target = build(:string, values)
@@ -209,7 +192,6 @@ module ValuesDenseUnionArrayTests
209
192
  def test_date32
210
193
  values = [
211
194
  {"0" => Date.new(1960, 1, 1)},
212
- nil,
213
195
  {"1" => nil},
214
196
  ]
215
197
  target = build(:date32, values)
@@ -219,7 +201,6 @@ module ValuesDenseUnionArrayTests
219
201
  def test_date64
220
202
  values = [
221
203
  {"0" => DateTime.new(1960, 1, 1, 2, 9, 30)},
222
- nil,
223
204
  {"1" => nil},
224
205
  ]
225
206
  target = build(:date64, values)
@@ -229,7 +210,6 @@ module ValuesDenseUnionArrayTests
229
210
  def test_timestamp_second
230
211
  values = [
231
212
  {"0" => Time.parse("1960-01-01T02:09:30Z")},
232
- nil,
233
213
  {"1" => nil},
234
214
  ]
235
215
  target = build({
@@ -243,7 +223,6 @@ module ValuesDenseUnionArrayTests
243
223
  def test_timestamp_milli
244
224
  values = [
245
225
  {"0" => Time.parse("1960-01-01T02:09:30.123Z")},
246
- nil,
247
226
  {"1" => nil},
248
227
  ]
249
228
  target = build({
@@ -257,7 +236,6 @@ module ValuesDenseUnionArrayTests
257
236
  def test_timestamp_micro
258
237
  values = [
259
238
  {"0" => Time.parse("1960-01-01T02:09:30.123456Z")},
260
- nil,
261
239
  {"1" => nil},
262
240
  ]
263
241
  target = build({
@@ -271,7 +249,6 @@ module ValuesDenseUnionArrayTests
271
249
  def test_timestamp_nano
272
250
  values = [
273
251
  {"0" => Time.parse("1960-01-01T02:09:30.123456789Z")},
274
- nil,
275
252
  {"1" => nil},
276
253
  ]
277
254
  target = build({
@@ -287,7 +264,6 @@ module ValuesDenseUnionArrayTests
287
264
  values = [
288
265
  # 00:10:00
289
266
  {"0" => Arrow::Time.new(unit, 60 * 10)},
290
- nil,
291
267
  {"1" => nil},
292
268
  ]
293
269
  target = build({
@@ -303,7 +279,6 @@ module ValuesDenseUnionArrayTests
303
279
  values = [
304
280
  # 00:10:00.123
305
281
  {"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)},
306
- nil,
307
282
  {"1" => nil},
308
283
  ]
309
284
  target = build({
@@ -319,7 +294,6 @@ module ValuesDenseUnionArrayTests
319
294
  values = [
320
295
  # 00:10:00.123456
321
296
  {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)},
322
- nil,
323
297
  {"1" => nil},
324
298
  ]
325
299
  target = build({
@@ -335,7 +309,6 @@ module ValuesDenseUnionArrayTests
335
309
  values = [
336
310
  # 00:10:00.123456789
337
311
  {"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)},
338
- nil,
339
312
  {"1" => nil},
340
313
  ]
341
314
  target = build({
@@ -349,7 +322,6 @@ module ValuesDenseUnionArrayTests
349
322
  def test_decimal128
350
323
  values = [
351
324
  {"0" => BigDecimal("92.92")},
352
- nil,
353
325
  {"1" => nil},
354
326
  ]
355
327
  target = build({
@@ -364,7 +336,6 @@ module ValuesDenseUnionArrayTests
364
336
  def test_list
365
337
  values = [
366
338
  {"0" => [true, nil, false]},
367
- nil,
368
339
  {"1" => nil},
369
340
  ]
370
341
  target = build({
@@ -381,7 +352,6 @@ module ValuesDenseUnionArrayTests
381
352
  def test_struct
382
353
  values = [
383
354
  {"0" => {"sub_field" => true}},
384
- nil,
385
355
  {"1" => nil},
386
356
  {"0" => {"sub_field" => nil}},
387
357
  ]
@@ -402,7 +372,6 @@ module ValuesDenseUnionArrayTests
402
372
  omit("Need to add support for SparseUnionArrayBuilder")
403
373
  values = [
404
374
  {"0" => {"field1" => true}},
405
- nil,
406
375
  {"1" => nil},
407
376
  {"0" => {"field2" => nil}},
408
377
  ]
@@ -428,7 +397,6 @@ module ValuesDenseUnionArrayTests
428
397
  omit("Need to add support for DenseUnionArrayBuilder")
429
398
  values = [
430
399
  {"0" => {"field1" => true}},
431
- nil,
432
400
  {"1" => nil},
433
401
  {"0" => {"field2" => nil}},
434
402
  ]
@@ -454,7 +422,6 @@ module ValuesDenseUnionArrayTests
454
422
  omit("Need to add support for DictionaryArrayBuilder")
455
423
  values = [
456
424
  {"0" => "Ruby"},
457
- nil,
458
425
  {"1" => nil},
459
426
  {"0" => "GLib"},
460
427
  ]