red-arrow 0.15.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59):
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +63 -33
  4. data/ext/arrow/raw-records.cpp +2 -1
  5. data/ext/arrow/values.cpp +2 -1
  6. data/lib/arrow/array-builder.rb +101 -52
  7. data/lib/arrow/array.rb +28 -10
  8. data/lib/arrow/{binary-array-builder.rb → buffer.rb} +7 -15
  9. data/lib/arrow/chunked-array.rb +2 -0
  10. data/lib/arrow/csv-loader.rb +15 -3
  11. data/lib/arrow/csv-read-options.rb +18 -0
  12. data/lib/arrow/data-type.rb +35 -2
  13. data/lib/arrow/decimal128-array-builder.rb +0 -2
  14. data/lib/arrow/dictionary-array.rb +24 -0
  15. data/lib/arrow/field.rb +1 -1
  16. data/lib/arrow/generic-filterable.rb +43 -0
  17. data/lib/arrow/generic-takeable.rb +38 -0
  18. data/lib/arrow/list-data-type.rb +58 -8
  19. data/lib/arrow/loader.rb +12 -1
  20. data/lib/arrow/null-array-builder.rb +1 -1
  21. data/lib/arrow/null-array.rb +24 -0
  22. data/lib/arrow/raw-table-converter.rb +47 -0
  23. data/lib/arrow/record-batch-iterator.rb +22 -0
  24. data/lib/arrow/record-batch.rb +8 -3
  25. data/lib/arrow/schema.rb +5 -2
  26. data/lib/arrow/struct-array-builder.rb +13 -7
  27. data/lib/arrow/struct-data-type.rb +0 -2
  28. data/lib/arrow/table-loader.rb +29 -6
  29. data/lib/arrow/table-saver.rb +37 -13
  30. data/lib/arrow/table.rb +20 -73
  31. data/lib/arrow/version.rb +1 -1
  32. data/red-arrow.gemspec +4 -2
  33. data/test/helper.rb +1 -0
  34. data/test/helper/omittable.rb +36 -0
  35. data/test/raw-records/test-dense-union-array.rb +1 -34
  36. data/test/raw-records/test-sparse-union-array.rb +1 -33
  37. data/test/run-test.rb +14 -3
  38. data/test/test-array-builder.rb +17 -0
  39. data/test/test-array.rb +104 -0
  40. data/test/test-buffer.rb +11 -0
  41. data/test/test-chunked-array.rb +96 -0
  42. data/test/test-csv-loader.rb +77 -2
  43. data/test/test-data-type.rb +11 -0
  44. data/test/test-dense-union-data-type.rb +2 -2
  45. data/test/test-dictionary-array.rb +41 -0
  46. data/test/test-feather.rb +21 -6
  47. data/test/test-list-data-type.rb +27 -1
  48. data/test/test-null-array.rb +23 -0
  49. data/test/test-record-batch-iterator.rb +37 -0
  50. data/test/test-record-batch.rb +14 -0
  51. data/test/test-schema.rb +16 -0
  52. data/test/test-slicer.rb +74 -30
  53. data/test/test-sparse-union-data-type.rb +2 -2
  54. data/test/test-struct-array-builder.rb +8 -4
  55. data/test/test-table.rb +153 -14
  56. data/test/test-timestamp-array.rb +19 -0
  57. data/test/values/test-dense-union-array.rb +1 -34
  58. data/test/values/test-sparse-union-array.rb +1 -33
  59. metadata +76 -63
@@ -18,17 +18,32 @@
18
18
  class FeatherTest < Test::Unit::TestCase
19
19
  include Helper::Fixture
20
20
 
21
- def test_save_load
21
+ def setup
22
22
  columns = {
23
23
  "message" => Arrow::StringArray.new(["Start", "Crash", "Shutdown"]),
24
24
  "is_critical" => Arrow::BooleanArray.new([false, true, false]),
25
25
  }
26
- table = Arrow::Table.new(columns)
26
+ @table = Arrow::Table.new(columns)
27
27
 
28
- output = Tempfile.new(["red-arrow", ".feather"])
29
- table.save(output.path)
30
- output.close
28
+ @output = Tempfile.new(["red-arrow", ".feather"])
29
+ begin
30
+ yield(@output)
31
+ ensure
32
+ @output.close!
33
+ end
34
+ end
35
+
36
+ def test_default
37
+ @table.save(@output.path)
38
+ @output.close
39
+
40
+ assert_equal(@table, Arrow::Table.load(@output.path))
41
+ end
42
+
43
+ def test_compression
44
+ @table.save(@output.path, compression: :zstd)
45
+ @output.close
31
46
 
32
- assert_equal(table, Arrow::Table.load(output.path))
47
+ assert_equal(@table, Arrow::Table.load(@output.path))
33
48
  end
34
49
  end
@@ -23,7 +23,7 @@ class ListDataTypeTest < Test::Unit::TestCase
23
23
  Arrow::ListDataType.new(field).to_s)
24
24
  end
25
25
 
26
- test("Hash") do
26
+ test("name: String") do
27
27
  assert_equal("list<tag: string>",
28
28
  Arrow::ListDataType.new(name: "tag", type: :string).to_s)
29
29
  end
@@ -39,5 +39,31 @@ class ListDataTypeTest < Test::Unit::TestCase
39
39
  assert_equal("list<tag: string>",
40
40
  Arrow::ListDataType.new(field: field_description).to_s)
41
41
  end
42
+
43
+ test("Arrow::DataType") do
44
+ data_type = Arrow::BooleanDataType.new
45
+ assert_equal("list<item: bool>",
46
+ Arrow::ListDataType.new(data_type).to_s)
47
+ end
48
+
49
+ test("String") do
50
+ assert_equal("list<item: bool>",
51
+ Arrow::ListDataType.new("boolean").to_s)
52
+ end
53
+
54
+ test("Symbol") do
55
+ assert_equal("list<item: bool>",
56
+ Arrow::ListDataType.new(:boolean).to_s)
57
+ end
58
+
59
+ test("[data type name, additional information]") do
60
+ assert_equal("list<item: time32[ms]>",
61
+ Arrow::ListDataType.new([:time32, :milli]).to_s)
62
+ end
63
+
64
+ test("type: Symbol") do
65
+ assert_equal("list<item: bool>",
66
+ Arrow::ListDataType.new(type: :boolean).to_s)
67
+ end
42
68
  end
43
69
  end
@@ -0,0 +1,23 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class NullArrayTest < Test::Unit::TestCase
19
+ test("#[]") do
20
+ array = Arrow::NullArray.new(1)
21
+ assert_nil(array[0])
22
+ end
23
+ end
@@ -0,0 +1,37 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class RecordBatchIteratorTest < Test::Unit::TestCase
19
+ def setup
20
+ @schema = Arrow::Schema.new(visible: :boolean,
21
+ count: :uint32)
22
+ @record_batches = [
23
+ Arrow::RecordBatch.new(@schema,
24
+ visible: [true],
25
+ count: [1]),
26
+ Arrow::RecordBatch.new(@schema,
27
+ visible: [false, nil],
28
+ count: [nil, 3]),
29
+ ]
30
+ @iterator = Arrow::RecordBatchIterator.new(@record_batches)
31
+ end
32
+
33
+ def test_to_a
34
+ assert_equal(@record_batches,
35
+ @iterator.to_a)
36
+ end
37
+ end
@@ -22,6 +22,20 @@ class RecordBatchTest < Test::Unit::TestCase
22
22
  count: :uint32)
23
23
  end
24
24
 
25
+ test("[raw_table]") do
26
+ raw_table = {
27
+ visible: [true, nil, false],
28
+ count: [1, nil, 3],
29
+ }
30
+ record_batch = Arrow::RecordBatch.new(raw_table)
31
+ assert_equal([
32
+ {"visible" => true, "count" => 1},
33
+ {"visible" => nil, "count" => nil},
34
+ {"visible" => false, "count" => 3},
35
+ ],
36
+ record_batch.each_record.collect(&:to_h))
37
+ end
38
+
25
39
  test("[Schema, records]") do
26
40
  records = [
27
41
  {visible: true, count: 1},
@@ -16,6 +16,8 @@
16
16
  # under the License.
17
17
 
18
18
  class SchemaTest < Test::Unit::TestCase
19
+ include Helper::Omittable
20
+
19
21
  def setup
20
22
  @count_field = Arrow::Field.new("count", :uint32)
21
23
  @visible_field = Arrow::Field.new("visible", :boolean)
@@ -114,5 +116,19 @@ class SchemaTest < Test::Unit::TestCase
114
116
  end
115
117
  end
116
118
  end
119
+
120
+ sub_test_case("#to_s") do
121
+ test("show_metadata") do
122
+ require_gi_bindings(3, 4, 2)
123
+
124
+ schema = @schema.with_metadata("key" => "value")
125
+ assert_equal(<<-SCHEMA.chomp, schema.to_s(show_metadata: true))
126
+ count: uint32
127
+ visible: bool
128
+ -- metadata --
129
+ key: value
130
+ SCHEMA
131
+ end
132
+ end
117
133
  end
118
134
  end
@@ -46,10 +46,14 @@ class SlicerTest < Test::Unit::TestCase
46
46
  end
47
47
  assert_equal(<<-TABLE, sliced_table.to_s)
48
48
  count visible
49
- 0 1 true
50
- 1 8 true
51
- 2 16 true
52
- 3 256 true
49
+ 0
50
+ 1 1 true
51
+ 2
52
+ 3 8 true
53
+ 4 16 true
54
+ 5
55
+ 6
56
+ 7 256 true
53
57
  TABLE
54
58
  end
55
59
 
@@ -66,7 +70,8 @@ class SlicerTest < Test::Unit::TestCase
66
70
  4 16 true
67
71
  5 32 false
68
72
  6 64
69
- 7 256 true
73
+ 7
74
+ 8 256 true
70
75
  TABLE
71
76
  end
72
77
  end
@@ -78,8 +83,12 @@ class SlicerTest < Test::Unit::TestCase
78
83
  end
79
84
  assert_equal(<<-TABLE, sliced_table.to_s)
80
85
  count visible
81
- 0 2 false
82
- 1 32 false
86
+ 0
87
+ 1 2 false
88
+ 2
89
+ 3 32 false
90
+ 4
91
+ 5
83
92
  TABLE
84
93
  end
85
94
 
@@ -90,6 +99,7 @@ class SlicerTest < Test::Unit::TestCase
90
99
  assert_equal(<<-TABLE, sliced_table.to_s)
91
100
  count visible
92
101
  0 0
102
+ 1
93
103
  TABLE
94
104
  end
95
105
  end
@@ -142,10 +152,14 @@ class SlicerTest < Test::Unit::TestCase
142
152
  end
143
153
  assert_equal(<<-TABLE, sliced_table.to_s)
144
154
  count visible
145
- 0 1 true
146
- 1 8 true
147
- 2 16 true
148
- 3 256 true
155
+ 0
156
+ 1 1 true
157
+ 2
158
+ 3 8 true
159
+ 4 16 true
160
+ 5
161
+ 6
162
+ 7 256 true
149
163
  TABLE
150
164
  end
151
165
  end
@@ -172,8 +186,12 @@ class SlicerTest < Test::Unit::TestCase
172
186
  end
173
187
  assert_equal(<<-TABLE, sliced_table.to_s)
174
188
  count visible
175
- 0 2 false
176
- 1 32 false
189
+ 0
190
+ 1 2 false
191
+ 2
192
+ 3 32 false
193
+ 4
194
+ 5
177
195
  TABLE
178
196
  end
179
197
  end
@@ -200,8 +218,12 @@ class SlicerTest < Test::Unit::TestCase
200
218
  end
201
219
  assert_equal(<<-TABLE, sliced_table.to_s)
202
220
  count visible
203
- 0 2 false
204
- 1 32 false
221
+ 0
222
+ 1 2 false
223
+ 2
224
+ 3 32 false
225
+ 4
226
+ 5
205
227
  TABLE
206
228
  end
207
229
  end
@@ -217,6 +239,7 @@ class SlicerTest < Test::Unit::TestCase
217
239
  2 2 false
218
240
  3 4
219
241
  4 8 true
242
+ 5
220
243
  TABLE
221
244
  end
222
245
 
@@ -229,7 +252,8 @@ class SlicerTest < Test::Unit::TestCase
229
252
  0 16 true
230
253
  1 32 false
231
254
  2 64
232
- 3 256 true
255
+ 3
256
+ 4 256 true
233
257
  TABLE
234
258
  end
235
259
 
@@ -245,6 +269,7 @@ class SlicerTest < Test::Unit::TestCase
245
269
  3 4
246
270
  4 8 true
247
271
  5 16 true
272
+ 6
248
273
  TABLE
249
274
  end
250
275
 
@@ -256,7 +281,8 @@ class SlicerTest < Test::Unit::TestCase
256
281
  count visible
257
282
  0 32 false
258
283
  1 64
259
- 2 256 true
284
+ 2
285
+ 3 256 true
260
286
  TABLE
261
287
  end
262
288
 
@@ -268,7 +294,8 @@ class SlicerTest < Test::Unit::TestCase
268
294
  count visible
269
295
  0 32 false
270
296
  1 64
271
- 2 256 true
297
+ 2
298
+ 3 256 true
272
299
  TABLE
273
300
  end
274
301
 
@@ -284,6 +311,7 @@ class SlicerTest < Test::Unit::TestCase
284
311
  3 4
285
312
  4 8 true
286
313
  5 16 true
314
+ 6
287
315
  TABLE
288
316
  end
289
317
 
@@ -296,7 +324,8 @@ class SlicerTest < Test::Unit::TestCase
296
324
  0 16 true
297
325
  1 32 false
298
326
  2 64
299
- 3 256 true
327
+ 3
328
+ 4 256 true
300
329
  TABLE
301
330
  end
302
331
 
@@ -311,6 +340,7 @@ class SlicerTest < Test::Unit::TestCase
311
340
  2 2 false
312
341
  3 4
313
342
  4 8 true
343
+ 5
314
344
  TABLE
315
345
  end
316
346
 
@@ -324,6 +354,7 @@ class SlicerTest < Test::Unit::TestCase
324
354
  1 4
325
355
  2 16 true
326
356
  3 64
357
+ 4
327
358
  TABLE
328
359
  end
329
360
 
@@ -337,7 +368,8 @@ class SlicerTest < Test::Unit::TestCase
337
368
  1 2 false
338
369
  2 8 true
339
370
  3 32 false
340
- 4 256 true
371
+ 4
372
+ 5 256 true
341
373
  TABLE
342
374
  end
343
375
 
@@ -347,8 +379,12 @@ class SlicerTest < Test::Unit::TestCase
347
379
  end
348
380
  assert_equal(<<-TABLE, sliced_table.to_s)
349
381
  count visible
350
- 0 16 true
351
- 1 256 true
382
+ 0
383
+ 1
384
+ 2 16 true
385
+ 3
386
+ 4
387
+ 5 256 true
352
388
  TABLE
353
389
  end
354
390
 
@@ -358,11 +394,15 @@ class SlicerTest < Test::Unit::TestCase
358
394
  end
359
395
  assert_equal(<<-TABLE, sliced_table.to_s)
360
396
  count visible
361
- 0 1 true
362
- 1 8 true
363
- 2 16 true
364
- 3 32 false
365
- 4 256 true
397
+ 0
398
+ 1 1 true
399
+ 2
400
+ 3 8 true
401
+ 4 16 true
402
+ 5 32 false
403
+ 6
404
+ 7
405
+ 8 256 true
366
406
  TABLE
367
407
  end
368
408
 
@@ -372,9 +412,13 @@ class SlicerTest < Test::Unit::TestCase
372
412
  end
373
413
  assert_equal(<<-TABLE, sliced_table.to_s)
374
414
  count visible
375
- 0 1 true
376
- 1 8 true
377
- 2 32 false
415
+ 0
416
+ 1 1 true
417
+ 2
418
+ 3 8 true
419
+ 4 32 false
420
+ 5
421
+ 6
378
422
  TABLE
379
423
  end
380
424
 
@@ -28,12 +28,12 @@ class SparseUnionDataTypeTest < Test::Unit::TestCase
28
28
  end
29
29
 
30
30
  test("ordered arguments") do
31
- assert_equal("union[sparse]<visible: bool=2, count: int32=9>",
31
+ assert_equal("sparse_union<visible: bool=2, count: int32=9>",
32
32
  Arrow::SparseUnionDataType.new(@fields, [2, 9]).to_s)
33
33
  end
34
34
 
35
35
  test("description") do
36
- assert_equal("union[sparse]<visible: bool=2, count: int32=9>",
36
+ assert_equal("sparse_union<visible: bool=2, count: int32=9>",
37
37
  Arrow::SparseUnionDataType.new(fields: @fields,
38
38
  type_codes: [2, 9]).to_s)
39
39
  end
@@ -38,10 +38,12 @@ class StructArrayBuilderTest < Test::Unit::TestCase
38
38
 
39
39
  test("Array") do
40
40
  @builder.append_value([true, 1])
41
+ @builder.append_value([])
42
+ @builder.append_value([false])
41
43
  array = @builder.finish
42
44
  assert_equal([
43
- [true],
44
- [1],
45
+ [true, nil, false],
46
+ [1, nil, nil],
45
47
  ],
46
48
  [
47
49
  array.find_field(0).to_a,
@@ -66,10 +68,12 @@ class StructArrayBuilderTest < Test::Unit::TestCase
66
68
 
67
69
  test("Hash") do
68
70
  @builder.append_value(count: 1, visible: true)
71
+ @builder.append_value(visible: false)
72
+ @builder.append_value(count: 2)
69
73
  array = @builder.finish
70
74
  assert_equal([
71
- [true],
72
- [1],
75
+ [true, false, nil],
76
+ [1, nil, 2],
73
77
  ],
74
78
  [
75
79
  array.find_field(0).to_a,