red-arrow 0.15.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +63 -33
  4. data/ext/arrow/raw-records.cpp +2 -1
  5. data/ext/arrow/values.cpp +2 -1
  6. data/lib/arrow/array-builder.rb +101 -52
  7. data/lib/arrow/array.rb +28 -10
  8. data/lib/arrow/{binary-array-builder.rb → buffer.rb} +7 -15
  9. data/lib/arrow/chunked-array.rb +2 -0
  10. data/lib/arrow/csv-loader.rb +15 -3
  11. data/lib/arrow/csv-read-options.rb +18 -0
  12. data/lib/arrow/data-type.rb +35 -2
  13. data/lib/arrow/decimal128-array-builder.rb +0 -2
  14. data/lib/arrow/dictionary-array.rb +24 -0
  15. data/lib/arrow/field.rb +1 -1
  16. data/lib/arrow/generic-filterable.rb +43 -0
  17. data/lib/arrow/generic-takeable.rb +38 -0
  18. data/lib/arrow/list-data-type.rb +58 -8
  19. data/lib/arrow/loader.rb +12 -1
  20. data/lib/arrow/null-array-builder.rb +1 -1
  21. data/lib/arrow/null-array.rb +24 -0
  22. data/lib/arrow/raw-table-converter.rb +47 -0
  23. data/lib/arrow/record-batch-iterator.rb +22 -0
  24. data/lib/arrow/record-batch.rb +8 -3
  25. data/lib/arrow/schema.rb +5 -2
  26. data/lib/arrow/struct-array-builder.rb +13 -7
  27. data/lib/arrow/struct-data-type.rb +0 -2
  28. data/lib/arrow/table-loader.rb +29 -6
  29. data/lib/arrow/table-saver.rb +37 -13
  30. data/lib/arrow/table.rb +20 -73
  31. data/lib/arrow/version.rb +1 -1
  32. data/red-arrow.gemspec +4 -2
  33. data/test/helper.rb +1 -0
  34. data/test/helper/omittable.rb +36 -0
  35. data/test/raw-records/test-dense-union-array.rb +1 -34
  36. data/test/raw-records/test-sparse-union-array.rb +1 -33
  37. data/test/run-test.rb +14 -3
  38. data/test/test-array-builder.rb +17 -0
  39. data/test/test-array.rb +104 -0
  40. data/test/test-buffer.rb +11 -0
  41. data/test/test-chunked-array.rb +96 -0
  42. data/test/test-csv-loader.rb +77 -2
  43. data/test/test-data-type.rb +11 -0
  44. data/test/test-dense-union-data-type.rb +2 -2
  45. data/test/test-dictionary-array.rb +41 -0
  46. data/test/test-feather.rb +21 -6
  47. data/test/test-list-data-type.rb +27 -1
  48. data/test/test-null-array.rb +23 -0
  49. data/test/test-record-batch-iterator.rb +37 -0
  50. data/test/test-record-batch.rb +14 -0
  51. data/test/test-schema.rb +16 -0
  52. data/test/test-slicer.rb +74 -30
  53. data/test/test-sparse-union-data-type.rb +2 -2
  54. data/test/test-struct-array-builder.rb +8 -4
  55. data/test/test-table.rb +153 -14
  56. data/test/test-timestamp-array.rb +19 -0
  57. data/test/values/test-dense-union-array.rb +1 -34
  58. data/test/values/test-sparse-union-array.rb +1 -33
  59. metadata +76 -63
@@ -18,17 +18,32 @@
18
18
  class FeatherTest < Test::Unit::TestCase
19
19
  include Helper::Fixture
20
20
 
21
- def test_save_load
21
+ def setup
22
22
  columns = {
23
23
  "message" => Arrow::StringArray.new(["Start", "Crash", "Shutdown"]),
24
24
  "is_critical" => Arrow::BooleanArray.new([false, true, false]),
25
25
  }
26
- table = Arrow::Table.new(columns)
26
+ @table = Arrow::Table.new(columns)
27
27
 
28
- output = Tempfile.new(["red-arrow", ".feather"])
29
- table.save(output.path)
30
- output.close
28
+ @output = Tempfile.new(["red-arrow", ".feather"])
29
+ begin
30
+ yield(@output)
31
+ ensure
32
+ @output.close!
33
+ end
34
+ end
35
+
36
+ def test_default
37
+ @table.save(@output.path)
38
+ @output.close
39
+
40
+ assert_equal(@table, Arrow::Table.load(@output.path))
41
+ end
42
+
43
+ def test_compression
44
+ @table.save(@output.path, compression: :zstd)
45
+ @output.close
31
46
 
32
- assert_equal(table, Arrow::Table.load(output.path))
47
+ assert_equal(@table, Arrow::Table.load(@output.path))
33
48
  end
34
49
  end
@@ -23,7 +23,7 @@ class ListDataTypeTest < Test::Unit::TestCase
23
23
  Arrow::ListDataType.new(field).to_s)
24
24
  end
25
25
 
26
- test("Hash") do
26
+ test("name: String") do
27
27
  assert_equal("list<tag: string>",
28
28
  Arrow::ListDataType.new(name: "tag", type: :string).to_s)
29
29
  end
@@ -39,5 +39,31 @@ class ListDataTypeTest < Test::Unit::TestCase
39
39
  assert_equal("list<tag: string>",
40
40
  Arrow::ListDataType.new(field: field_description).to_s)
41
41
  end
42
+
43
+ test("Arrow::DataType") do
44
+ data_type = Arrow::BooleanDataType.new
45
+ assert_equal("list<item: bool>",
46
+ Arrow::ListDataType.new(data_type).to_s)
47
+ end
48
+
49
+ test("String") do
50
+ assert_equal("list<item: bool>",
51
+ Arrow::ListDataType.new("boolean").to_s)
52
+ end
53
+
54
+ test("Symbol") do
55
+ assert_equal("list<item: bool>",
56
+ Arrow::ListDataType.new(:boolean).to_s)
57
+ end
58
+
59
+ test("[data type name, additional information]") do
60
+ assert_equal("list<item: time32[ms]>",
61
+ Arrow::ListDataType.new([:time32, :milli]).to_s)
62
+ end
63
+
64
+ test("type: Symbol") do
65
+ assert_equal("list<item: bool>",
66
+ Arrow::ListDataType.new(type: :boolean).to_s)
67
+ end
42
68
  end
43
69
  end
@@ -0,0 +1,23 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class NullArrayTest < Test::Unit::TestCase
19
+ test("#[]") do
20
+ array = Arrow::NullArray.new(1)
21
+ assert_nil(array[0])
22
+ end
23
+ end
@@ -0,0 +1,37 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class RecordBatchIteratorTest < Test::Unit::TestCase
19
+ def setup
20
+ @schema = Arrow::Schema.new(visible: :boolean,
21
+ count: :uint32)
22
+ @record_batches = [
23
+ Arrow::RecordBatch.new(@schema,
24
+ visible: [true],
25
+ count: [1]),
26
+ Arrow::RecordBatch.new(@schema,
27
+ visible: [false, nil],
28
+ count: [nil, 3]),
29
+ ]
30
+ @iterator = Arrow::RecordBatchIterator.new(@record_batches)
31
+ end
32
+
33
+ def test_to_a
34
+ assert_equal(@record_batches,
35
+ @iterator.to_a)
36
+ end
37
+ end
@@ -22,6 +22,20 @@ class RecordBatchTest < Test::Unit::TestCase
22
22
  count: :uint32)
23
23
  end
24
24
 
25
+ test("[raw_table]") do
26
+ raw_table = {
27
+ visible: [true, nil, false],
28
+ count: [1, nil, 3],
29
+ }
30
+ record_batch = Arrow::RecordBatch.new(raw_table)
31
+ assert_equal([
32
+ {"visible" => true, "count" => 1},
33
+ {"visible" => nil, "count" => nil},
34
+ {"visible" => false, "count" => 3},
35
+ ],
36
+ record_batch.each_record.collect(&:to_h))
37
+ end
38
+
25
39
  test("[Schema, records]") do
26
40
  records = [
27
41
  {visible: true, count: 1},
@@ -16,6 +16,8 @@
16
16
  # under the License.
17
17
 
18
18
  class SchemaTest < Test::Unit::TestCase
19
+ include Helper::Omittable
20
+
19
21
  def setup
20
22
  @count_field = Arrow::Field.new("count", :uint32)
21
23
  @visible_field = Arrow::Field.new("visible", :boolean)
@@ -114,5 +116,19 @@ class SchemaTest < Test::Unit::TestCase
114
116
  end
115
117
  end
116
118
  end
119
+
120
+ sub_test_case("#to_s") do
121
+ test("show_metadata") do
122
+ require_gi_bindings(3, 4, 2)
123
+
124
+ schema = @schema.with_metadata("key" => "value")
125
+ assert_equal(<<-SCHEMA.chomp, schema.to_s(show_metadata: true))
126
+ count: uint32
127
+ visible: bool
128
+ -- metadata --
129
+ key: value
130
+ SCHEMA
131
+ end
132
+ end
117
133
  end
118
134
  end
@@ -46,10 +46,14 @@ class SlicerTest < Test::Unit::TestCase
46
46
  end
47
47
  assert_equal(<<-TABLE, sliced_table.to_s)
48
48
  count visible
49
- 0 1 true
50
- 1 8 true
51
- 2 16 true
52
- 3 256 true
49
+ 0
50
+ 1 1 true
51
+ 2
52
+ 3 8 true
53
+ 4 16 true
54
+ 5
55
+ 6
56
+ 7 256 true
53
57
  TABLE
54
58
  end
55
59
 
@@ -66,7 +70,8 @@ class SlicerTest < Test::Unit::TestCase
66
70
  4 16 true
67
71
  5 32 false
68
72
  6 64
69
- 7 256 true
73
+ 7
74
+ 8 256 true
70
75
  TABLE
71
76
  end
72
77
  end
@@ -78,8 +83,12 @@ class SlicerTest < Test::Unit::TestCase
78
83
  end
79
84
  assert_equal(<<-TABLE, sliced_table.to_s)
80
85
  count visible
81
- 0 2 false
82
- 1 32 false
86
+ 0
87
+ 1 2 false
88
+ 2
89
+ 3 32 false
90
+ 4
91
+ 5
83
92
  TABLE
84
93
  end
85
94
 
@@ -90,6 +99,7 @@ class SlicerTest < Test::Unit::TestCase
90
99
  assert_equal(<<-TABLE, sliced_table.to_s)
91
100
  count visible
92
101
  0 0
102
+ 1
93
103
  TABLE
94
104
  end
95
105
  end
@@ -142,10 +152,14 @@ class SlicerTest < Test::Unit::TestCase
142
152
  end
143
153
  assert_equal(<<-TABLE, sliced_table.to_s)
144
154
  count visible
145
- 0 1 true
146
- 1 8 true
147
- 2 16 true
148
- 3 256 true
155
+ 0
156
+ 1 1 true
157
+ 2
158
+ 3 8 true
159
+ 4 16 true
160
+ 5
161
+ 6
162
+ 7 256 true
149
163
  TABLE
150
164
  end
151
165
  end
@@ -172,8 +186,12 @@ class SlicerTest < Test::Unit::TestCase
172
186
  end
173
187
  assert_equal(<<-TABLE, sliced_table.to_s)
174
188
  count visible
175
- 0 2 false
176
- 1 32 false
189
+ 0
190
+ 1 2 false
191
+ 2
192
+ 3 32 false
193
+ 4
194
+ 5
177
195
  TABLE
178
196
  end
179
197
  end
@@ -200,8 +218,12 @@ class SlicerTest < Test::Unit::TestCase
200
218
  end
201
219
  assert_equal(<<-TABLE, sliced_table.to_s)
202
220
  count visible
203
- 0 2 false
204
- 1 32 false
221
+ 0
222
+ 1 2 false
223
+ 2
224
+ 3 32 false
225
+ 4
226
+ 5
205
227
  TABLE
206
228
  end
207
229
  end
@@ -217,6 +239,7 @@ class SlicerTest < Test::Unit::TestCase
217
239
  2 2 false
218
240
  3 4
219
241
  4 8 true
242
+ 5
220
243
  TABLE
221
244
  end
222
245
 
@@ -229,7 +252,8 @@ class SlicerTest < Test::Unit::TestCase
229
252
  0 16 true
230
253
  1 32 false
231
254
  2 64
232
- 3 256 true
255
+ 3
256
+ 4 256 true
233
257
  TABLE
234
258
  end
235
259
 
@@ -245,6 +269,7 @@ class SlicerTest < Test::Unit::TestCase
245
269
  3 4
246
270
  4 8 true
247
271
  5 16 true
272
+ 6
248
273
  TABLE
249
274
  end
250
275
 
@@ -256,7 +281,8 @@ class SlicerTest < Test::Unit::TestCase
256
281
  count visible
257
282
  0 32 false
258
283
  1 64
259
- 2 256 true
284
+ 2
285
+ 3 256 true
260
286
  TABLE
261
287
  end
262
288
 
@@ -268,7 +294,8 @@ class SlicerTest < Test::Unit::TestCase
268
294
  count visible
269
295
  0 32 false
270
296
  1 64
271
- 2 256 true
297
+ 2
298
+ 3 256 true
272
299
  TABLE
273
300
  end
274
301
 
@@ -284,6 +311,7 @@ class SlicerTest < Test::Unit::TestCase
284
311
  3 4
285
312
  4 8 true
286
313
  5 16 true
314
+ 6
287
315
  TABLE
288
316
  end
289
317
 
@@ -296,7 +324,8 @@ class SlicerTest < Test::Unit::TestCase
296
324
  0 16 true
297
325
  1 32 false
298
326
  2 64
299
- 3 256 true
327
+ 3
328
+ 4 256 true
300
329
  TABLE
301
330
  end
302
331
 
@@ -311,6 +340,7 @@ class SlicerTest < Test::Unit::TestCase
311
340
  2 2 false
312
341
  3 4
313
342
  4 8 true
343
+ 5
314
344
  TABLE
315
345
  end
316
346
 
@@ -324,6 +354,7 @@ class SlicerTest < Test::Unit::TestCase
324
354
  1 4
325
355
  2 16 true
326
356
  3 64
357
+ 4
327
358
  TABLE
328
359
  end
329
360
 
@@ -337,7 +368,8 @@ class SlicerTest < Test::Unit::TestCase
337
368
  1 2 false
338
369
  2 8 true
339
370
  3 32 false
340
- 4 256 true
371
+ 4
372
+ 5 256 true
341
373
  TABLE
342
374
  end
343
375
 
@@ -347,8 +379,12 @@ class SlicerTest < Test::Unit::TestCase
347
379
  end
348
380
  assert_equal(<<-TABLE, sliced_table.to_s)
349
381
  count visible
350
- 0 16 true
351
- 1 256 true
382
+ 0
383
+ 1
384
+ 2 16 true
385
+ 3
386
+ 4
387
+ 5 256 true
352
388
  TABLE
353
389
  end
354
390
 
@@ -358,11 +394,15 @@ class SlicerTest < Test::Unit::TestCase
358
394
  end
359
395
  assert_equal(<<-TABLE, sliced_table.to_s)
360
396
  count visible
361
- 0 1 true
362
- 1 8 true
363
- 2 16 true
364
- 3 32 false
365
- 4 256 true
397
+ 0
398
+ 1 1 true
399
+ 2
400
+ 3 8 true
401
+ 4 16 true
402
+ 5 32 false
403
+ 6
404
+ 7
405
+ 8 256 true
366
406
  TABLE
367
407
  end
368
408
 
@@ -372,9 +412,13 @@ class SlicerTest < Test::Unit::TestCase
372
412
  end
373
413
  assert_equal(<<-TABLE, sliced_table.to_s)
374
414
  count visible
375
- 0 1 true
376
- 1 8 true
377
- 2 32 false
415
+ 0
416
+ 1 1 true
417
+ 2
418
+ 3 8 true
419
+ 4 32 false
420
+ 5
421
+ 6
378
422
  TABLE
379
423
  end
380
424
 
@@ -28,12 +28,12 @@ class SparseUnionDataTypeTest < Test::Unit::TestCase
28
28
  end
29
29
 
30
30
  test("ordered arguments") do
31
- assert_equal("union[sparse]<visible: bool=2, count: int32=9>",
31
+ assert_equal("sparse_union<visible: bool=2, count: int32=9>",
32
32
  Arrow::SparseUnionDataType.new(@fields, [2, 9]).to_s)
33
33
  end
34
34
 
35
35
  test("description") do
36
- assert_equal("union[sparse]<visible: bool=2, count: int32=9>",
36
+ assert_equal("sparse_union<visible: bool=2, count: int32=9>",
37
37
  Arrow::SparseUnionDataType.new(fields: @fields,
38
38
  type_codes: [2, 9]).to_s)
39
39
  end
@@ -38,10 +38,12 @@ class StructArrayBuilderTest < Test::Unit::TestCase
38
38
 
39
39
  test("Array") do
40
40
  @builder.append_value([true, 1])
41
+ @builder.append_value([])
42
+ @builder.append_value([false])
41
43
  array = @builder.finish
42
44
  assert_equal([
43
- [true],
44
- [1],
45
+ [true, nil, false],
46
+ [1, nil, nil],
45
47
  ],
46
48
  [
47
49
  array.find_field(0).to_a,
@@ -66,10 +68,12 @@ class StructArrayBuilderTest < Test::Unit::TestCase
66
68
 
67
69
  test("Hash") do
68
70
  @builder.append_value(count: 1, visible: true)
71
+ @builder.append_value(visible: false)
72
+ @builder.append_value(count: 2)
69
73
  array = @builder.finish
70
74
  assert_equal([
71
- [true],
72
- [1],
75
+ [true, false, nil],
76
+ [1, nil, 2],
73
77
  ],
74
78
  [
75
79
  array.find_field(0).to_a,