red-arrow 0.14.1 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (61)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +34 -0
  3. data/ext/arrow/converters.cpp +42 -0
  4. data/ext/arrow/converters.hpp +626 -0
  5. data/ext/arrow/raw-records.cpp +6 -625
  6. data/ext/arrow/red-arrow.hpp +37 -3
  7. data/ext/arrow/values.cpp +154 -0
  8. data/lib/arrow/array-builder.rb +24 -1
  9. data/lib/arrow/array.rb +9 -0
  10. data/lib/arrow/chunked-array.rb +5 -0
  11. data/lib/arrow/column-containable.rb +48 -0
  12. data/lib/arrow/column.rb +36 -10
  13. data/lib/arrow/csv-loader.rb +2 -2
  14. data/lib/arrow/data-type.rb +22 -5
  15. data/lib/arrow/date64-array-builder.rb +2 -2
  16. data/lib/arrow/date64-array.rb +1 -1
  17. data/lib/arrow/decimal128-array.rb +24 -0
  18. data/lib/arrow/field-containable.rb +3 -0
  19. data/lib/arrow/group.rb +10 -13
  20. data/lib/arrow/loader.rb +20 -1
  21. data/lib/arrow/record-batch.rb +6 -4
  22. data/lib/arrow/record-containable.rb +0 -35
  23. data/lib/arrow/record.rb +12 -9
  24. data/lib/arrow/slicer.rb +2 -2
  25. data/lib/arrow/struct-array-builder.rb +1 -7
  26. data/lib/arrow/struct-array.rb +13 -11
  27. data/lib/arrow/table-loader.rb +3 -9
  28. data/lib/arrow/table-table-formatter.rb +2 -2
  29. data/lib/arrow/table.rb +61 -24
  30. data/lib/arrow/time.rb +159 -0
  31. data/lib/arrow/time32-array-builder.rb +49 -0
  32. data/lib/arrow/time32-array.rb +28 -0
  33. data/lib/arrow/time64-array-builder.rb +49 -0
  34. data/lib/arrow/time64-array.rb +28 -0
  35. data/lib/arrow/timestamp-array-builder.rb +20 -1
  36. data/lib/arrow/timestamp-array.rb +10 -22
  37. data/lib/arrow/version.rb +1 -1
  38. data/red-arrow.gemspec +1 -1
  39. data/test/raw-records/test-basic-arrays.rb +16 -8
  40. data/test/raw-records/test-dense-union-array.rb +12 -5
  41. data/test/raw-records/test-list-array.rb +21 -9
  42. data/test/raw-records/test-sparse-union-array.rb +13 -5
  43. data/test/raw-records/test-struct-array.rb +11 -4
  44. data/test/test-column.rb +56 -31
  45. data/test/test-decimal128-array-builder.rb +11 -11
  46. data/test/test-decimal128-array.rb +4 -4
  47. data/test/test-slicer.rb +1 -3
  48. data/test/test-struct-array-builder.rb +4 -4
  49. data/test/test-struct-array.rb +4 -4
  50. data/test/test-table.rb +17 -8
  51. data/test/test-time.rb +288 -0
  52. data/test/test-time32-array.rb +81 -0
  53. data/test/test-time64-array.rb +81 -0
  54. data/test/values/test-basic-arrays.rb +284 -0
  55. data/test/values/test-dense-union-array.rb +487 -0
  56. data/test/values/test-list-array.rb +497 -0
  57. data/test/values/test-sparse-union-array.rb +477 -0
  58. data/test/values/test-struct-array.rb +452 -0
  59. metadata +78 -54
  60. data/lib/arrow/struct.rb +0 -79
  61. data/test/test-struct.rb +0 -81
@@ -52,7 +52,7 @@ module RawRecordsDenseUnionArrayTests
52
52
  end
53
53
  sub_record_batch = Arrow::RecordBatch.new(sub_schema,
54
54
  sub_records)
55
- sub_record_batch.columns[0]
55
+ sub_record_batch.columns[0].data
56
56
  end
57
57
  records.each do |record|
58
58
  column = record[0]
@@ -295,8 +295,10 @@ module RawRecordsDenseUnionArrayTests
295
295
  end
296
296
 
297
297
  def test_time32_second
298
+ unit = Arrow::TimeUnit::SECOND
298
299
  records = [
299
- [{"0" => 60 * 10}], # 00:10:00
300
+ # 00:10:00
301
+ [{"0" => Arrow::Time.new(unit, 60 * 10)}],
300
302
  [nil],
301
303
  [{"1" => nil}],
302
304
  ]
@@ -309,8 +311,10 @@ module RawRecordsDenseUnionArrayTests
309
311
  end
310
312
 
311
313
  def test_time32_milli
314
+ unit = Arrow::TimeUnit::MILLI
312
315
  records = [
313
- [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
316
+ # 00:10:00.123
317
+ [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
314
318
  [nil],
315
319
  [{"1" => nil}],
316
320
  ]
@@ -323,8 +327,10 @@ module RawRecordsDenseUnionArrayTests
323
327
  end
324
328
 
325
329
  def test_time64_micro
330
+ unit = Arrow::TimeUnit::MICRO
326
331
  records = [
327
- [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
332
+ # 00:10:00.123456
333
+ [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
328
334
  [nil],
329
335
  [{"1" => nil}],
330
336
  ]
@@ -337,9 +343,10 @@ module RawRecordsDenseUnionArrayTests
337
343
  end
338
344
 
339
345
  def test_time64_nano
346
+ unit = Arrow::TimeUnit::NANO
340
347
  records = [
341
348
  # 00:10:00.123456789
342
- [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}],
349
+ [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
343
350
  [nil],
344
351
  [{"1" => nil}],
345
352
  ]
@@ -271,13 +271,16 @@ module RawRecordsListArrayTests
271
271
  assert_equal(records, target.raw_records)
272
272
  end
273
273
 
274
- def test_time32_test
274
+ def test_time32_second
275
+ unit = Arrow::TimeUnit::SECOND
275
276
  records = [
276
277
  [
277
278
  [
278
- 60 * 10, # 00:10:00
279
+ # 00:10:00
280
+ Arrow::Time.new(unit, 60 * 10),
279
281
  nil,
280
- 60 * 60 * 2 + 9, # 02:00:09
282
+ # 02:00:09
283
+ Arrow::Time.new(unit, 60 * 60 * 2 + 9),
281
284
  ],
282
285
  ],
283
286
  [nil],
@@ -291,12 +294,15 @@ module RawRecordsListArrayTests
291
294
  end
292
295
 
293
296
  def test_time32_milli
297
+ unit = Arrow::TimeUnit::MILLI
294
298
  records = [
295
299
  [
296
300
  [
297
- (60 * 10) * 1000 + 123, # 00:10:00.123
301
+ # 00:10:00.123
302
+ Arrow::Time.new(unit, (60 * 10) * 1000 + 123),
298
303
  nil,
299
- (60 * 60 * 2 + 9) * 1000 + 987, # 02:00:09.987
304
+ # 02:00:09.987
305
+ Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987),
300
306
  ],
301
307
  ],
302
308
  [nil],
@@ -310,12 +316,15 @@ module RawRecordsListArrayTests
310
316
  end
311
317
 
312
318
  def test_time64_micro
319
+ unit = Arrow::TimeUnit::MICRO
313
320
  records = [
314
321
  [
315
322
  [
316
- (60 * 10) * 1_000_000 + 123_456, # 00:10:00.123456
323
+ # 00:10:00.123456
324
+ Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456),
317
325
  nil,
318
- (60 * 60 * 2 + 9) * 1_000_000 + 987_654, # 02:00:09.987654
326
+ # 02:00:09.987654
327
+ Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654),
319
328
  ],
320
329
  ],
321
330
  [nil],
@@ -329,12 +338,15 @@ module RawRecordsListArrayTests
329
338
  end
330
339
 
331
340
  def test_time64_nano
341
+ unit = Arrow::TimeUnit::NANO
332
342
  records = [
333
343
  [
334
344
  [
335
- (60 * 10) * 1_000_000_000 + 123_456_789, # 00:10:00.123456789
345
+ # 00:10:00.123456789
346
+ Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789),
336
347
  nil,
337
- (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321, # 02:00:09.987654321
348
+ # 02:00:09.987654321
349
+ Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321),
338
350
  ],
339
351
  ],
340
352
  [nil],
@@ -47,7 +47,7 @@ module RawRecordsSparseUnionArrayTests
47
47
  end
48
48
  sub_record_batch = Arrow::RecordBatch.new(sub_schema,
49
49
  sub_records)
50
- sub_record_batch.columns[0]
50
+ sub_record_batch.columns[0].data
51
51
  end
52
52
  records.each do |record|
53
53
  column = record[0]
@@ -284,8 +284,10 @@ module RawRecordsSparseUnionArrayTests
284
284
  end
285
285
 
286
286
  def test_time32_second
287
+ unit = Arrow::TimeUnit::SECOND
287
288
  records = [
288
- [{"0" => 60 * 10}], # 00:10:00
289
+ # 00:10:00
290
+ [{"0" => Arrow::Time.new(unit, 60 * 10)}],
289
291
  [nil],
290
292
  [{"1" => nil}],
291
293
  ]
@@ -298,8 +300,10 @@ module RawRecordsSparseUnionArrayTests
298
300
  end
299
301
 
300
302
  def test_time32_milli
303
+ unit = Arrow::TimeUnit::MILLI
301
304
  records = [
302
- [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
305
+ # 00:10:00.123
306
+ [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
303
307
  [nil],
304
308
  [{"1" => nil}],
305
309
  ]
@@ -312,8 +316,10 @@ module RawRecordsSparseUnionArrayTests
312
316
  end
313
317
 
314
318
  def test_time64_micro
319
+ unit = Arrow::TimeUnit::MICRO
315
320
  records = [
316
- [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
321
+ # 00:10:00.123456
322
+ [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
317
323
  [nil],
318
324
  [{"1" => nil}],
319
325
  ]
@@ -326,8 +332,10 @@ module RawRecordsSparseUnionArrayTests
326
332
  end
327
333
 
328
334
  def test_time64_nano
335
+ unit = Arrow::TimeUnit::NANO
329
336
  records = [
330
- [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}], # 00:10:00.123456789
337
+ # 00:10:00.123456789
338
+ [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
331
339
  [nil],
332
340
  [{"1" => nil}],
333
341
  ]
@@ -251,8 +251,10 @@ module RawRecordsStructArrayTests
251
251
  end
252
252
 
253
253
  def test_time32_second
254
+ unit = Arrow::TimeUnit::SECOND
254
255
  records = [
255
- [{"field" => 60 * 10}], # 00:10:00
256
+ # 00:10:00
257
+ [{"field" => Arrow::Time.new(unit, 60 * 10)}],
256
258
  [nil],
257
259
  [{"field" => nil}],
258
260
  ]
@@ -265,8 +267,10 @@ module RawRecordsStructArrayTests
265
267
  end
266
268
 
267
269
  def test_time32_milli
270
+ unit = Arrow::TimeUnit::MILLI
268
271
  records = [
269
- [{"field" => (60 * 10) * 1000 + 123}], # 00:10:00.123
272
+ # 00:10:00.123
273
+ [{"field" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
270
274
  [nil],
271
275
  [{"field" => nil}],
272
276
  ]
@@ -279,8 +283,10 @@ module RawRecordsStructArrayTests
279
283
  end
280
284
 
281
285
  def test_time64_micro
286
+ unit = Arrow::TimeUnit::MICRO
282
287
  records = [
283
- [{"field" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
288
+ # 00:10:00.123456
289
+ [{"field" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
284
290
  [nil],
285
291
  [{"field" => nil}],
286
292
  ]
@@ -293,9 +299,10 @@ module RawRecordsStructArrayTests
293
299
  end
294
300
 
295
301
  def test_time64_nano
302
+ unit = Arrow::TimeUnit::NANO
296
303
  records = [
297
304
  # 00:10:00.123456789
298
- [{"field" => (60 * 10) * 1_000_000_000 + 123_456_789}],
305
+ [{"field" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
299
306
  [nil],
300
307
  [{"field" => nil}],
301
308
  ]
@@ -16,51 +16,76 @@
16
16
  # under the License.
17
17
 
18
18
  class ColumnTest < Test::Unit::TestCase
19
+ def setup
20
+ table = Arrow::Table.new("visible" => [true, nil, false])
21
+ @column = table.visible
22
+ end
23
+
24
+ test("#name") do
25
+ assert_equal("visible", @column.name)
26
+ end
27
+
28
+ test("#data_type") do
29
+ assert_equal(Arrow::BooleanDataType.new, @column.data_type)
30
+ end
31
+
32
+ test("#null?") do
33
+ assert do
34
+ @column.null?(1)
35
+ end
36
+ end
37
+
38
+ test("#valid?") do
39
+ assert do
40
+ @column.valid?(0)
41
+ end
42
+ end
43
+
19
44
  test("#each") do
20
- arrays = [
21
- Arrow::BooleanArray.new([true, false]),
22
- Arrow::BooleanArray.new([nil, true]),
23
- ]
24
- chunked_array = Arrow::ChunkedArray.new(arrays)
25
- column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
26
- chunked_array)
27
- assert_equal([true, false, nil, true],
28
- column.to_a)
45
+ assert_equal([true, nil, false], @column.each.to_a)
46
+ end
47
+
48
+ test("#reverse_each") do
49
+ assert_equal([false, nil, true], @column.reverse_each.to_a)
29
50
  end
30
51
 
31
- test("#pack") do
32
- arrays = [
33
- Arrow::BooleanArray.new([true, false]),
34
- Arrow::BooleanArray.new([nil, true]),
35
- ]
36
- chunked_array = Arrow::ChunkedArray.new(arrays)
37
- column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
38
- chunked_array)
39
- packed_column = column.pack
40
- assert_equal([1, [true, false, nil, true]],
41
- [packed_column.data.n_chunks, packed_column.to_a])
52
+ test("#n_rows") do
53
+ assert_equal(3, @column.n_rows)
54
+ end
55
+
56
+ test("#n_nulls") do
57
+ assert_equal(1, @column.n_nulls)
42
58
  end
43
59
 
44
60
  sub_test_case("#==") do
45
- def setup
46
- arrays = [
47
- Arrow::BooleanArray.new([true]),
48
- Arrow::BooleanArray.new([false, true]),
49
- ]
50
- chunked_array = Arrow::ChunkedArray.new(arrays)
51
- @column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
52
- chunked_array)
61
+ test("same value") do
62
+ table1 = Arrow::Table.new("visible" => [true, false])
63
+ table2 = Arrow::Table.new("visible" => [true, false])
64
+ assert do
65
+ table1.visible == table2.visible
66
+ end
67
+ end
68
+
69
+ test("different name") do
70
+ table1 = Arrow::Table.new("visible" => [true, false])
71
+ table2 = Arrow::Table.new("invisible" => [true, false])
72
+ assert do
73
+ not table1.visible == table2.invisible
74
+ end
53
75
  end
54
76
 
55
- test("Arrow::Column") do
77
+ test("different value") do
78
+ table1 = Arrow::Table.new("visible" => [true, false])
79
+ table2 = Arrow::Table.new("visible" => [true, true])
56
80
  assert do
57
- @column == @column
81
+ not table1.visible == table2.visible
58
82
  end
59
83
  end
60
84
 
61
85
  test("not Arrow::Column") do
86
+ table = Arrow::Table.new("visible" => [true, false])
62
87
  assert do
63
- not (@column == 29)
88
+ not table.visible == 29
64
89
  end
65
90
  end
66
91
  end
@@ -17,7 +17,7 @@
17
17
 
18
18
  class Decimal128ArrayBuilderTest < Test::Unit::TestCase
19
19
  def setup
20
- @data_type = Arrow::Decimal128DataType.new(8, 2)
20
+ @data_type = Arrow::Decimal128DataType.new(3, 1)
21
21
  @builder = Arrow::Decimal128ArrayBuilder.new(@data_type)
22
22
  end
23
23
 
@@ -31,28 +31,28 @@ class Decimal128ArrayBuilderTest < Test::Unit::TestCase
31
31
  test("Arrow::Decimal128") do
32
32
  @builder.append_value(Arrow::Decimal128.new("10.1"))
33
33
  array = @builder.finish
34
- assert_equal(Arrow::Decimal128.new("10.1"),
34
+ assert_equal(BigDecimal("10.1"),
35
35
  array[0])
36
36
  end
37
37
 
38
38
  test("String") do
39
39
  @builder.append_value("10.1")
40
40
  array = @builder.finish
41
- assert_equal(Arrow::Decimal128.new("10.1"),
41
+ assert_equal(BigDecimal("10.1"),
42
42
  array[0])
43
43
  end
44
44
 
45
45
  test("Float") do
46
46
  @builder.append_value(10.1)
47
47
  array = @builder.finish
48
- assert_equal(Arrow::Decimal128.new("10.1"),
48
+ assert_equal(BigDecimal("10.1"),
49
49
  array[0])
50
50
  end
51
51
 
52
52
  test("BigDecimal") do
53
53
  @builder.append_value(BigDecimal("10.1"))
54
54
  array = @builder.finish
55
- assert_equal(Arrow::Decimal128.new("10.1"),
55
+ assert_equal(BigDecimal("10.1"),
56
56
  array[0])
57
57
  end
58
58
  end
@@ -68,11 +68,11 @@ class Decimal128ArrayBuilderTest < Test::Unit::TestCase
68
68
  ])
69
69
  array = @builder.finish
70
70
  assert_equal([
71
- Arrow::Decimal128.new("10.1"),
71
+ BigDecimal("10.1"),
72
72
  nil,
73
- Arrow::Decimal128.new("10.1"),
74
- Arrow::Decimal128.new("10.1"),
75
- Arrow::Decimal128.new("10.1"),
73
+ BigDecimal("10.1"),
74
+ BigDecimal("10.1"),
75
+ BigDecimal("10.1"),
76
76
  ],
77
77
  array.to_a)
78
78
  end
@@ -85,9 +85,9 @@ class Decimal128ArrayBuilderTest < Test::Unit::TestCase
85
85
  ])
86
86
  array = @builder.finish
87
87
  assert_equal([
88
- Arrow::Decimal128.new("10.1"),
88
+ BigDecimal("10.1"),
89
89
  nil,
90
- Arrow::Decimal128.new("10.1"),
90
+ BigDecimal("10.1"),
91
91
  ],
92
92
  array.to_a)
93
93
  end
@@ -18,7 +18,7 @@
18
18
  class Decimal128ArrayTest < Test::Unit::TestCase
19
19
  sub_test_case(".new") do
20
20
  test("build") do
21
- data_type = Arrow::Decimal128DataType.new(8, 2)
21
+ data_type = Arrow::Decimal128DataType.new(3, 1)
22
22
  values = [
23
23
  10.1,
24
24
  nil,
@@ -27,10 +27,10 @@ class Decimal128ArrayTest < Test::Unit::TestCase
27
27
  ]
28
28
  array = Arrow::Decimal128Array.new(data_type, values)
29
29
  assert_equal([
30
- Arrow::Decimal128.new("10.1"),
30
+ BigDecimal("10.1"),
31
31
  nil,
32
- Arrow::Decimal128.new("10.1"),
33
- Arrow::Decimal128.new("10.1"),
32
+ BigDecimal("10.1"),
33
+ BigDecimal("10.1"),
34
34
  ],
35
35
  array.to_a)
36
36
  end
@@ -36,9 +36,7 @@ class SlicerTest < Test::Unit::TestCase
36
36
  ]
37
37
  @count_array = Arrow::ChunkedArray.new(count_arrays)
38
38
  @visible_array = Arrow::ChunkedArray.new(visible_arrays)
39
- @count_column = Arrow::Column.new(@count_field, @count_array)
40
- @visible_column = Arrow::Column.new(@visible_field, @visible_array)
41
- @table = Arrow::Table.new(schema, [@count_column, @visible_column])
39
+ @table = Arrow::Table.new(schema, [@count_array, @visible_array])
42
40
  end
43
41
 
44
42
  sub_test_case("column") do
@@ -168,12 +168,12 @@ class StructArrayBuilderTest < Test::Unit::TestCase
168
168
  @builder.get_field_builder(1).append(2)
169
169
  array = @builder.finish
170
170
  assert_equal([
171
- [true, 1],
172
- [false, 2],
171
+ {"visible" => true, "count" => 1},
172
+ {"visible" => false, "count" => 2},
173
173
  ],
174
174
  [
175
- array.get_value(0).values,
176
- array.get_value(1).values,
175
+ array.get_value(0),
176
+ array.get_value(1),
177
177
  ])
178
178
  end
179
179
  end