red-arrow 0.16.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +60 -30
  4. data/ext/arrow/extconf.rb +14 -3
  5. data/ext/arrow/raw-records.cpp +2 -1
  6. data/ext/arrow/values.cpp +2 -1
  7. data/lib/arrow/buffer.rb +28 -0
  8. data/lib/arrow/dictionary-array.rb +24 -0
  9. data/lib/arrow/generic-filterable.rb +7 -7
  10. data/lib/arrow/generic-takeable.rb +2 -2
  11. data/lib/arrow/loader.rb +3 -0
  12. data/lib/arrow/null-array-builder.rb +1 -1
  13. data/lib/arrow/raw-table-converter.rb +47 -0
  14. data/lib/arrow/record-batch-iterator.rb +22 -0
  15. data/lib/arrow/record-batch.rb +9 -1
  16. data/lib/arrow/schema.rb +5 -0
  17. data/lib/arrow/struct-array-builder.rb +13 -7
  18. data/lib/arrow/table-saver.rb +8 -4
  19. data/lib/arrow/table.rb +8 -25
  20. data/lib/arrow/version.rb +1 -1
  21. data/test/helper.rb +1 -0
  22. data/test/helper/omittable.rb +36 -0
  23. data/test/raw-records/test-dense-union-array.rb +1 -34
  24. data/test/raw-records/test-sparse-union-array.rb +1 -33
  25. data/test/run-test.rb +14 -3
  26. data/test/test-array.rb +5 -3
  27. data/test/test-buffer.rb +11 -0
  28. data/test/test-chunked-array.rb +5 -3
  29. data/test/test-dense-union-data-type.rb +2 -2
  30. data/test/test-dictionary-array.rb +41 -0
  31. data/test/test-feather.rb +21 -6
  32. data/test/test-record-batch-iterator.rb +37 -0
  33. data/test/test-record-batch.rb +14 -0
  34. data/test/test-schema.rb +16 -0
  35. data/test/test-sparse-union-data-type.rb +2 -2
  36. data/test/test-struct-array-builder.rb +8 -4
  37. data/test/test-table.rb +9 -3
  38. data/test/values/test-dense-union-array.rb +1 -34
  39. data/test/values/test-sparse-union-array.rb +1 -33
  40. metadata +68 -59
@@ -51,9 +51,7 @@ module RawRecordsSparseUnionArrayTests
51
51
  end
52
52
  records.each do |record|
53
53
  column = record[0]
54
- if column.nil?
55
- type_ids << nil
56
- elsif column.key?("0")
54
+ if column.key?("0")
57
55
  type_ids << type_codes[0]
58
56
  elsif column.key?("1")
59
57
  type_ids << type_codes[1]
@@ -71,7 +69,6 @@ module RawRecordsSparseUnionArrayTests
71
69
  def test_null
72
70
  records = [
73
71
  [{"0" => nil}],
74
- [nil],
75
72
  ]
76
73
  target = build(:null, records)
77
74
  assert_equal(records, target.raw_records)
@@ -80,7 +77,6 @@ module RawRecordsSparseUnionArrayTests
80
77
  def test_boolean
81
78
  records = [
82
79
  [{"0" => true}],
83
- [nil],
84
80
  [{"1" => nil}],
85
81
  ]
86
82
  target = build(:boolean, records)
@@ -90,7 +86,6 @@ module RawRecordsSparseUnionArrayTests
90
86
  def test_int8
91
87
  records = [
92
88
  [{"0" => -(2 ** 7)}],
93
- [nil],
94
89
  [{"1" => nil}],
95
90
  ]
96
91
  target = build(:int8, records)
@@ -100,7 +95,6 @@ module RawRecordsSparseUnionArrayTests
100
95
  def test_uint8
101
96
  records = [
102
97
  [{"0" => (2 ** 8) - 1}],
103
- [nil],
104
98
  [{"1" => nil}],
105
99
  ]
106
100
  target = build(:uint8, records)
@@ -110,7 +104,6 @@ module RawRecordsSparseUnionArrayTests
110
104
  def test_int16
111
105
  records = [
112
106
  [{"0" => -(2 ** 15)}],
113
- [nil],
114
107
  [{"1" => nil}],
115
108
  ]
116
109
  target = build(:int16, records)
@@ -120,7 +113,6 @@ module RawRecordsSparseUnionArrayTests
120
113
  def test_uint16
121
114
  records = [
122
115
  [{"0" => (2 ** 16) - 1}],
123
- [nil],
124
116
  [{"1" => nil}],
125
117
  ]
126
118
  target = build(:uint16, records)
@@ -130,7 +122,6 @@ module RawRecordsSparseUnionArrayTests
130
122
  def test_int32
131
123
  records = [
132
124
  [{"0" => -(2 ** 31)}],
133
- [nil],
134
125
  [{"1" => nil}],
135
126
  ]
136
127
  target = build(:int32, records)
@@ -140,7 +131,6 @@ module RawRecordsSparseUnionArrayTests
140
131
  def test_uint32
141
132
  records = [
142
133
  [{"0" => (2 ** 32) - 1}],
143
- [nil],
144
134
  [{"1" => nil}],
145
135
  ]
146
136
  target = build(:uint32, records)
@@ -150,7 +140,6 @@ module RawRecordsSparseUnionArrayTests
150
140
  def test_int64
151
141
  records = [
152
142
  [{"0" => -(2 ** 63)}],
153
- [nil],
154
143
  [{"1" => nil}],
155
144
  ]
156
145
  target = build(:int64, records)
@@ -160,7 +149,6 @@ module RawRecordsSparseUnionArrayTests
160
149
  def test_uint64
161
150
  records = [
162
151
  [{"0" => (2 ** 64) - 1}],
163
- [nil],
164
152
  [{"1" => nil}],
165
153
  ]
166
154
  target = build(:uint64, records)
@@ -170,7 +158,6 @@ module RawRecordsSparseUnionArrayTests
170
158
  def test_float
171
159
  records = [
172
160
  [{"0" => -1.0}],
173
- [nil],
174
161
  [{"1" => nil}],
175
162
  ]
176
163
  target = build(:float, records)
@@ -180,7 +167,6 @@ module RawRecordsSparseUnionArrayTests
180
167
  def test_double
181
168
  records = [
182
169
  [{"0" => -1.0}],
183
- [nil],
184
170
  [{"1" => nil}],
185
171
  ]
186
172
  target = build(:double, records)
@@ -190,7 +176,6 @@ module RawRecordsSparseUnionArrayTests
190
176
  def test_binary
191
177
  records = [
192
178
  [{"0" => "\xff".b}],
193
- [nil],
194
179
  [{"1" => nil}],
195
180
  ]
196
181
  target = build(:binary, records)
@@ -200,7 +185,6 @@ module RawRecordsSparseUnionArrayTests
200
185
  def test_string
201
186
  records = [
202
187
  [{"0" => "Ruby"}],
203
- [nil],
204
188
  [{"1" => nil}],
205
189
  ]
206
190
  target = build(:string, records)
@@ -210,7 +194,6 @@ module RawRecordsSparseUnionArrayTests
210
194
  def test_date32
211
195
  records = [
212
196
  [{"0" => Date.new(1960, 1, 1)}],
213
- [nil],
214
197
  [{"1" => nil}],
215
198
  ]
216
199
  target = build(:date32, records)
@@ -220,7 +203,6 @@ module RawRecordsSparseUnionArrayTests
220
203
  def test_date64
221
204
  records = [
222
205
  [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
223
- [nil],
224
206
  [{"1" => nil}],
225
207
  ]
226
208
  target = build(:date64, records)
@@ -230,7 +212,6 @@ module RawRecordsSparseUnionArrayTests
230
212
  def test_timestamp_second
231
213
  records = [
232
214
  [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
233
- [nil],
234
215
  [{"1" => nil}],
235
216
  ]
236
217
  target = build({
@@ -244,7 +225,6 @@ module RawRecordsSparseUnionArrayTests
244
225
  def test_timestamp_milli
245
226
  records = [
246
227
  [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
247
- [nil],
248
228
  [{"1" => nil}],
249
229
  ]
250
230
  target = build({
@@ -258,7 +238,6 @@ module RawRecordsSparseUnionArrayTests
258
238
  def test_timestamp_micro
259
239
  records = [
260
240
  [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
261
- [nil],
262
241
  [{"1" => nil}],
263
242
  ]
264
243
  target = build({
@@ -272,7 +251,6 @@ module RawRecordsSparseUnionArrayTests
272
251
  def test_timestamp_nano
273
252
  records = [
274
253
  [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
275
- [nil],
276
254
  [{"1" => nil}],
277
255
  ]
278
256
  target = build({
@@ -288,7 +266,6 @@ module RawRecordsSparseUnionArrayTests
288
266
  records = [
289
267
  # 00:10:00
290
268
  [{"0" => Arrow::Time.new(unit, 60 * 10)}],
291
- [nil],
292
269
  [{"1" => nil}],
293
270
  ]
294
271
  target = build({
@@ -304,7 +281,6 @@ module RawRecordsSparseUnionArrayTests
304
281
  records = [
305
282
  # 00:10:00.123
306
283
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
307
- [nil],
308
284
  [{"1" => nil}],
309
285
  ]
310
286
  target = build({
@@ -320,7 +296,6 @@ module RawRecordsSparseUnionArrayTests
320
296
  records = [
321
297
  # 00:10:00.123456
322
298
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
323
- [nil],
324
299
  [{"1" => nil}],
325
300
  ]
326
301
  target = build({
@@ -336,7 +311,6 @@ module RawRecordsSparseUnionArrayTests
336
311
  records = [
337
312
  # 00:10:00.123456789
338
313
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
339
- [nil],
340
314
  [{"1" => nil}],
341
315
  ]
342
316
  target = build({
@@ -350,7 +324,6 @@ module RawRecordsSparseUnionArrayTests
350
324
  def test_decimal128
351
325
  records = [
352
326
  [{"0" => BigDecimal("92.92")}],
353
- [nil],
354
327
  [{"1" => nil}],
355
328
  ]
356
329
  target = build({
@@ -365,7 +338,6 @@ module RawRecordsSparseUnionArrayTests
365
338
  def test_list
366
339
  records = [
367
340
  [{"0" => [true, nil, false]}],
368
- [nil],
369
341
  [{"1" => nil}],
370
342
  ]
371
343
  target = build({
@@ -382,7 +354,6 @@ module RawRecordsSparseUnionArrayTests
382
354
  def test_struct
383
355
  records = [
384
356
  [{"0" => {"sub_field" => true}}],
385
- [nil],
386
357
  [{"1" => nil}],
387
358
  [{"0" => {"sub_field" => nil}}],
388
359
  ]
@@ -403,7 +374,6 @@ module RawRecordsSparseUnionArrayTests
403
374
  omit("Need to add support for SparseUnionArrayBuilder")
404
375
  records = [
405
376
  [{"0" => {"field1" => true}}],
406
- [nil],
407
377
  [{"1" => nil}],
408
378
  [{"0" => {"field2" => nil}}],
409
379
  ]
@@ -429,7 +399,6 @@ module RawRecordsSparseUnionArrayTests
429
399
  omit("Need to add support for DenseUnionArrayBuilder")
430
400
  records = [
431
401
  [{"0" => {"field1" => true}}],
432
- [nil],
433
402
  [{"1" => nil}],
434
403
  [{"0" => {"field2" => nil}}],
435
404
  ]
@@ -455,7 +424,6 @@ module RawRecordsSparseUnionArrayTests
455
424
  omit("Need to add support for DictionaryArrayBuilder")
456
425
  records = [
457
426
  [{"0" => "Ruby"}],
458
- [nil],
459
427
  [{"1" => nil}],
460
428
  [{"0" => "GLib"}],
461
429
  ]
@@ -19,6 +19,7 @@
19
19
 
20
20
  $VERBOSE = true
21
21
 
22
+ require "fileutils"
22
23
  require "pathname"
23
24
 
24
25
  (ENV["ARROW_DLL_PATH"] || "").split(File::PATH_SEPARATOR).each do |path|
@@ -31,6 +32,14 @@ lib_dir = base_dir + "lib"
31
32
  ext_dir = base_dir + "ext" + "arrow"
32
33
  test_dir = base_dir + "test"
33
34
 
35
+ build_dir = ENV["BUILD_DIR"]
36
+ if build_dir
37
+ build_dir = File.join(build_dir, "red-arrow")
38
+ FileUtils.mkdir_p(build_dir)
39
+ else
40
+ build_dir = ext_dir
41
+ end
42
+
34
43
  make = nil
35
44
  if ENV["NO_MAKE"] != "yes"
36
45
  if ENV["MAKE"]
@@ -42,15 +51,17 @@ if ENV["NO_MAKE"] != "yes"
42
51
  end
43
52
  end
44
53
  if make
45
- Dir.chdir(ext_dir.to_s) do
54
+ Dir.chdir(build_dir.to_s) do
46
55
  unless File.exist?("Makefile")
47
- system(RbConfig.ruby, "extconf.rb", "--enable-debug-build") or exit(false)
56
+ system(RbConfig.ruby,
57
+ (ext_dir + "extconf.rb").to_s,
58
+ "--enable-debug-build") or exit(false)
48
59
  end
49
60
  system("#{make} > #{File::NULL}") or exit(false)
50
61
  end
51
62
  end
52
63
 
53
- $LOAD_PATH.unshift(ext_dir.to_s)
64
+ $LOAD_PATH.unshift(build_dir.to_s)
54
65
  $LOAD_PATH.unshift(lib_dir.to_s)
55
66
 
56
67
  require_relative "helper"
@@ -76,20 +76,22 @@ class ArrayTest < Test::Unit::TestCase
76
76
  def setup
77
77
  values = [true, false, false, true]
78
78
  @array = Arrow::BooleanArray.new(values)
79
+ @options = Arrow::FilterOptions.new
80
+ @options.null_selection_behavior = :emit_null
79
81
  end
80
82
 
81
83
  test("Array: boolean") do
82
84
  filter = [nil, true, true, false]
83
85
  filtered_array = Arrow::BooleanArray.new([nil, false, false])
84
86
  assert_equal(filtered_array,
85
- @array.filter(filter))
87
+ @array.filter(filter, @options))
86
88
  end
87
89
 
88
90
  test("Arrow::BooleanArray") do
89
91
  filter = Arrow::BooleanArray.new([nil, true, true, false])
90
92
  filtered_array = Arrow::BooleanArray.new([nil, false, false])
91
93
  assert_equal(filtered_array,
92
- @array.filter(filter))
94
+ @array.filter(filter, @options))
93
95
  end
94
96
 
95
97
  test("Arrow::ChunkedArray") do
@@ -100,7 +102,7 @@ class ArrayTest < Test::Unit::TestCase
100
102
  filter = Arrow::ChunkedArray.new(chunks)
101
103
  filtered_array = Arrow::BooleanArray.new([nil, false, false])
102
104
  assert_equal(filtered_array,
103
- @array.filter(filter))
105
+ @array.filter(filter, @options))
104
106
  end
105
107
  end
106
108
 
@@ -16,6 +16,17 @@
16
16
  # under the License.
17
17
 
18
18
  class BufferTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ test("GC") do
21
+ data = "Hello"
22
+ data_id = data.object_id
23
+ _buffer = Arrow::Buffer.new(data)
24
+ data = nil
25
+ GC.start
26
+ assert_equal("Hello", ObjectSpace._id2ref(data_id))
27
+ end
28
+ end
29
+
19
30
  sub_test_case("instance methods") do
20
31
  def setup
21
32
  @buffer = Arrow::Buffer.new("Hello")
@@ -92,6 +92,8 @@ class ChunkedArrayTest < Test::Unit::TestCase
92
92
  Arrow::BooleanArray.new([false, true, false]),
93
93
  ]
94
94
  @chunked_array = Arrow::ChunkedArray.new(arrays)
95
+ @options = Arrow::FilterOptions.new
96
+ @options.null_selection_behavior = :emit_null
95
97
  end
96
98
 
97
99
  test("Array: boolean") do
@@ -102,7 +104,7 @@ class ChunkedArrayTest < Test::Unit::TestCase
102
104
  ]
103
105
  filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
104
106
  assert_equal(filtered_chunked_array,
105
- @chunked_array.filter(filter))
107
+ @chunked_array.filter(filter, @options))
106
108
  end
107
109
 
108
110
  test("Arrow::BooleanArray") do
@@ -113,7 +115,7 @@ class ChunkedArrayTest < Test::Unit::TestCase
113
115
  ]
114
116
  filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
115
117
  assert_equal(filtered_chunked_array,
116
- @chunked_array.filter(filter))
118
+ @chunked_array.filter(filter, @options))
117
119
  end
118
120
 
119
121
  test("Arrow::ChunkedArray") do
@@ -128,7 +130,7 @@ class ChunkedArrayTest < Test::Unit::TestCase
128
130
  ]
129
131
  filtered_chunked_array = Arrow::ChunkedArray.new(filtered_chunks)
130
132
  assert_equal(filtered_chunked_array,
131
- @chunked_array.filter(filter))
133
+ @chunked_array.filter(filter, @options))
132
134
  end
133
135
  end
134
136
 
@@ -28,12 +28,12 @@ class DenseUnionDataTypeTest < Test::Unit::TestCase
28
28
  end
29
29
 
30
30
  test("ordered arguments") do
31
- assert_equal("union[dense]<visible: bool=2, count: int32=9>",
31
+ assert_equal("dense_union<visible: bool=2, count: int32=9>",
32
32
  Arrow::DenseUnionDataType.new(@fields, [2, 9]).to_s)
33
33
  end
34
34
 
35
35
  test("description") do
36
- assert_equal("union[dense]<visible: bool=2, count: int32=9>",
36
+ assert_equal("dense_union<visible: bool=2, count: int32=9>",
37
37
  Arrow::DenseUnionDataType.new(fields: @fields,
38
38
  type_codes: [2, 9]).to_s)
39
39
  end
@@ -0,0 +1,41 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class DictionaryArrayTest < Test::Unit::TestCase
19
+ sub_test_case("instance methods") do
20
+ def setup
21
+ @values = ["a", "b", "c", "b", "a"]
22
+ @string_array = Arrow::StringArray.new(@values)
23
+ @array = @string_array.dictionary_encode
24
+ end
25
+
26
+ test("#[]") do
27
+ assert_equal(@values, @array.to_a)
28
+ end
29
+
30
+ test("#get_value") do
31
+ assert_equal([
32
+ @values[0],
33
+ @values[3],
34
+ ],
35
+ [
36
+ @array.get_value(0),
37
+ @array.get_value(3),
38
+ ])
39
+ end
40
+ end
41
+ end
@@ -18,17 +18,32 @@
18
18
  class FeatherTest < Test::Unit::TestCase
19
19
  include Helper::Fixture
20
20
 
21
- def test_save_load
21
+ def setup
22
22
  columns = {
23
23
  "message" => Arrow::StringArray.new(["Start", "Crash", "Shutdown"]),
24
24
  "is_critical" => Arrow::BooleanArray.new([false, true, false]),
25
25
  }
26
- table = Arrow::Table.new(columns)
26
+ @table = Arrow::Table.new(columns)
27
27
 
28
- output = Tempfile.new(["red-arrow", ".feather"])
29
- table.save(output.path)
30
- output.close
28
+ @output = Tempfile.new(["red-arrow", ".feather"])
29
+ begin
30
+ yield(@output)
31
+ ensure
32
+ @output.close!
33
+ end
34
+ end
35
+
36
+ def test_default
37
+ @table.save(@output.path)
38
+ @output.close
39
+
40
+ assert_equal(@table, Arrow::Table.load(@output.path))
41
+ end
42
+
43
+ def test_compression
44
+ @table.save(@output.path, compression: :zstd)
45
+ @output.close
31
46
 
32
- assert_equal(table, Arrow::Table.load(output.path))
47
+ assert_equal(@table, Arrow::Table.load(@output.path))
33
48
  end
34
49
  end