red-arrow 0.16.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +60 -30
  4. data/ext/arrow/extconf.rb +14 -3
  5. data/ext/arrow/raw-records.cpp +2 -1
  6. data/ext/arrow/values.cpp +2 -1
  7. data/lib/arrow/buffer.rb +28 -0
  8. data/lib/arrow/dictionary-array.rb +24 -0
  9. data/lib/arrow/generic-filterable.rb +7 -7
  10. data/lib/arrow/generic-takeable.rb +2 -2
  11. data/lib/arrow/loader.rb +3 -0
  12. data/lib/arrow/null-array-builder.rb +1 -1
  13. data/lib/arrow/raw-table-converter.rb +47 -0
  14. data/lib/arrow/record-batch-iterator.rb +22 -0
  15. data/lib/arrow/record-batch.rb +9 -1
  16. data/lib/arrow/schema.rb +5 -0
  17. data/lib/arrow/struct-array-builder.rb +13 -7
  18. data/lib/arrow/table-saver.rb +8 -4
  19. data/lib/arrow/table.rb +8 -25
  20. data/lib/arrow/version.rb +1 -1
  21. data/test/helper.rb +1 -0
  22. data/test/helper/omittable.rb +36 -0
  23. data/test/raw-records/test-dense-union-array.rb +1 -34
  24. data/test/raw-records/test-sparse-union-array.rb +1 -33
  25. data/test/run-test.rb +14 -3
  26. data/test/test-array.rb +5 -3
  27. data/test/test-buffer.rb +11 -0
  28. data/test/test-chunked-array.rb +5 -3
  29. data/test/test-dense-union-data-type.rb +2 -2
  30. data/test/test-dictionary-array.rb +41 -0
  31. data/test/test-feather.rb +21 -6
  32. data/test/test-record-batch-iterator.rb +37 -0
  33. data/test/test-record-batch.rb +14 -0
  34. data/test/test-schema.rb +16 -0
  35. data/test/test-sparse-union-data-type.rb +2 -2
  36. data/test/test-struct-array-builder.rb +8 -4
  37. data/test/test-table.rb +9 -3
  38. data/test/values/test-dense-union-array.rb +1 -34
  39. data/test/values/test-sparse-union-array.rb +1 -33
  40. metadata +68 -59
@@ -51,9 +51,7 @@ module RawRecordsSparseUnionArrayTests
51
51
  end
52
52
  records.each do |record|
53
53
  column = record[0]
54
- if column.nil?
55
- type_ids << nil
56
- elsif column.key?("0")
54
+ if column.key?("0")
57
55
  type_ids << type_codes[0]
58
56
  elsif column.key?("1")
59
57
  type_ids << type_codes[1]
@@ -71,7 +69,6 @@ module RawRecordsSparseUnionArrayTests
71
69
  def test_null
72
70
  records = [
73
71
  [{"0" => nil}],
74
- [nil],
75
72
  ]
76
73
  target = build(:null, records)
77
74
  assert_equal(records, target.raw_records)
@@ -80,7 +77,6 @@ module RawRecordsSparseUnionArrayTests
80
77
  def test_boolean
81
78
  records = [
82
79
  [{"0" => true}],
83
- [nil],
84
80
  [{"1" => nil}],
85
81
  ]
86
82
  target = build(:boolean, records)
@@ -90,7 +86,6 @@ module RawRecordsSparseUnionArrayTests
90
86
  def test_int8
91
87
  records = [
92
88
  [{"0" => -(2 ** 7)}],
93
- [nil],
94
89
  [{"1" => nil}],
95
90
  ]
96
91
  target = build(:int8, records)
@@ -100,7 +95,6 @@ module RawRecordsSparseUnionArrayTests
100
95
  def test_uint8
101
96
  records = [
102
97
  [{"0" => (2 ** 8) - 1}],
103
- [nil],
104
98
  [{"1" => nil}],
105
99
  ]
106
100
  target = build(:uint8, records)
@@ -110,7 +104,6 @@ module RawRecordsSparseUnionArrayTests
110
104
  def test_int16
111
105
  records = [
112
106
  [{"0" => -(2 ** 15)}],
113
- [nil],
114
107
  [{"1" => nil}],
115
108
  ]
116
109
  target = build(:int16, records)
@@ -120,7 +113,6 @@ module RawRecordsSparseUnionArrayTests
120
113
  def test_uint16
121
114
  records = [
122
115
  [{"0" => (2 ** 16) - 1}],
123
- [nil],
124
116
  [{"1" => nil}],
125
117
  ]
126
118
  target = build(:uint16, records)
@@ -130,7 +122,6 @@ module RawRecordsSparseUnionArrayTests
130
122
  def test_int32
131
123
  records = [
132
124
  [{"0" => -(2 ** 31)}],
133
- [nil],
134
125
  [{"1" => nil}],
135
126
  ]
136
127
  target = build(:int32, records)
@@ -140,7 +131,6 @@ module RawRecordsSparseUnionArrayTests
140
131
  def test_uint32
141
132
  records = [
142
133
  [{"0" => (2 ** 32) - 1}],
143
- [nil],
144
134
  [{"1" => nil}],
145
135
  ]
146
136
  target = build(:uint32, records)
@@ -150,7 +140,6 @@ module RawRecordsSparseUnionArrayTests
150
140
  def test_int64
151
141
  records = [
152
142
  [{"0" => -(2 ** 63)}],
153
- [nil],
154
143
  [{"1" => nil}],
155
144
  ]
156
145
  target = build(:int64, records)
@@ -160,7 +149,6 @@ module RawRecordsSparseUnionArrayTests
160
149
  def test_uint64
161
150
  records = [
162
151
  [{"0" => (2 ** 64) - 1}],
163
- [nil],
164
152
  [{"1" => nil}],
165
153
  ]
166
154
  target = build(:uint64, records)
@@ -170,7 +158,6 @@ module RawRecordsSparseUnionArrayTests
170
158
  def test_float
171
159
  records = [
172
160
  [{"0" => -1.0}],
173
- [nil],
174
161
  [{"1" => nil}],
175
162
  ]
176
163
  target = build(:float, records)
@@ -180,7 +167,6 @@ module RawRecordsSparseUnionArrayTests
180
167
  def test_double
181
168
  records = [
182
169
  [{"0" => -1.0}],
183
- [nil],
184
170
  [{"1" => nil}],
185
171
  ]
186
172
  target = build(:double, records)
@@ -190,7 +176,6 @@ module RawRecordsSparseUnionArrayTests
190
176
  def test_binary
191
177
  records = [
192
178
  [{"0" => "\xff".b}],
193
- [nil],
194
179
  [{"1" => nil}],
195
180
  ]
196
181
  target = build(:binary, records)
@@ -200,7 +185,6 @@ module RawRecordsSparseUnionArrayTests
200
185
  def test_string
201
186
  records = [
202
187
  [{"0" => "Ruby"}],
203
- [nil],
204
188
  [{"1" => nil}],
205
189
  ]
206
190
  target = build(:string, records)
@@ -210,7 +194,6 @@ module RawRecordsSparseUnionArrayTests
210
194
  def test_date32
211
195
  records = [
212
196
  [{"0" => Date.new(1960, 1, 1)}],
213
- [nil],
214
197
  [{"1" => nil}],
215
198
  ]
216
199
  target = build(:date32, records)
@@ -220,7 +203,6 @@ module RawRecordsSparseUnionArrayTests
220
203
  def test_date64
221
204
  records = [
222
205
  [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
223
- [nil],
224
206
  [{"1" => nil}],
225
207
  ]
226
208
  target = build(:date64, records)
@@ -230,7 +212,6 @@ module RawRecordsSparseUnionArrayTests
230
212
  def test_timestamp_second
231
213
  records = [
232
214
  [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
233
- [nil],
234
215
  [{"1" => nil}],
235
216
  ]
236
217
  target = build({
@@ -244,7 +225,6 @@ module RawRecordsSparseUnionArrayTests
244
225
  def test_timestamp_milli
245
226
  records = [
246
227
  [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
247
- [nil],
248
228
  [{"1" => nil}],
249
229
  ]
250
230
  target = build({
@@ -258,7 +238,6 @@ module RawRecordsSparseUnionArrayTests
258
238
  def test_timestamp_micro
259
239
  records = [
260
240
  [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
261
- [nil],
262
241
  [{"1" => nil}],
263
242
  ]
264
243
  target = build({
@@ -272,7 +251,6 @@ module RawRecordsSparseUnionArrayTests
272
251
  def test_timestamp_nano
273
252
  records = [
274
253
  [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
275
- [nil],
276
254
  [{"1" => nil}],
277
255
  ]
278
256
  target = build({
@@ -288,7 +266,6 @@ module RawRecordsSparseUnionArrayTests
288
266
  records = [
289
267
  # 00:10:00
290
268
  [{"0" => Arrow::Time.new(unit, 60 * 10)}],
291
- [nil],
292
269
  [{"1" => nil}],
293
270
  ]
294
271
  target = build({
@@ -304,7 +281,6 @@ module RawRecordsSparseUnionArrayTests
304
281
  records = [
305
282
  # 00:10:00.123
306
283
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
307
- [nil],
308
284
  [{"1" => nil}],
309
285
  ]
310
286
  target = build({
@@ -320,7 +296,6 @@ module RawRecordsSparseUnionArrayTests
320
296
  records = [
321
297
  # 00:10:00.123456
322
298
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
323
- [nil],
324
299
  [{"1" => nil}],
325
300
  ]
326
301
  target = build({
@@ -336,7 +311,6 @@ module RawRecordsSparseUnionArrayTests
336
311
  records = [
337
312
  # 00:10:00.123456789
338
313
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
339
- [nil],
340
314
  [{"1" => nil}],
341
315
  ]
342
316
  target = build({
@@ -350,7 +324,6 @@ module RawRecordsSparseUnionArrayTests
350
324
  def test_decimal128
351
325
  records = [
352
326
  [{"0" => BigDecimal("92.92")}],
353
- [nil],
354
327
  [{"1" => nil}],
355
328
  ]
356
329
  target = build({
@@ -365,7 +338,6 @@ module RawRecordsSparseUnionArrayTests
365
338
  def test_list
366
339
  records = [
367
340
  [{"0" => [true, nil, false]}],
368
- [nil],
369
341
  [{"1" => nil}],
370
342
  ]
371
343
  target = build({
@@ -382,7 +354,6 @@ module RawRecordsSparseUnionArrayTests
382
354
  def test_struct
383
355
  records = [
384
356
  [{"0" => {"sub_field" => true}}],
385
- [nil],
386
357
  [{"1" => nil}],
387
358
  [{"0" => {"sub_field" => nil}}],
388
359
  ]
@@ -403,7 +374,6 @@ module RawRecordsSparseUnionArrayTests
403
374
  omit("Need to add support for SparseUnionArrayBuilder")
404
375
  records = [
405
376
  [{"0" => {"field1" => true}}],
406
- [nil],
407
377
  [{"1" => nil}],
408
378
  [{"0" => {"field2" => nil}}],
409
379
  ]
@@ -429,7 +399,6 @@ module RawRecordsSparseUnionArrayTests
429
399
  omit("Need to add support for DenseUnionArrayBuilder")
430
400
  records = [
431
401
  [{"0" => {"field1" => true}}],
432
- [nil],
433
402
  [{"1" => nil}],
434
403
  [{"0" => {"field2" => nil}}],
435
404
  ]
@@ -455,7 +424,6 @@ module RawRecordsSparseUnionArrayTests
455
424
  omit("Need to add support for DictionaryArrayBuilder")
456
425
  records = [
457
426
  [{"0" => "Ruby"}],
458
- [nil],
459
427
  [{"1" => nil}],
460
428
  [{"0" => "GLib"}],
461
429
  ]
@@ -19,6 +19,7 @@
19
19
 
20
20
  $VERBOSE = true
21
21
 
22
+ require "fileutils"
22
23
  require "pathname"
23
24
 
24
25
  (ENV["ARROW_DLL_PATH"] || "").split(File::PATH_SEPARATOR).each do |path|
@@ -31,6 +32,14 @@ lib_dir = base_dir + "lib"
31
32
  ext_dir = base_dir + "ext" + "arrow"
32
33
  test_dir = base_dir + "test"
33
34
 
35
+ build_dir = ENV["BUILD_DIR"]
36
+ if build_dir
37
+ build_dir = File.join(build_dir, "red-arrow")
38
+ FileUtils.mkdir_p(build_dir)
39
+ else
40
+ build_dir = ext_dir
41
+ end
42
+
34
43
  make = nil
35
44
  if ENV["NO_MAKE"] != "yes"
36
45
  if ENV["MAKE"]
@@ -42,15 +51,17 @@ if ENV["NO_MAKE"] != "yes"
42
51
  end
43
52
  end
44
53
  if make
45
- Dir.chdir(ext_dir.to_s) do
54
+ Dir.chdir(build_dir.to_s) do
46
55
  unless File.exist?("Makefile")
47
- system(RbConfig.ruby, "extconf.rb", "--enable-debug-build") or exit(false)
56
+ system(RbConfig.ruby,
57
+ (ext_dir + "extconf.rb").to_s,
58
+ "--enable-debug-build") or exit(false)
48
59
  end
49
60
  system("#{make} > #{File::NULL}") or exit(false)
50
61
  end
51
62
  end
52
63
 
53
- $LOAD_PATH.unshift(ext_dir.to_s)
64
+ $LOAD_PATH.unshift(build_dir.to_s)
54
65
  $LOAD_PATH.unshift(lib_dir.to_s)
55
66
 
56
67
  require_relative "helper"
@@ -76,20 +76,22 @@ class ArrayTest < Test::Unit::TestCase
76
76
  def setup
77
77
  values = [true, false, false, true]
78
78
  @array = Arrow::BooleanArray.new(values)
79
+ @options = Arrow::FilterOptions.new
80
+ @options.null_selection_behavior = :emit_null
79
81
  end
80
82
 
81
83
  test("Array: boolean") do
82
84
  filter = [nil, true, true, false]
83
85
  filtered_array = Arrow::BooleanArray.new([nil, false, false])
84
86
  assert_equal(filtered_array,
85
- @array.filter(filter))
87
+ @array.filter(filter, @options))
86
88
  end
87
89
 
88
90
  test("Arrow::BooleanArray") do
89
91
  filter = Arrow::BooleanArray.new([nil, true, true, false])
90
92
  filtered_array = Arrow::BooleanArray.new([nil, false, false])
91
93
  assert_equal(filtered_array,
92
- @array.filter(filter))
94
+ @array.filter(filter, @options))
93
95
  end
94
96
 
95
97
  test("Arrow::ChunkedArray") do
@@ -100,7 +102,7 @@ class ArrayTest < Test::Unit::TestCase
100
102
  filter = Arrow::ChunkedArray.new(chunks)
101
103
  filtered_array = Arrow::BooleanArray.new([nil, false, false])
102
104
  assert_equal(filtered_array,
103
- @array.filter(filter))
105
+ @array.filter(filter, @options))
104
106
  end
105
107
  end
106
108
 
@@ -16,6 +16,17 @@
16
16
  # under the License.
17
17
 
18
18
  class BufferTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ test("GC") do
21
+ data = "Hello"
22
+ data_id = data.object_id
23
+ _buffer = Arrow::Buffer.new(data)
24
+ data = nil
25
+ GC.start
26
+ assert_equal("Hello", ObjectSpace._id2ref(data_id))
27
+ end
28
+ end
29
+
19
30
  sub_test_case("instance methods") do
20
31
  def setup
21
32
  @buffer = Arrow::Buffer.new("Hello")
@@ -92,6 +92,8 @@ class ChunkedArrayTest < Test::Unit::TestCase
92
92
  Arrow::BooleanArray.new([false, true, false]),
93
93
  ]
94
94
  @chunked_array = Arrow::ChunkedArray.new(arrays)
95
+ @options = Arrow::FilterOptions.new
96
+ @options.null_selection_behavior = :emit_null
95
97
  end
96
98
 
97
99
  test("Array: boolean") do
@@ -102,7 +104,7 @@ class ChunkedArrayTest < Test::Unit::TestCase
102
104
  ]
103
105
  filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
104
106
  assert_equal(filtered_chunked_array,
105
- @chunked_array.filter(filter))
107
+ @chunked_array.filter(filter, @options))
106
108
  end
107
109
 
108
110
  test("Arrow::BooleanArray") do
@@ -113,7 +115,7 @@ class ChunkedArrayTest < Test::Unit::TestCase
113
115
  ]
114
116
  filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
115
117
  assert_equal(filtered_chunked_array,
116
- @chunked_array.filter(filter))
118
+ @chunked_array.filter(filter, @options))
117
119
  end
118
120
 
119
121
  test("Arrow::ChunkedArray") do
@@ -128,7 +130,7 @@ class ChunkedArrayTest < Test::Unit::TestCase
128
130
  ]
129
131
  filtered_chunked_array = Arrow::ChunkedArray.new(filtered_chunks)
130
132
  assert_equal(filtered_chunked_array,
131
- @chunked_array.filter(filter))
133
+ @chunked_array.filter(filter, @options))
132
134
  end
133
135
  end
134
136
 
@@ -28,12 +28,12 @@ class DenseUnionDataTypeTest < Test::Unit::TestCase
28
28
  end
29
29
 
30
30
  test("ordered arguments") do
31
- assert_equal("union[dense]<visible: bool=2, count: int32=9>",
31
+ assert_equal("dense_union<visible: bool=2, count: int32=9>",
32
32
  Arrow::DenseUnionDataType.new(@fields, [2, 9]).to_s)
33
33
  end
34
34
 
35
35
  test("description") do
36
- assert_equal("union[dense]<visible: bool=2, count: int32=9>",
36
+ assert_equal("dense_union<visible: bool=2, count: int32=9>",
37
37
  Arrow::DenseUnionDataType.new(fields: @fields,
38
38
  type_codes: [2, 9]).to_s)
39
39
  end
@@ -0,0 +1,41 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class DictionaryArrayTest < Test::Unit::TestCase
19
+ sub_test_case("instance methods") do
20
+ def setup
21
+ @values = ["a", "b", "c", "b", "a"]
22
+ @string_array = Arrow::StringArray.new(@values)
23
+ @array = @string_array.dictionary_encode
24
+ end
25
+
26
+ test("#[]") do
27
+ assert_equal(@values, @array.to_a)
28
+ end
29
+
30
+ test("#get_value") do
31
+ assert_equal([
32
+ @values[0],
33
+ @values[3],
34
+ ],
35
+ [
36
+ @array.get_value(0),
37
+ @array.get_value(3),
38
+ ])
39
+ end
40
+ end
41
+ end
@@ -18,17 +18,32 @@
18
18
  class FeatherTest < Test::Unit::TestCase
19
19
  include Helper::Fixture
20
20
 
21
- def test_save_load
21
+ def setup
22
22
  columns = {
23
23
  "message" => Arrow::StringArray.new(["Start", "Crash", "Shutdown"]),
24
24
  "is_critical" => Arrow::BooleanArray.new([false, true, false]),
25
25
  }
26
- table = Arrow::Table.new(columns)
26
+ @table = Arrow::Table.new(columns)
27
27
 
28
- output = Tempfile.new(["red-arrow", ".feather"])
29
- table.save(output.path)
30
- output.close
28
+ @output = Tempfile.new(["red-arrow", ".feather"])
29
+ begin
30
+ yield(@output)
31
+ ensure
32
+ @output.close!
33
+ end
34
+ end
35
+
36
+ def test_default
37
+ @table.save(@output.path)
38
+ @output.close
39
+
40
+ assert_equal(@table, Arrow::Table.load(@output.path))
41
+ end
42
+
43
+ def test_compression
44
+ @table.save(@output.path, compression: :zstd)
45
+ @output.close
31
46
 
32
- assert_equal(table, Arrow::Table.load(output.path))
47
+ assert_equal(@table, Arrow::Table.load(@output.path))
33
48
  end
34
49
  end