red-arrow 0.15.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +63 -33
  4. data/ext/arrow/raw-records.cpp +2 -1
  5. data/ext/arrow/values.cpp +2 -1
  6. data/lib/arrow/array-builder.rb +101 -52
  7. data/lib/arrow/array.rb +28 -10
  8. data/lib/arrow/{binary-array-builder.rb → buffer.rb} +7 -15
  9. data/lib/arrow/chunked-array.rb +2 -0
  10. data/lib/arrow/csv-loader.rb +5 -0
  11. data/lib/arrow/csv-read-options.rb +18 -0
  12. data/lib/arrow/data-type.rb +35 -2
  13. data/lib/arrow/decimal128-array-builder.rb +0 -2
  14. data/lib/arrow/dictionary-array.rb +24 -0
  15. data/lib/arrow/field.rb +1 -1
  16. data/lib/arrow/generic-filterable.rb +43 -0
  17. data/lib/arrow/generic-takeable.rb +38 -0
  18. data/lib/arrow/list-data-type.rb +58 -8
  19. data/lib/arrow/loader.rb +12 -1
  20. data/lib/arrow/null-array-builder.rb +1 -1
  21. data/lib/arrow/null-array.rb +24 -0
  22. data/lib/arrow/raw-table-converter.rb +47 -0
  23. data/lib/arrow/record-batch-iterator.rb +22 -0
  24. data/lib/arrow/record-batch.rb +8 -3
  25. data/lib/arrow/schema.rb +5 -2
  26. data/lib/arrow/struct-array-builder.rb +13 -7
  27. data/lib/arrow/struct-data-type.rb +0 -2
  28. data/lib/arrow/table-loader.rb +29 -6
  29. data/lib/arrow/table-saver.rb +37 -13
  30. data/lib/arrow/table.rb +20 -73
  31. data/lib/arrow/version.rb +1 -1
  32. data/red-arrow.gemspec +3 -1
  33. data/test/helper.rb +1 -0
  34. data/test/helper/omittable.rb +36 -0
  35. data/test/raw-records/test-dense-union-array.rb +1 -34
  36. data/test/raw-records/test-sparse-union-array.rb +1 -33
  37. data/test/run-test.rb +14 -3
  38. data/test/test-array-builder.rb +17 -0
  39. data/test/test-array.rb +104 -0
  40. data/test/test-buffer.rb +11 -0
  41. data/test/test-chunked-array.rb +96 -0
  42. data/test/test-csv-loader.rb +2 -2
  43. data/test/test-data-type.rb +11 -0
  44. data/test/test-dense-union-data-type.rb +2 -2
  45. data/test/test-dictionary-array.rb +41 -0
  46. data/test/test-feather.rb +21 -6
  47. data/test/test-list-data-type.rb +27 -1
  48. data/test/test-null-array.rb +23 -0
  49. data/test/test-record-batch-iterator.rb +37 -0
  50. data/test/test-record-batch.rb +14 -0
  51. data/test/test-schema.rb +16 -0
  52. data/test/test-slicer.rb +74 -30
  53. data/test/test-sparse-union-data-type.rb +2 -2
  54. data/test/test-struct-array-builder.rb +8 -4
  55. data/test/test-table.rb +153 -14
  56. data/test/test-timestamp-array.rb +19 -0
  57. data/test/values/test-dense-union-array.rb +1 -34
  58. data/test/values/test-sparse-union-array.rb +1 -33
  59. metadata +22 -8
@@ -19,6 +19,7 @@
19
19
 
20
20
  $VERBOSE = true
21
21
 
22
+ require "fileutils"
22
23
  require "pathname"
23
24
 
24
25
  (ENV["ARROW_DLL_PATH"] || "").split(File::PATH_SEPARATOR).each do |path|
@@ -31,6 +32,14 @@ lib_dir = base_dir + "lib"
31
32
  ext_dir = base_dir + "ext" + "arrow"
32
33
  test_dir = base_dir + "test"
33
34
 
35
+ build_dir = ENV["BUILD_DIR"]
36
+ if build_dir
37
+ build_dir = File.join(build_dir, "red-arrow")
38
+ FileUtils.mkdir_p(build_dir)
39
+ else
40
+ build_dir = ext_dir
41
+ end
42
+
34
43
  make = nil
35
44
  if ENV["NO_MAKE"] != "yes"
36
45
  if ENV["MAKE"]
@@ -42,15 +51,17 @@ if ENV["NO_MAKE"] != "yes"
42
51
  end
43
52
  end
44
53
  if make
45
- Dir.chdir(ext_dir.to_s) do
54
+ Dir.chdir(build_dir.to_s) do
46
55
  unless File.exist?("Makefile")
47
- system(RbConfig.ruby, "extconf.rb", "--enable-debug-build") or exit(false)
56
+ system(RbConfig.ruby,
57
+ (ext_dir + "extconf.rb").to_s,
58
+ "--enable-debug-build") or exit(false)
48
59
  end
49
60
  system("#{make} > #{File::NULL}") or exit(false)
50
61
  end
51
62
  end
52
63
 
53
- $LOAD_PATH.unshift(ext_dir.to_s)
64
+ $LOAD_PATH.unshift(build_dir.to_s)
54
65
  $LOAD_PATH.unshift(lib_dir.to_s)
55
66
 
56
67
  require_relative "helper"
@@ -60,6 +60,23 @@ class ArrayBuilderTest < Test::Unit::TestCase
60
60
  DateTime.new(2018, 1, 5, 0, 23, 21),
61
61
  ])
62
62
  end
63
+
64
+ test("list<boolean>s") do
65
+ assert_build(Arrow::ArrayBuilder,
66
+ [
67
+ [nil, true, false],
68
+ nil,
69
+ [false],
70
+ ])
71
+ end
72
+
73
+ test("list<string>s") do
74
+ assert_build(Arrow::ArrayBuilder,
75
+ [
76
+ ["Hello", "World"],
77
+ ["Apache Arrow"],
78
+ ])
79
+ end
63
80
  end
64
81
 
65
82
  sub_test_case("specific builder") do
@@ -63,5 +63,109 @@ class ArrayTest < Test::Unit::TestCase
63
63
  end
64
64
  end
65
65
  end
66
+
67
+ sub_test_case("#cast") do
68
+ test("Symbol") do
69
+ assert_equal(Arrow::Int32Array.new([1, 2, 3]),
70
+ Arrow::StringArray.new(["1", "2", "3"]).cast(:int32))
71
+ end
72
+ end
73
+ end
74
+
75
+ sub_test_case("#filter") do
76
+ def setup
77
+ values = [true, false, false, true]
78
+ @array = Arrow::BooleanArray.new(values)
79
+ @options = Arrow::FilterOptions.new
80
+ @options.null_selection_behavior = :emit_null
81
+ end
82
+
83
+ test("Array: boolean") do
84
+ filter = [nil, true, true, false]
85
+ filtered_array = Arrow::BooleanArray.new([nil, false, false])
86
+ assert_equal(filtered_array,
87
+ @array.filter(filter, @options))
88
+ end
89
+
90
+ test("Arrow::BooleanArray") do
91
+ filter = Arrow::BooleanArray.new([nil, true, true, false])
92
+ filtered_array = Arrow::BooleanArray.new([nil, false, false])
93
+ assert_equal(filtered_array,
94
+ @array.filter(filter, @options))
95
+ end
96
+
97
+ test("Arrow::ChunkedArray") do
98
+ chunks = [
99
+ Arrow::BooleanArray.new([nil, true]),
100
+ Arrow::BooleanArray.new([true, false]),
101
+ ]
102
+ filter = Arrow::ChunkedArray.new(chunks)
103
+ filtered_array = Arrow::BooleanArray.new([nil, false, false])
104
+ assert_equal(filtered_array,
105
+ @array.filter(filter, @options))
106
+ end
107
+ end
108
+
109
+ sub_test_case("#take") do
110
+ def setup
111
+ values = [1, 0 ,2]
112
+ @array = Arrow::Int16Array.new(values)
113
+ end
114
+
115
+ test("Arrow: boolean") do
116
+ indices = [1, 0, 2]
117
+ assert_equal(Arrow::Int16Array.new([0, 1, 2]),
118
+ @array.take(indices))
119
+ end
120
+
121
+ test("Arrow::Array") do
122
+ indices = Arrow::Int16Array.new([1, 0, 2])
123
+ assert_equal(Arrow::Int16Array.new([0, 1, 2]),
124
+ @array.take(indices))
125
+ end
126
+
127
+ test("Arrow::ChunkedArray") do
128
+ taken_chunks = [
129
+ Arrow::Int16Array.new([0, 1]),
130
+ Arrow::Int16Array.new([2])
131
+ ]
132
+ taken_chunked_array = Arrow::ChunkedArray.new(taken_chunks)
133
+ indices_chunks = [
134
+ Arrow::Int16Array.new([1, 0]),
135
+ Arrow::Int16Array.new([2])
136
+ ]
137
+ indices = Arrow::ChunkedArray.new(indices_chunks)
138
+ assert_equal(taken_chunked_array,
139
+ @array.take(indices))
140
+ end
141
+ end
142
+
143
+ sub_test_case("#is_in") do
144
+ def setup
145
+ values = [1, 0, 1, 2]
146
+ @array = Arrow::Int16Array.new(values)
147
+ end
148
+
149
+ test("Arrow: Array") do
150
+ right = [2, 0]
151
+ assert_equal(Arrow::BooleanArray.new([false, true, false, true]),
152
+ @array.is_in(right))
153
+ end
154
+
155
+ test("Arrow::Array") do
156
+ right = Arrow::Int16Array.new([2, 0])
157
+ assert_equal(Arrow::BooleanArray.new([false, true, false, true]),
158
+ @array.is_in(right))
159
+ end
160
+
161
+ test("Arrow::ChunkedArray") do
162
+ chunks = [
163
+ Arrow::Int16Array.new([1, 0]),
164
+ Arrow::Int16Array.new([1, 0, 3])
165
+ ]
166
+ right = Arrow::ChunkedArray.new(chunks)
167
+ assert_equal(Arrow::BooleanArray.new([true, true, true, false]),
168
+ @array.is_in(right))
169
+ end
66
170
  end
67
171
  end
@@ -16,6 +16,17 @@
16
16
  # under the License.
17
17
 
18
18
  class BufferTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ test("GC") do
21
+ data = "Hello"
22
+ data_id = data.object_id
23
+ _buffer = Arrow::Buffer.new(data)
24
+ data = nil
25
+ GC.start
26
+ assert_equal("Hello", ObjectSpace._id2ref(data_id))
27
+ end
28
+ end
29
+
19
30
  sub_test_case("instance methods") do
20
31
  def setup
21
32
  @buffer = Arrow::Buffer.new("Hello")
@@ -84,4 +84,100 @@ class ChunkedArrayTest < Test::Unit::TestCase
84
84
  end
85
85
  end
86
86
  end
87
+
88
+ sub_test_case("#filter") do
89
+ def setup
90
+ arrays = [
91
+ Arrow::BooleanArray.new([false, true]),
92
+ Arrow::BooleanArray.new([false, true, false]),
93
+ ]
94
+ @chunked_array = Arrow::ChunkedArray.new(arrays)
95
+ @options = Arrow::FilterOptions.new
96
+ @options.null_selection_behavior = :emit_null
97
+ end
98
+
99
+ test("Array: boolean") do
100
+ filter = [nil, true, true, false, true]
101
+ chunks = [
102
+ Arrow::BooleanArray.new([nil, true]),
103
+ Arrow::BooleanArray.new([false, false]),
104
+ ]
105
+ filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
106
+ assert_equal(filtered_chunked_array,
107
+ @chunked_array.filter(filter, @options))
108
+ end
109
+
110
+ test("Arrow::BooleanArray") do
111
+ filter = Arrow::BooleanArray.new([nil, true, true, false, true])
112
+ chunks = [
113
+ Arrow::BooleanArray.new([nil, true]),
114
+ Arrow::BooleanArray.new([false, false]),
115
+ ]
116
+ filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
117
+ assert_equal(filtered_chunked_array,
118
+ @chunked_array.filter(filter, @options))
119
+ end
120
+
121
+ test("Arrow::ChunkedArray") do
122
+ chunks = [
123
+ Arrow::BooleanArray.new([nil, true]),
124
+ Arrow::BooleanArray.new([true, false, true]),
125
+ ]
126
+ filter = Arrow::ChunkedArray.new(chunks)
127
+ filtered_chunks = [
128
+ Arrow::BooleanArray.new([nil, true]),
129
+ Arrow::BooleanArray.new([false, false]),
130
+ ]
131
+ filtered_chunked_array = Arrow::ChunkedArray.new(filtered_chunks)
132
+ assert_equal(filtered_chunked_array,
133
+ @chunked_array.filter(filter, @options))
134
+ end
135
+ end
136
+
137
+ sub_test_case("#take") do
138
+ def setup
139
+ chunks = [
140
+ Arrow::Int16Array.new([1, 0]),
141
+ Arrow::Int16Array.new([2]),
142
+ ]
143
+ @chunked_array = Arrow::ChunkedArray.new(chunks)
144
+ end
145
+
146
+ test("Arrow: boolean") do
147
+ chunks = [
148
+ Arrow::Int16Array.new([0, 1]),
149
+ Arrow::Int16Array.new([2])
150
+ ]
151
+ taken_chunked_array = Arrow::ChunkedArray.new(chunks)
152
+ indices = [1, 0, 2]
153
+ assert_equal(taken_chunked_array,
154
+ @chunked_array.take(indices))
155
+ end
156
+
157
+ test("Arrow::Array") do
158
+ chunks = [
159
+ Arrow::Int16Array.new([0, 1]),
160
+ Arrow::Int16Array.new([2])
161
+ ]
162
+ taken_chunked_array = Arrow::ChunkedArray.new(chunks)
163
+ indices = Arrow::Int16Array.new([1, 0, 2])
164
+ assert_equal(taken_chunked_array,
165
+ @chunked_array.take(indices))
166
+ end
167
+
168
+ test("Arrow::ChunkedArray") do
169
+ taken_chunks = [
170
+ Arrow::Int16Array.new([0, 1]),
171
+ Arrow::Int16Array.new([2])
172
+ ]
173
+ taken_chunked_array = Arrow::ChunkedArray.new(taken_chunks)
174
+ indices_chunks = [
175
+ Arrow::Int16Array.new([1, 0]),
176
+ Arrow::Int16Array.new([2])
177
+ ]
178
+ indices = Arrow::ChunkedArray.new(indices_chunks)
179
+ assert_equal(taken_chunked_array,
180
+ @chunked_array.take(indices))
181
+ end
182
+ end
87
183
  end
@@ -117,8 +117,8 @@ class CSVLoaderTest < Test::Unit::TestCase
117
117
  end
118
118
 
119
119
  sub_test_case("CSVReader") do
120
- def load_csv(data, options)
121
- Arrow::CSVLoader.load(data, options)
120
+ def load_csv(data, **options)
121
+ Arrow::CSVLoader.load(data, **options)
122
122
  end
123
123
 
124
124
  sub_test_case(":headers") do
@@ -48,6 +48,17 @@ class DataTypeTest < Test::Unit::TestCase
48
48
  assert_equal(Arrow::FixedSizeBinaryDataType.new(10),
49
49
  Arrow::DataType.resolve([:fixed_size_binary, 10]))
50
50
  end
51
+
52
+ test("abstract") do
53
+ message =
54
+ "abstract type: <:floating_point>: " +
55
+ "use one of not abstract type: [" +
56
+ "Arrow::DoubleDataType, " +
57
+ "Arrow::FloatDataType]"
58
+ assert_raise(ArgumentError.new(message)) do
59
+ Arrow::DataType.resolve(:floating_point)
60
+ end
61
+ end
51
62
  end
52
63
 
53
64
  sub_test_case("instance methods") do
@@ -28,12 +28,12 @@ class DenseUnionDataTypeTest < Test::Unit::TestCase
28
28
  end
29
29
 
30
30
  test("ordered arguments") do
31
- assert_equal("union[dense]<visible: bool=2, count: int32=9>",
31
+ assert_equal("dense_union<visible: bool=2, count: int32=9>",
32
32
  Arrow::DenseUnionDataType.new(@fields, [2, 9]).to_s)
33
33
  end
34
34
 
35
35
  test("description") do
36
- assert_equal("union[dense]<visible: bool=2, count: int32=9>",
36
+ assert_equal("dense_union<visible: bool=2, count: int32=9>",
37
37
  Arrow::DenseUnionDataType.new(fields: @fields,
38
38
  type_codes: [2, 9]).to_s)
39
39
  end
@@ -0,0 +1,41 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class DictionaryArrayTest < Test::Unit::TestCase
19
+ sub_test_case("instance methods") do
20
+ def setup
21
+ @values = ["a", "b", "c", "b", "a"]
22
+ @string_array = Arrow::StringArray.new(@values)
23
+ @array = @string_array.dictionary_encode
24
+ end
25
+
26
+ test("#[]") do
27
+ assert_equal(@values, @array.to_a)
28
+ end
29
+
30
+ test("#get_value") do
31
+ assert_equal([
32
+ @values[0],
33
+ @values[3],
34
+ ],
35
+ [
36
+ @array.get_value(0),
37
+ @array.get_value(3),
38
+ ])
39
+ end
40
+ end
41
+ end
@@ -18,17 +18,32 @@
18
18
  class FeatherTest < Test::Unit::TestCase
19
19
  include Helper::Fixture
20
20
 
21
- def test_save_load
21
+ def setup
22
22
  columns = {
23
23
  "message" => Arrow::StringArray.new(["Start", "Crash", "Shutdown"]),
24
24
  "is_critical" => Arrow::BooleanArray.new([false, true, false]),
25
25
  }
26
- table = Arrow::Table.new(columns)
26
+ @table = Arrow::Table.new(columns)
27
27
 
28
- output = Tempfile.new(["red-arrow", ".feather"])
29
- table.save(output.path)
30
- output.close
28
+ @output = Tempfile.new(["red-arrow", ".feather"])
29
+ begin
30
+ yield(@output)
31
+ ensure
32
+ @output.close!
33
+ end
34
+ end
35
+
36
+ def test_default
37
+ @table.save(@output.path)
38
+ @output.close
39
+
40
+ assert_equal(@table, Arrow::Table.load(@output.path))
41
+ end
42
+
43
+ def test_compression
44
+ @table.save(@output.path, compression: :zstd)
45
+ @output.close
31
46
 
32
- assert_equal(table, Arrow::Table.load(output.path))
47
+ assert_equal(@table, Arrow::Table.load(@output.path))
33
48
  end
34
49
  end
@@ -23,7 +23,7 @@ class ListDataTypeTest < Test::Unit::TestCase
23
23
  Arrow::ListDataType.new(field).to_s)
24
24
  end
25
25
 
26
- test("Hash") do
26
+ test("name: String") do
27
27
  assert_equal("list<tag: string>",
28
28
  Arrow::ListDataType.new(name: "tag", type: :string).to_s)
29
29
  end
@@ -39,5 +39,31 @@ class ListDataTypeTest < Test::Unit::TestCase
39
39
  assert_equal("list<tag: string>",
40
40
  Arrow::ListDataType.new(field: field_description).to_s)
41
41
  end
42
+
43
+ test("Arrow::DataType") do
44
+ data_type = Arrow::BooleanDataType.new
45
+ assert_equal("list<item: bool>",
46
+ Arrow::ListDataType.new(data_type).to_s)
47
+ end
48
+
49
+ test("String") do
50
+ assert_equal("list<item: bool>",
51
+ Arrow::ListDataType.new("boolean").to_s)
52
+ end
53
+
54
+ test("Symbol") do
55
+ assert_equal("list<item: bool>",
56
+ Arrow::ListDataType.new(:boolean).to_s)
57
+ end
58
+
59
+ test("[data type name, additional information]") do
60
+ assert_equal("list<item: time32[ms]>",
61
+ Arrow::ListDataType.new([:time32, :milli]).to_s)
62
+ end
63
+
64
+ test("type: Symbol") do
65
+ assert_equal("list<item: bool>",
66
+ Arrow::ListDataType.new(type: :boolean).to_s)
67
+ end
42
68
  end
43
69
  end