red-arrow 0.15.1 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +63 -33
  4. data/ext/arrow/raw-records.cpp +2 -1
  5. data/ext/arrow/values.cpp +2 -1
  6. data/lib/arrow/array-builder.rb +101 -52
  7. data/lib/arrow/array.rb +28 -10
  8. data/lib/arrow/{binary-array-builder.rb → buffer.rb} +7 -15
  9. data/lib/arrow/chunked-array.rb +2 -0
  10. data/lib/arrow/csv-loader.rb +5 -0
  11. data/lib/arrow/csv-read-options.rb +18 -0
  12. data/lib/arrow/data-type.rb +35 -2
  13. data/lib/arrow/decimal128-array-builder.rb +0 -2
  14. data/lib/arrow/dictionary-array.rb +24 -0
  15. data/lib/arrow/field.rb +1 -1
  16. data/lib/arrow/generic-filterable.rb +43 -0
  17. data/lib/arrow/generic-takeable.rb +38 -0
  18. data/lib/arrow/list-data-type.rb +58 -8
  19. data/lib/arrow/loader.rb +12 -1
  20. data/lib/arrow/null-array-builder.rb +1 -1
  21. data/lib/arrow/null-array.rb +24 -0
  22. data/lib/arrow/raw-table-converter.rb +47 -0
  23. data/lib/arrow/record-batch-iterator.rb +22 -0
  24. data/lib/arrow/record-batch.rb +8 -3
  25. data/lib/arrow/schema.rb +5 -2
  26. data/lib/arrow/struct-array-builder.rb +13 -7
  27. data/lib/arrow/struct-data-type.rb +0 -2
  28. data/lib/arrow/table-loader.rb +29 -6
  29. data/lib/arrow/table-saver.rb +37 -13
  30. data/lib/arrow/table.rb +20 -73
  31. data/lib/arrow/version.rb +1 -1
  32. data/red-arrow.gemspec +3 -1
  33. data/test/helper.rb +1 -0
  34. data/test/helper/omittable.rb +36 -0
  35. data/test/raw-records/test-dense-union-array.rb +1 -34
  36. data/test/raw-records/test-sparse-union-array.rb +1 -33
  37. data/test/run-test.rb +14 -3
  38. data/test/test-array-builder.rb +17 -0
  39. data/test/test-array.rb +104 -0
  40. data/test/test-buffer.rb +11 -0
  41. data/test/test-chunked-array.rb +96 -0
  42. data/test/test-csv-loader.rb +2 -2
  43. data/test/test-data-type.rb +11 -0
  44. data/test/test-dense-union-data-type.rb +2 -2
  45. data/test/test-dictionary-array.rb +41 -0
  46. data/test/test-feather.rb +21 -6
  47. data/test/test-list-data-type.rb +27 -1
  48. data/test/test-null-array.rb +23 -0
  49. data/test/test-record-batch-iterator.rb +37 -0
  50. data/test/test-record-batch.rb +14 -0
  51. data/test/test-schema.rb +16 -0
  52. data/test/test-slicer.rb +74 -30
  53. data/test/test-sparse-union-data-type.rb +2 -2
  54. data/test/test-struct-array-builder.rb +8 -4
  55. data/test/test-table.rb +153 -14
  56. data/test/test-timestamp-array.rb +19 -0
  57. data/test/values/test-dense-union-array.rb +1 -34
  58. data/test/values/test-sparse-union-array.rb +1 -33
  59. metadata +22 -8
@@ -19,6 +19,7 @@
19
19
 
20
20
  $VERBOSE = true
21
21
 
22
+ require "fileutils"
22
23
  require "pathname"
23
24
 
24
25
  (ENV["ARROW_DLL_PATH"] || "").split(File::PATH_SEPARATOR).each do |path|
@@ -31,6 +32,14 @@ lib_dir = base_dir + "lib"
31
32
  ext_dir = base_dir + "ext" + "arrow"
32
33
  test_dir = base_dir + "test"
33
34
 
35
+ build_dir = ENV["BUILD_DIR"]
36
+ if build_dir
37
+ build_dir = File.join(build_dir, "red-arrow")
38
+ FileUtils.mkdir_p(build_dir)
39
+ else
40
+ build_dir = ext_dir
41
+ end
42
+
34
43
  make = nil
35
44
  if ENV["NO_MAKE"] != "yes"
36
45
  if ENV["MAKE"]
@@ -42,15 +51,17 @@ if ENV["NO_MAKE"] != "yes"
42
51
  end
43
52
  end
44
53
  if make
45
- Dir.chdir(ext_dir.to_s) do
54
+ Dir.chdir(build_dir.to_s) do
46
55
  unless File.exist?("Makefile")
47
- system(RbConfig.ruby, "extconf.rb", "--enable-debug-build") or exit(false)
56
+ system(RbConfig.ruby,
57
+ (ext_dir + "extconf.rb").to_s,
58
+ "--enable-debug-build") or exit(false)
48
59
  end
49
60
  system("#{make} > #{File::NULL}") or exit(false)
50
61
  end
51
62
  end
52
63
 
53
- $LOAD_PATH.unshift(ext_dir.to_s)
64
+ $LOAD_PATH.unshift(build_dir.to_s)
54
65
  $LOAD_PATH.unshift(lib_dir.to_s)
55
66
 
56
67
  require_relative "helper"
@@ -60,6 +60,23 @@ class ArrayBuilderTest < Test::Unit::TestCase
60
60
  DateTime.new(2018, 1, 5, 0, 23, 21),
61
61
  ])
62
62
  end
63
+
64
+ test("list<boolean>s") do
65
+ assert_build(Arrow::ArrayBuilder,
66
+ [
67
+ [nil, true, false],
68
+ nil,
69
+ [false],
70
+ ])
71
+ end
72
+
73
+ test("list<string>s") do
74
+ assert_build(Arrow::ArrayBuilder,
75
+ [
76
+ ["Hello", "World"],
77
+ ["Apache Arrow"],
78
+ ])
79
+ end
63
80
  end
64
81
 
65
82
  sub_test_case("specific builder") do
@@ -63,5 +63,109 @@ class ArrayTest < Test::Unit::TestCase
63
63
  end
64
64
  end
65
65
  end
66
+
67
+ sub_test_case("#cast") do
68
+ test("Symbol") do
69
+ assert_equal(Arrow::Int32Array.new([1, 2, 3]),
70
+ Arrow::StringArray.new(["1", "2", "3"]).cast(:int32))
71
+ end
72
+ end
73
+ end
74
+
75
+ sub_test_case("#filter") do
76
+ def setup
77
+ values = [true, false, false, true]
78
+ @array = Arrow::BooleanArray.new(values)
79
+ @options = Arrow::FilterOptions.new
80
+ @options.null_selection_behavior = :emit_null
81
+ end
82
+
83
+ test("Array: boolean") do
84
+ filter = [nil, true, true, false]
85
+ filtered_array = Arrow::BooleanArray.new([nil, false, false])
86
+ assert_equal(filtered_array,
87
+ @array.filter(filter, @options))
88
+ end
89
+
90
+ test("Arrow::BooleanArray") do
91
+ filter = Arrow::BooleanArray.new([nil, true, true, false])
92
+ filtered_array = Arrow::BooleanArray.new([nil, false, false])
93
+ assert_equal(filtered_array,
94
+ @array.filter(filter, @options))
95
+ end
96
+
97
+ test("Arrow::ChunkedArray") do
98
+ chunks = [
99
+ Arrow::BooleanArray.new([nil, true]),
100
+ Arrow::BooleanArray.new([true, false]),
101
+ ]
102
+ filter = Arrow::ChunkedArray.new(chunks)
103
+ filtered_array = Arrow::BooleanArray.new([nil, false, false])
104
+ assert_equal(filtered_array,
105
+ @array.filter(filter, @options))
106
+ end
107
+ end
108
+
109
+ sub_test_case("#take") do
110
+ def setup
111
+ values = [1, 0 ,2]
112
+ @array = Arrow::Int16Array.new(values)
113
+ end
114
+
115
+ test("Arrow: boolean") do
116
+ indices = [1, 0, 2]
117
+ assert_equal(Arrow::Int16Array.new([0, 1, 2]),
118
+ @array.take(indices))
119
+ end
120
+
121
+ test("Arrow::Array") do
122
+ indices = Arrow::Int16Array.new([1, 0, 2])
123
+ assert_equal(Arrow::Int16Array.new([0, 1, 2]),
124
+ @array.take(indices))
125
+ end
126
+
127
+ test("Arrow::ChunkedArray") do
128
+ taken_chunks = [
129
+ Arrow::Int16Array.new([0, 1]),
130
+ Arrow::Int16Array.new([2])
131
+ ]
132
+ taken_chunked_array = Arrow::ChunkedArray.new(taken_chunks)
133
+ indices_chunks = [
134
+ Arrow::Int16Array.new([1, 0]),
135
+ Arrow::Int16Array.new([2])
136
+ ]
137
+ indices = Arrow::ChunkedArray.new(indices_chunks)
138
+ assert_equal(taken_chunked_array,
139
+ @array.take(indices))
140
+ end
141
+ end
142
+
143
+ sub_test_case("#is_in") do
144
+ def setup
145
+ values = [1, 0, 1, 2]
146
+ @array = Arrow::Int16Array.new(values)
147
+ end
148
+
149
+ test("Arrow: Array") do
150
+ right = [2, 0]
151
+ assert_equal(Arrow::BooleanArray.new([false, true, false, true]),
152
+ @array.is_in(right))
153
+ end
154
+
155
+ test("Arrow::Array") do
156
+ right = Arrow::Int16Array.new([2, 0])
157
+ assert_equal(Arrow::BooleanArray.new([false, true, false, true]),
158
+ @array.is_in(right))
159
+ end
160
+
161
+ test("Arrow::ChunkedArray") do
162
+ chunks = [
163
+ Arrow::Int16Array.new([1, 0]),
164
+ Arrow::Int16Array.new([1, 0, 3])
165
+ ]
166
+ right = Arrow::ChunkedArray.new(chunks)
167
+ assert_equal(Arrow::BooleanArray.new([true, true, true, false]),
168
+ @array.is_in(right))
169
+ end
66
170
  end
67
171
  end
@@ -16,6 +16,17 @@
16
16
  # under the License.
17
17
 
18
18
  class BufferTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ test("GC") do
21
+ data = "Hello"
22
+ data_id = data.object_id
23
+ _buffer = Arrow::Buffer.new(data)
24
+ data = nil
25
+ GC.start
26
+ assert_equal("Hello", ObjectSpace._id2ref(data_id))
27
+ end
28
+ end
29
+
19
30
  sub_test_case("instance methods") do
20
31
  def setup
21
32
  @buffer = Arrow::Buffer.new("Hello")
@@ -84,4 +84,100 @@ class ChunkedArrayTest < Test::Unit::TestCase
84
84
  end
85
85
  end
86
86
  end
87
+
88
+ sub_test_case("#filter") do
89
+ def setup
90
+ arrays = [
91
+ Arrow::BooleanArray.new([false, true]),
92
+ Arrow::BooleanArray.new([false, true, false]),
93
+ ]
94
+ @chunked_array = Arrow::ChunkedArray.new(arrays)
95
+ @options = Arrow::FilterOptions.new
96
+ @options.null_selection_behavior = :emit_null
97
+ end
98
+
99
+ test("Array: boolean") do
100
+ filter = [nil, true, true, false, true]
101
+ chunks = [
102
+ Arrow::BooleanArray.new([nil, true]),
103
+ Arrow::BooleanArray.new([false, false]),
104
+ ]
105
+ filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
106
+ assert_equal(filtered_chunked_array,
107
+ @chunked_array.filter(filter, @options))
108
+ end
109
+
110
+ test("Arrow::BooleanArray") do
111
+ filter = Arrow::BooleanArray.new([nil, true, true, false, true])
112
+ chunks = [
113
+ Arrow::BooleanArray.new([nil, true]),
114
+ Arrow::BooleanArray.new([false, false]),
115
+ ]
116
+ filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
117
+ assert_equal(filtered_chunked_array,
118
+ @chunked_array.filter(filter, @options))
119
+ end
120
+
121
+ test("Arrow::ChunkedArray") do
122
+ chunks = [
123
+ Arrow::BooleanArray.new([nil, true]),
124
+ Arrow::BooleanArray.new([true, false, true]),
125
+ ]
126
+ filter = Arrow::ChunkedArray.new(chunks)
127
+ filtered_chunks = [
128
+ Arrow::BooleanArray.new([nil, true]),
129
+ Arrow::BooleanArray.new([false, false]),
130
+ ]
131
+ filtered_chunked_array = Arrow::ChunkedArray.new(filtered_chunks)
132
+ assert_equal(filtered_chunked_array,
133
+ @chunked_array.filter(filter, @options))
134
+ end
135
+ end
136
+
137
+ sub_test_case("#take") do
138
+ def setup
139
+ chunks = [
140
+ Arrow::Int16Array.new([1, 0]),
141
+ Arrow::Int16Array.new([2]),
142
+ ]
143
+ @chunked_array = Arrow::ChunkedArray.new(chunks)
144
+ end
145
+
146
+ test("Arrow: boolean") do
147
+ chunks = [
148
+ Arrow::Int16Array.new([0, 1]),
149
+ Arrow::Int16Array.new([2])
150
+ ]
151
+ taken_chunked_array = Arrow::ChunkedArray.new(chunks)
152
+ indices = [1, 0, 2]
153
+ assert_equal(taken_chunked_array,
154
+ @chunked_array.take(indices))
155
+ end
156
+
157
+ test("Arrow::Array") do
158
+ chunks = [
159
+ Arrow::Int16Array.new([0, 1]),
160
+ Arrow::Int16Array.new([2])
161
+ ]
162
+ taken_chunked_array = Arrow::ChunkedArray.new(chunks)
163
+ indices = Arrow::Int16Array.new([1, 0, 2])
164
+ assert_equal(taken_chunked_array,
165
+ @chunked_array.take(indices))
166
+ end
167
+
168
+ test("Arrow::ChunkedArray") do
169
+ taken_chunks = [
170
+ Arrow::Int16Array.new([0, 1]),
171
+ Arrow::Int16Array.new([2])
172
+ ]
173
+ taken_chunked_array = Arrow::ChunkedArray.new(taken_chunks)
174
+ indices_chunks = [
175
+ Arrow::Int16Array.new([1, 0]),
176
+ Arrow::Int16Array.new([2])
177
+ ]
178
+ indices = Arrow::ChunkedArray.new(indices_chunks)
179
+ assert_equal(taken_chunked_array,
180
+ @chunked_array.take(indices))
181
+ end
182
+ end
87
183
  end
@@ -117,8 +117,8 @@ class CSVLoaderTest < Test::Unit::TestCase
117
117
  end
118
118
 
119
119
  sub_test_case("CSVReader") do
120
- def load_csv(data, options)
121
- Arrow::CSVLoader.load(data, options)
120
+ def load_csv(data, **options)
121
+ Arrow::CSVLoader.load(data, **options)
122
122
  end
123
123
 
124
124
  sub_test_case(":headers") do
@@ -48,6 +48,17 @@ class DataTypeTest < Test::Unit::TestCase
48
48
  assert_equal(Arrow::FixedSizeBinaryDataType.new(10),
49
49
  Arrow::DataType.resolve([:fixed_size_binary, 10]))
50
50
  end
51
+
52
+ test("abstract") do
53
+ message =
54
+ "abstract type: <:floating_point>: " +
55
+ "use one of not abstract type: [" +
56
+ "Arrow::DoubleDataType, " +
57
+ "Arrow::FloatDataType]"
58
+ assert_raise(ArgumentError.new(message)) do
59
+ Arrow::DataType.resolve(:floating_point)
60
+ end
61
+ end
51
62
  end
52
63
 
53
64
  sub_test_case("instance methods") do
@@ -28,12 +28,12 @@ class DenseUnionDataTypeTest < Test::Unit::TestCase
28
28
  end
29
29
 
30
30
  test("ordered arguments") do
31
- assert_equal("union[dense]<visible: bool=2, count: int32=9>",
31
+ assert_equal("dense_union<visible: bool=2, count: int32=9>",
32
32
  Arrow::DenseUnionDataType.new(@fields, [2, 9]).to_s)
33
33
  end
34
34
 
35
35
  test("description") do
36
- assert_equal("union[dense]<visible: bool=2, count: int32=9>",
36
+ assert_equal("dense_union<visible: bool=2, count: int32=9>",
37
37
  Arrow::DenseUnionDataType.new(fields: @fields,
38
38
  type_codes: [2, 9]).to_s)
39
39
  end
@@ -0,0 +1,41 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class DictionaryArrayTest < Test::Unit::TestCase
19
+ sub_test_case("instance methods") do
20
+ def setup
21
+ @values = ["a", "b", "c", "b", "a"]
22
+ @string_array = Arrow::StringArray.new(@values)
23
+ @array = @string_array.dictionary_encode
24
+ end
25
+
26
+ test("#[]") do
27
+ assert_equal(@values, @array.to_a)
28
+ end
29
+
30
+ test("#get_value") do
31
+ assert_equal([
32
+ @values[0],
33
+ @values[3],
34
+ ],
35
+ [
36
+ @array.get_value(0),
37
+ @array.get_value(3),
38
+ ])
39
+ end
40
+ end
41
+ end
@@ -18,17 +18,32 @@
18
18
  class FeatherTest < Test::Unit::TestCase
19
19
  include Helper::Fixture
20
20
 
21
- def test_save_load
21
+ def setup
22
22
  columns = {
23
23
  "message" => Arrow::StringArray.new(["Start", "Crash", "Shutdown"]),
24
24
  "is_critical" => Arrow::BooleanArray.new([false, true, false]),
25
25
  }
26
- table = Arrow::Table.new(columns)
26
+ @table = Arrow::Table.new(columns)
27
27
 
28
- output = Tempfile.new(["red-arrow", ".feather"])
29
- table.save(output.path)
30
- output.close
28
+ @output = Tempfile.new(["red-arrow", ".feather"])
29
+ begin
30
+ yield(@output)
31
+ ensure
32
+ @output.close!
33
+ end
34
+ end
35
+
36
+ def test_default
37
+ @table.save(@output.path)
38
+ @output.close
39
+
40
+ assert_equal(@table, Arrow::Table.load(@output.path))
41
+ end
42
+
43
+ def test_compression
44
+ @table.save(@output.path, compression: :zstd)
45
+ @output.close
31
46
 
32
- assert_equal(table, Arrow::Table.load(output.path))
47
+ assert_equal(@table, Arrow::Table.load(@output.path))
33
48
  end
34
49
  end
@@ -23,7 +23,7 @@ class ListDataTypeTest < Test::Unit::TestCase
23
23
  Arrow::ListDataType.new(field).to_s)
24
24
  end
25
25
 
26
- test("Hash") do
26
+ test("name: String") do
27
27
  assert_equal("list<tag: string>",
28
28
  Arrow::ListDataType.new(name: "tag", type: :string).to_s)
29
29
  end
@@ -39,5 +39,31 @@ class ListDataTypeTest < Test::Unit::TestCase
39
39
  assert_equal("list<tag: string>",
40
40
  Arrow::ListDataType.new(field: field_description).to_s)
41
41
  end
42
+
43
+ test("Arrow::DataType") do
44
+ data_type = Arrow::BooleanDataType.new
45
+ assert_equal("list<item: bool>",
46
+ Arrow::ListDataType.new(data_type).to_s)
47
+ end
48
+
49
+ test("String") do
50
+ assert_equal("list<item: bool>",
51
+ Arrow::ListDataType.new("boolean").to_s)
52
+ end
53
+
54
+ test("Symbol") do
55
+ assert_equal("list<item: bool>",
56
+ Arrow::ListDataType.new(:boolean).to_s)
57
+ end
58
+
59
+ test("[data type name, additional information]") do
60
+ assert_equal("list<item: time32[ms]>",
61
+ Arrow::ListDataType.new([:time32, :milli]).to_s)
62
+ end
63
+
64
+ test("type: Symbol") do
65
+ assert_equal("list<item: bool>",
66
+ Arrow::ListDataType.new(type: :boolean).to_s)
67
+ end
42
68
  end
43
69
  end