red-arrow 0.15.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +63 -33
  4. data/ext/arrow/raw-records.cpp +2 -1
  5. data/ext/arrow/values.cpp +2 -1
  6. data/lib/arrow/array-builder.rb +101 -52
  7. data/lib/arrow/array.rb +28 -10
  8. data/lib/arrow/{binary-array-builder.rb → buffer.rb} +7 -15
  9. data/lib/arrow/chunked-array.rb +2 -0
  10. data/lib/arrow/csv-loader.rb +15 -3
  11. data/lib/arrow/csv-read-options.rb +18 -0
  12. data/lib/arrow/data-type.rb +35 -2
  13. data/lib/arrow/decimal128-array-builder.rb +0 -2
  14. data/lib/arrow/dictionary-array.rb +24 -0
  15. data/lib/arrow/field.rb +1 -1
  16. data/lib/arrow/generic-filterable.rb +43 -0
  17. data/lib/arrow/generic-takeable.rb +38 -0
  18. data/lib/arrow/list-data-type.rb +58 -8
  19. data/lib/arrow/loader.rb +12 -1
  20. data/lib/arrow/null-array-builder.rb +1 -1
  21. data/lib/arrow/null-array.rb +24 -0
  22. data/lib/arrow/raw-table-converter.rb +47 -0
  23. data/lib/arrow/record-batch-iterator.rb +22 -0
  24. data/lib/arrow/record-batch.rb +8 -3
  25. data/lib/arrow/schema.rb +5 -2
  26. data/lib/arrow/struct-array-builder.rb +13 -7
  27. data/lib/arrow/struct-data-type.rb +0 -2
  28. data/lib/arrow/table-loader.rb +29 -6
  29. data/lib/arrow/table-saver.rb +37 -13
  30. data/lib/arrow/table.rb +20 -73
  31. data/lib/arrow/version.rb +1 -1
  32. data/red-arrow.gemspec +4 -2
  33. data/test/helper.rb +1 -0
  34. data/test/helper/omittable.rb +36 -0
  35. data/test/raw-records/test-dense-union-array.rb +1 -34
  36. data/test/raw-records/test-sparse-union-array.rb +1 -33
  37. data/test/run-test.rb +14 -3
  38. data/test/test-array-builder.rb +17 -0
  39. data/test/test-array.rb +104 -0
  40. data/test/test-buffer.rb +11 -0
  41. data/test/test-chunked-array.rb +96 -0
  42. data/test/test-csv-loader.rb +77 -2
  43. data/test/test-data-type.rb +11 -0
  44. data/test/test-dense-union-data-type.rb +2 -2
  45. data/test/test-dictionary-array.rb +41 -0
  46. data/test/test-feather.rb +21 -6
  47. data/test/test-list-data-type.rb +27 -1
  48. data/test/test-null-array.rb +23 -0
  49. data/test/test-record-batch-iterator.rb +37 -0
  50. data/test/test-record-batch.rb +14 -0
  51. data/test/test-schema.rb +16 -0
  52. data/test/test-slicer.rb +74 -30
  53. data/test/test-sparse-union-data-type.rb +2 -2
  54. data/test/test-struct-array-builder.rb +8 -4
  55. data/test/test-table.rb +153 -14
  56. data/test/test-timestamp-array.rb +19 -0
  57. data/test/values/test-dense-union-array.rb +1 -34
  58. data/test/values/test-sparse-union-array.rb +1 -33
  59. metadata +76 -63
@@ -19,6 +19,7 @@
19
19
 
20
20
  $VERBOSE = true
21
21
 
22
+ require "fileutils"
22
23
  require "pathname"
23
24
 
24
25
  (ENV["ARROW_DLL_PATH"] || "").split(File::PATH_SEPARATOR).each do |path|
@@ -31,6 +32,14 @@ lib_dir = base_dir + "lib"
31
32
  ext_dir = base_dir + "ext" + "arrow"
32
33
  test_dir = base_dir + "test"
33
34
 
35
+ build_dir = ENV["BUILD_DIR"]
36
+ if build_dir
37
+ build_dir = File.join(build_dir, "red-arrow")
38
+ FileUtils.mkdir_p(build_dir)
39
+ else
40
+ build_dir = ext_dir
41
+ end
42
+
34
43
  make = nil
35
44
  if ENV["NO_MAKE"] != "yes"
36
45
  if ENV["MAKE"]
@@ -42,15 +51,17 @@ if ENV["NO_MAKE"] != "yes"
42
51
  end
43
52
  end
44
53
  if make
45
- Dir.chdir(ext_dir.to_s) do
54
+ Dir.chdir(build_dir.to_s) do
46
55
  unless File.exist?("Makefile")
47
- system(RbConfig.ruby, "extconf.rb", "--enable-debug-build") or exit(false)
56
+ system(RbConfig.ruby,
57
+ (ext_dir + "extconf.rb").to_s,
58
+ "--enable-debug-build") or exit(false)
48
59
  end
49
60
  system("#{make} > #{File::NULL}") or exit(false)
50
61
  end
51
62
  end
52
63
 
53
- $LOAD_PATH.unshift(ext_dir.to_s)
64
+ $LOAD_PATH.unshift(build_dir.to_s)
54
65
  $LOAD_PATH.unshift(lib_dir.to_s)
55
66
 
56
67
  require_relative "helper"
@@ -60,6 +60,23 @@ class ArrayBuilderTest < Test::Unit::TestCase
60
60
  DateTime.new(2018, 1, 5, 0, 23, 21),
61
61
  ])
62
62
  end
63
+
64
+ test("list<boolean>s") do
65
+ assert_build(Arrow::ArrayBuilder,
66
+ [
67
+ [nil, true, false],
68
+ nil,
69
+ [false],
70
+ ])
71
+ end
72
+
73
+ test("list<string>s") do
74
+ assert_build(Arrow::ArrayBuilder,
75
+ [
76
+ ["Hello", "World"],
77
+ ["Apache Arrow"],
78
+ ])
79
+ end
63
80
  end
64
81
 
65
82
  sub_test_case("specific builder") do
@@ -63,5 +63,109 @@ class ArrayTest < Test::Unit::TestCase
63
63
  end
64
64
  end
65
65
  end
66
+
67
+ sub_test_case("#cast") do
68
+ test("Symbol") do
69
+ assert_equal(Arrow::Int32Array.new([1, 2, 3]),
70
+ Arrow::StringArray.new(["1", "2", "3"]).cast(:int32))
71
+ end
72
+ end
73
+ end
74
+
75
+ sub_test_case("#filter") do
76
+ def setup
77
+ values = [true, false, false, true]
78
+ @array = Arrow::BooleanArray.new(values)
79
+ @options = Arrow::FilterOptions.new
80
+ @options.null_selection_behavior = :emit_null
81
+ end
82
+
83
+ test("Array: boolean") do
84
+ filter = [nil, true, true, false]
85
+ filtered_array = Arrow::BooleanArray.new([nil, false, false])
86
+ assert_equal(filtered_array,
87
+ @array.filter(filter, @options))
88
+ end
89
+
90
+ test("Arrow::BooleanArray") do
91
+ filter = Arrow::BooleanArray.new([nil, true, true, false])
92
+ filtered_array = Arrow::BooleanArray.new([nil, false, false])
93
+ assert_equal(filtered_array,
94
+ @array.filter(filter, @options))
95
+ end
96
+
97
+ test("Arrow::ChunkedArray") do
98
+ chunks = [
99
+ Arrow::BooleanArray.new([nil, true]),
100
+ Arrow::BooleanArray.new([true, false]),
101
+ ]
102
+ filter = Arrow::ChunkedArray.new(chunks)
103
+ filtered_array = Arrow::BooleanArray.new([nil, false, false])
104
+ assert_equal(filtered_array,
105
+ @array.filter(filter, @options))
106
+ end
107
+ end
108
+
109
+ sub_test_case("#take") do
110
+ def setup
111
+ values = [1, 0 ,2]
112
+ @array = Arrow::Int16Array.new(values)
113
+ end
114
+
115
+ test("Arrow: boolean") do
116
+ indices = [1, 0, 2]
117
+ assert_equal(Arrow::Int16Array.new([0, 1, 2]),
118
+ @array.take(indices))
119
+ end
120
+
121
+ test("Arrow::Array") do
122
+ indices = Arrow::Int16Array.new([1, 0, 2])
123
+ assert_equal(Arrow::Int16Array.new([0, 1, 2]),
124
+ @array.take(indices))
125
+ end
126
+
127
+ test("Arrow::ChunkedArray") do
128
+ taken_chunks = [
129
+ Arrow::Int16Array.new([0, 1]),
130
+ Arrow::Int16Array.new([2])
131
+ ]
132
+ taken_chunked_array = Arrow::ChunkedArray.new(taken_chunks)
133
+ indices_chunks = [
134
+ Arrow::Int16Array.new([1, 0]),
135
+ Arrow::Int16Array.new([2])
136
+ ]
137
+ indices = Arrow::ChunkedArray.new(indices_chunks)
138
+ assert_equal(taken_chunked_array,
139
+ @array.take(indices))
140
+ end
141
+ end
142
+
143
+ sub_test_case("#is_in") do
144
+ def setup
145
+ values = [1, 0, 1, 2]
146
+ @array = Arrow::Int16Array.new(values)
147
+ end
148
+
149
+ test("Arrow: Array") do
150
+ right = [2, 0]
151
+ assert_equal(Arrow::BooleanArray.new([false, true, false, true]),
152
+ @array.is_in(right))
153
+ end
154
+
155
+ test("Arrow::Array") do
156
+ right = Arrow::Int16Array.new([2, 0])
157
+ assert_equal(Arrow::BooleanArray.new([false, true, false, true]),
158
+ @array.is_in(right))
159
+ end
160
+
161
+ test("Arrow::ChunkedArray") do
162
+ chunks = [
163
+ Arrow::Int16Array.new([1, 0]),
164
+ Arrow::Int16Array.new([1, 0, 3])
165
+ ]
166
+ right = Arrow::ChunkedArray.new(chunks)
167
+ assert_equal(Arrow::BooleanArray.new([true, true, true, false]),
168
+ @array.is_in(right))
169
+ end
66
170
  end
67
171
  end
@@ -16,6 +16,17 @@
16
16
  # under the License.
17
17
 
18
18
  class BufferTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ test("GC") do
21
+ data = "Hello"
22
+ data_id = data.object_id
23
+ _buffer = Arrow::Buffer.new(data)
24
+ data = nil
25
+ GC.start
26
+ assert_equal("Hello", ObjectSpace._id2ref(data_id))
27
+ end
28
+ end
29
+
19
30
  sub_test_case("instance methods") do
20
31
  def setup
21
32
  @buffer = Arrow::Buffer.new("Hello")
@@ -84,4 +84,100 @@ class ChunkedArrayTest < Test::Unit::TestCase
84
84
  end
85
85
  end
86
86
  end
87
+
88
+ sub_test_case("#filter") do
89
+ def setup
90
+ arrays = [
91
+ Arrow::BooleanArray.new([false, true]),
92
+ Arrow::BooleanArray.new([false, true, false]),
93
+ ]
94
+ @chunked_array = Arrow::ChunkedArray.new(arrays)
95
+ @options = Arrow::FilterOptions.new
96
+ @options.null_selection_behavior = :emit_null
97
+ end
98
+
99
+ test("Array: boolean") do
100
+ filter = [nil, true, true, false, true]
101
+ chunks = [
102
+ Arrow::BooleanArray.new([nil, true]),
103
+ Arrow::BooleanArray.new([false, false]),
104
+ ]
105
+ filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
106
+ assert_equal(filtered_chunked_array,
107
+ @chunked_array.filter(filter, @options))
108
+ end
109
+
110
+ test("Arrow::BooleanArray") do
111
+ filter = Arrow::BooleanArray.new([nil, true, true, false, true])
112
+ chunks = [
113
+ Arrow::BooleanArray.new([nil, true]),
114
+ Arrow::BooleanArray.new([false, false]),
115
+ ]
116
+ filtered_chunked_array = Arrow::ChunkedArray.new(chunks)
117
+ assert_equal(filtered_chunked_array,
118
+ @chunked_array.filter(filter, @options))
119
+ end
120
+
121
+ test("Arrow::ChunkedArray") do
122
+ chunks = [
123
+ Arrow::BooleanArray.new([nil, true]),
124
+ Arrow::BooleanArray.new([true, false, true]),
125
+ ]
126
+ filter = Arrow::ChunkedArray.new(chunks)
127
+ filtered_chunks = [
128
+ Arrow::BooleanArray.new([nil, true]),
129
+ Arrow::BooleanArray.new([false, false]),
130
+ ]
131
+ filtered_chunked_array = Arrow::ChunkedArray.new(filtered_chunks)
132
+ assert_equal(filtered_chunked_array,
133
+ @chunked_array.filter(filter, @options))
134
+ end
135
+ end
136
+
137
+ sub_test_case("#take") do
138
+ def setup
139
+ chunks = [
140
+ Arrow::Int16Array.new([1, 0]),
141
+ Arrow::Int16Array.new([2]),
142
+ ]
143
+ @chunked_array = Arrow::ChunkedArray.new(chunks)
144
+ end
145
+
146
+ test("Arrow: boolean") do
147
+ chunks = [
148
+ Arrow::Int16Array.new([0, 1]),
149
+ Arrow::Int16Array.new([2])
150
+ ]
151
+ taken_chunked_array = Arrow::ChunkedArray.new(chunks)
152
+ indices = [1, 0, 2]
153
+ assert_equal(taken_chunked_array,
154
+ @chunked_array.take(indices))
155
+ end
156
+
157
+ test("Arrow::Array") do
158
+ chunks = [
159
+ Arrow::Int16Array.new([0, 1]),
160
+ Arrow::Int16Array.new([2])
161
+ ]
162
+ taken_chunked_array = Arrow::ChunkedArray.new(chunks)
163
+ indices = Arrow::Int16Array.new([1, 0, 2])
164
+ assert_equal(taken_chunked_array,
165
+ @chunked_array.take(indices))
166
+ end
167
+
168
+ test("Arrow::ChunkedArray") do
169
+ taken_chunks = [
170
+ Arrow::Int16Array.new([0, 1]),
171
+ Arrow::Int16Array.new([2])
172
+ ]
173
+ taken_chunked_array = Arrow::ChunkedArray.new(taken_chunks)
174
+ indices_chunks = [
175
+ Arrow::Int16Array.new([1, 0]),
176
+ Arrow::Int16Array.new([2])
177
+ ]
178
+ indices = Arrow::ChunkedArray.new(indices_chunks)
179
+ assert_equal(taken_chunked_array,
180
+ @chunked_array.take(indices))
181
+ end
182
+ end
87
183
  end
@@ -117,8 +117,83 @@ class CSVLoaderTest < Test::Unit::TestCase
117
117
  end
118
118
 
119
119
  sub_test_case("CSVReader") do
120
- def load_csv(data, options)
121
- Arrow::CSVLoader.load(data, options)
120
+ def load_csv(data, **options)
121
+ Arrow::CSVLoader.load(data, **options)
122
+ end
123
+
124
+ sub_test_case(":headers") do
125
+ test("true") do
126
+ values = Arrow::StringArray.new(["a", "b", "c"])
127
+ assert_equal(Arrow::Table.new(value: values),
128
+ load_csv(<<-CSV, headers: true))
129
+ value
130
+ a
131
+ b
132
+ c
133
+ CSV
134
+ end
135
+
136
+ test(":first_line") do
137
+ values = Arrow::StringArray.new(["a", "b", "c"])
138
+ assert_equal(Arrow::Table.new(value: values),
139
+ load_csv(<<-CSV, headers: :first_line))
140
+ value
141
+ a
142
+ b
143
+ c
144
+ CSV
145
+ end
146
+
147
+ test("truthy") do
148
+ values = Arrow::StringArray.new(["a", "b", "c"])
149
+ assert_equal(Arrow::Table.new(value: values),
150
+ load_csv(<<-CSV, headers: 0))
151
+ value
152
+ a
153
+ b
154
+ c
155
+ CSV
156
+ end
157
+
158
+ test("Array of column names") do
159
+ values = Arrow::StringArray.new(["a", "b", "c"])
160
+ assert_equal(Arrow::Table.new(column: values),
161
+ load_csv(<<-CSV, headers: ["column"]))
162
+ a
163
+ b
164
+ c
165
+ CSV
166
+ end
167
+
168
+ test("false") do
169
+ values = Arrow::StringArray.new(["a", "b", "c"])
170
+ assert_equal(Arrow::Table.new(f0: values),
171
+ load_csv(<<-CSV, headers: false))
172
+ a
173
+ b
174
+ c
175
+ CSV
176
+ end
177
+
178
+ test("nil") do
179
+ values = Arrow::StringArray.new(["a", "b", "c"])
180
+ assert_equal(Arrow::Table.new(f0: values),
181
+ load_csv(<<-CSV, headers: nil))
182
+ a
183
+ b
184
+ c
185
+ CSV
186
+ end
187
+
188
+ test("string") do
189
+ values = Arrow::StringArray.new(["a", "b", "c"])
190
+ assert_equal(Arrow::Table.new(column: values),
191
+ load_csv(<<-CSV, headers: "column"))
192
+ a
193
+ b
194
+ c
195
+ CSV
196
+ end
122
197
  end
123
198
 
124
199
  test(":column_types") do
@@ -48,6 +48,17 @@ class DataTypeTest < Test::Unit::TestCase
48
48
  assert_equal(Arrow::FixedSizeBinaryDataType.new(10),
49
49
  Arrow::DataType.resolve([:fixed_size_binary, 10]))
50
50
  end
51
+
52
+ test("abstract") do
53
+ message =
54
+ "abstract type: <:floating_point>: " +
55
+ "use one of not abstract type: [" +
56
+ "Arrow::DoubleDataType, " +
57
+ "Arrow::FloatDataType]"
58
+ assert_raise(ArgumentError.new(message)) do
59
+ Arrow::DataType.resolve(:floating_point)
60
+ end
61
+ end
51
62
  end
52
63
 
53
64
  sub_test_case("instance methods") do
@@ -28,12 +28,12 @@ class DenseUnionDataTypeTest < Test::Unit::TestCase
28
28
  end
29
29
 
30
30
  test("ordered arguments") do
31
- assert_equal("union[dense]<visible: bool=2, count: int32=9>",
31
+ assert_equal("dense_union<visible: bool=2, count: int32=9>",
32
32
  Arrow::DenseUnionDataType.new(@fields, [2, 9]).to_s)
33
33
  end
34
34
 
35
35
  test("description") do
36
- assert_equal("union[dense]<visible: bool=2, count: int32=9>",
36
+ assert_equal("dense_union<visible: bool=2, count: int32=9>",
37
37
  Arrow::DenseUnionDataType.new(fields: @fields,
38
38
  type_codes: [2, 9]).to_s)
39
39
  end
@@ -0,0 +1,41 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class DictionaryArrayTest < Test::Unit::TestCase
19
+ sub_test_case("instance methods") do
20
+ def setup
21
+ @values = ["a", "b", "c", "b", "a"]
22
+ @string_array = Arrow::StringArray.new(@values)
23
+ @array = @string_array.dictionary_encode
24
+ end
25
+
26
+ test("#[]") do
27
+ assert_equal(@values, @array.to_a)
28
+ end
29
+
30
+ test("#get_value") do
31
+ assert_equal([
32
+ @values[0],
33
+ @values[3],
34
+ ],
35
+ [
36
+ @array.get_value(0),
37
+ @array.get_value(3),
38
+ ])
39
+ end
40
+ end
41
+ end