red_amber 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +111 -48
- data/CHANGELOG.md +90 -1
- data/Gemfile +1 -0
- data/README.md +42 -25
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +60 -0
- data/doc/DataFrame.md +3 -0
- data/doc/Vector.md +88 -0
- data/lib/red_amber/data_frame.rb +161 -46
- data/lib/red_amber/data_frame_combinable.rb +304 -101
- data/lib/red_amber/data_frame_displayable.rb +4 -4
- data/lib/red_amber/data_frame_indexable.rb +2 -2
- data/lib/red_amber/data_frame_loadsave.rb +4 -1
- data/lib/red_amber/data_frame_reshaping.rb +35 -10
- data/lib/red_amber/data_frame_selectable.rb +221 -116
- data/lib/red_amber/data_frame_variable_operation.rb +146 -82
- data/lib/red_amber/group.rb +16 -7
- data/lib/red_amber/helper.rb +53 -31
- data/lib/red_amber/refinements.rb +199 -0
- data/lib/red_amber/vector.rb +55 -52
- data/lib/red_amber/vector_functions.rb +23 -75
- data/lib/red_amber/vector_selectable.rb +116 -69
- data/lib/red_amber/vector_updatable.rb +136 -7
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +3 -2
- metadata +11 -8
data/lib/red_amber/vector.rb
CHANGED
@@ -10,39 +10,37 @@ module RedAmber
|
|
10
10
|
include VectorSelectable
|
11
11
|
include Helper
|
12
12
|
|
13
|
+
using RefineArrayLike
|
14
|
+
|
15
|
+
# Quicker constructor of Vector.
|
16
|
+
#
|
17
|
+
def self.create(arrow_array)
|
18
|
+
instance = allocate
|
19
|
+
instance.instance_variable_set(:@data, arrow_array)
|
20
|
+
instance
|
21
|
+
end
|
22
|
+
|
23
|
+
# Create a Vector.
|
24
|
+
#
|
25
|
+
# @note default is headless Vector and '@key == nil'
|
13
26
|
def initialize(*array)
|
14
|
-
@
|
15
|
-
|
16
|
-
Vector
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
in [arrow_array_like] if arrow_array_like.respond_to?(:to_arrow_array)
|
28
|
-
arrow_array_like.to_arrow_array
|
29
|
-
in [Range => r]
|
30
|
-
Arrow::Array.new(Array(r))
|
31
|
-
else
|
32
|
-
begin
|
33
|
-
Arrow::Array.new(Array(array))
|
34
|
-
rescue Error
|
35
|
-
raise VectorArgumentError, "Invalid argument: #{array}"
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
27
|
+
@data =
|
28
|
+
case array
|
29
|
+
in [Vector => v]
|
30
|
+
v.data
|
31
|
+
in [Range => r]
|
32
|
+
Arrow::Array.new(Array(r))
|
33
|
+
in [Arrow::Array | Arrow::ChunkedArray]
|
34
|
+
array[0]
|
35
|
+
in [arrow_array_like] if arrow_array_like.respond_to?(:to_arrow_array)
|
36
|
+
arrow_array_like.to_arrow_array
|
37
|
+
else
|
38
|
+
Arrow::Array.new(array.flatten)
|
39
|
+
end
|
39
40
|
end
|
40
41
|
|
41
42
|
attr_reader :data
|
42
|
-
|
43
|
-
def to_arrow_array
|
44
|
-
@data
|
45
|
-
end
|
43
|
+
alias_method :to_arrow_array, :data
|
46
44
|
|
47
45
|
attr_accessor :key
|
48
46
|
|
@@ -52,45 +50,46 @@ module RedAmber
|
|
52
50
|
|
53
51
|
def inspect(limit: 80)
|
54
52
|
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table').casecmp('MINIMUM').zero?
|
55
|
-
# Better performance than `.upcase == 'MINIMUM'
|
53
|
+
# Better performance than `.upcase == 'MINIMUM'`
|
56
54
|
"#{self.class}(:#{type}, size=#{size})"
|
57
55
|
else
|
58
56
|
sio = StringIO.new << '['
|
59
|
-
|
60
|
-
next_str = "#{
|
61
|
-
if (
|
62
|
-
|
57
|
+
each.with_index do |e, i|
|
58
|
+
next_str = "#{sio.size > 1 ? ', ' : ''}#{e.inspect}"
|
59
|
+
if (sio.size + next_str.size) < limit
|
60
|
+
sio << next_str
|
63
61
|
else
|
64
|
-
|
62
|
+
sio << ', ... ' if i < size
|
65
63
|
break
|
66
64
|
end
|
67
65
|
end
|
68
66
|
sio << ']'
|
69
67
|
|
70
|
-
format "#<#{self.class}(:#{type}, size=#{size}):0x%016x>\n%s\n",
|
68
|
+
format "#<#{self.class}(:#{type}, size=#{size}):0x%016x>\n%s\n",
|
69
|
+
object_id, sio.string
|
71
70
|
end
|
72
71
|
end
|
73
72
|
|
74
|
-
def
|
73
|
+
def to_ary
|
75
74
|
@data.values
|
76
75
|
end
|
77
|
-
|
78
|
-
alias_method :
|
76
|
+
|
77
|
+
alias_method :to_a, :to_ary
|
78
|
+
alias_method :values, :to_ary
|
79
|
+
alias_method :entries, :to_ary
|
79
80
|
|
80
81
|
def indices
|
81
82
|
(0...size).to_a
|
82
83
|
end
|
84
|
+
|
83
85
|
alias_method :indexes, :indices
|
84
86
|
alias_method :indeces, :indices
|
85
87
|
|
86
|
-
def to_ary
|
87
|
-
values
|
88
|
-
end
|
89
|
-
|
90
88
|
def size
|
91
89
|
# only defined :length in Arrow?
|
92
90
|
@data.length
|
93
91
|
end
|
92
|
+
|
94
93
|
alias_method :length, :size
|
95
94
|
alias_method :n_rows, :size
|
96
95
|
alias_method :nrow, :size
|
@@ -100,39 +99,43 @@ module RedAmber
|
|
100
99
|
end
|
101
100
|
|
102
101
|
def type
|
103
|
-
@data.value_type.nick.to_sym
|
102
|
+
list? ? :list : @data.value_type.nick.to_sym
|
104
103
|
end
|
105
104
|
|
106
105
|
def boolean?
|
107
|
-
|
106
|
+
@data.boolean?
|
108
107
|
end
|
109
108
|
|
110
109
|
def numeric?
|
111
|
-
|
110
|
+
@data.numeric?
|
112
111
|
end
|
113
112
|
|
114
113
|
def float?
|
115
|
-
|
114
|
+
@data.float?
|
116
115
|
end
|
117
116
|
|
118
117
|
def integer?
|
119
|
-
|
118
|
+
@data.integer?
|
120
119
|
end
|
121
120
|
|
122
121
|
def string?
|
123
|
-
|
122
|
+
@data.string?
|
124
123
|
end
|
125
124
|
|
126
125
|
def dictionary?
|
127
|
-
|
126
|
+
@data.dictionary?
|
128
127
|
end
|
129
128
|
|
130
129
|
def temporal?
|
131
|
-
|
130
|
+
@data.temporal?
|
131
|
+
end
|
132
|
+
|
133
|
+
def list?
|
134
|
+
@data.list?
|
132
135
|
end
|
133
136
|
|
134
137
|
def type_class
|
135
|
-
@data.
|
138
|
+
@data.type_class
|
136
139
|
end
|
137
140
|
|
138
141
|
def each
|
@@ -12,7 +12,8 @@ module RedAmber
|
|
12
12
|
module VectorFunctions
|
13
13
|
# [Unary aggregations]: vector.func => scalar
|
14
14
|
unary_aggregations =
|
15
|
-
%i[all any approximate_median count count_distinct max mean min min_max
|
15
|
+
%i[all any approximate_median count count_distinct max mean min min_max
|
16
|
+
product stddev sum variance]
|
16
17
|
unary_aggregations.each do |function|
|
17
18
|
define_method(function) do |**options|
|
18
19
|
datum = exec_func_unary(function, options)
|
@@ -54,7 +55,10 @@ module RedAmber
|
|
54
55
|
# @param min_count [Integer] min count.
|
55
56
|
# @return [Float] quantile.
|
56
57
|
def quantile(prob = 0.5, interpolation: :linear, skip_nils: true, min_count: 0)
|
57
|
-
|
58
|
+
unless (0..1).cover? prob
|
59
|
+
raise VectorArgumentError,
|
60
|
+
"Invalid: probability #{prob} must be between 0 and 1"
|
61
|
+
end
|
58
62
|
|
59
63
|
datum = find(:quantile).execute([data],
|
60
64
|
q: prob,
|
@@ -66,7 +70,8 @@ module RedAmber
|
|
66
70
|
|
67
71
|
# Return quantiles in a DataFrame
|
68
72
|
#
|
69
|
-
def quantiles(probs = [1.0, 0.75, 0.5, 0.25, 0.0],
|
73
|
+
def quantiles(probs = [1.0, 0.75, 0.5, 0.25, 0.0],
|
74
|
+
interpolation: :linear, skip_nils: true, min_count: 0)
|
70
75
|
if probs.empty? || !probs.all? { |q| (0..1).cover?(q) }
|
71
76
|
raise VectorArgumentError, "Invarid probavilities #{probs}"
|
72
77
|
end
|
@@ -74,20 +79,23 @@ module RedAmber
|
|
74
79
|
DataFrame.new(
|
75
80
|
probs: probs,
|
76
81
|
quantiles: probs.map do |q|
|
77
|
-
quantile(q,
|
82
|
+
quantile(q,
|
83
|
+
interpolation: interpolation, skip_nils: skip_nils,
|
84
|
+
min_count: min_count)
|
78
85
|
end
|
79
86
|
)
|
80
87
|
end
|
81
88
|
|
82
89
|
# [Unary element-wise]: vector.func => vector
|
83
90
|
unary_element_wise =
|
84
|
-
%i[abs acos asin array_sort_indices atan bit_wise_not ceil cos
|
85
|
-
fill_null_forward floor
|
91
|
+
%i[abs acos asin array_sort_indices atan bit_wise_not ceil cos
|
92
|
+
fill_null_backward fill_null_forward floor
|
93
|
+
is_finite is_inf is_nan is_null is_valid ln log10 log1p log2
|
86
94
|
round round_to_multiple sign sin tan trunc unique]
|
87
95
|
unary_element_wise.each do |function|
|
88
96
|
define_method(function) do |**options|
|
89
97
|
datum = exec_func_unary(function, options)
|
90
|
-
Vector.
|
98
|
+
Vector.create(datum.value)
|
91
99
|
end
|
92
100
|
end
|
93
101
|
alias_method :is_nil, :is_null
|
@@ -113,12 +121,12 @@ module RedAmber
|
|
113
121
|
unary_element_wise_op.each do |function, operator|
|
114
122
|
define_method(function) do |**options|
|
115
123
|
datum = exec_func_unary(function, options)
|
116
|
-
Vector.
|
124
|
+
Vector.create(datum.value)
|
117
125
|
end
|
118
126
|
|
119
127
|
define_method(operator) do |**options|
|
120
128
|
datum = exec_func_unary(function, options)
|
121
|
-
Vector.
|
129
|
+
Vector.create(datum.value)
|
122
130
|
end
|
123
131
|
end
|
124
132
|
alias_method :not, :invert
|
@@ -129,7 +137,7 @@ module RedAmber
|
|
129
137
|
binary_element_wise.each do |function|
|
130
138
|
define_method(function) do |other, **options|
|
131
139
|
datum = exec_func_binary(function, other, options)
|
132
|
-
Vector.
|
140
|
+
Vector.create(datum.value)
|
133
141
|
end
|
134
142
|
end
|
135
143
|
|
@@ -145,7 +153,7 @@ module RedAmber
|
|
145
153
|
logical_binary_element_wise.each do |method, function|
|
146
154
|
define_method(method) do |other, **options|
|
147
155
|
datum = exec_func_binary(function, other, options)
|
148
|
-
Vector.
|
156
|
+
Vector.create(datum.value)
|
149
157
|
end
|
150
158
|
end
|
151
159
|
|
@@ -171,12 +179,12 @@ module RedAmber
|
|
171
179
|
binary_element_wise_op.each do |function, operator|
|
172
180
|
define_method(function) do |other, **options|
|
173
181
|
datum = exec_func_binary(function, other, options)
|
174
|
-
Vector.
|
182
|
+
Vector.create(datum.value)
|
175
183
|
end
|
176
184
|
|
177
185
|
define_method(operator) do |other, **options|
|
178
186
|
datum = exec_func_binary(function, other, options)
|
179
|
-
Vector.
|
187
|
+
Vector.create(datum.value)
|
180
188
|
end
|
181
189
|
end
|
182
190
|
alias_method :eq, :equal
|
@@ -190,67 +198,6 @@ module RedAmber
|
|
190
198
|
[Vector.new(Array(other) * size), self]
|
191
199
|
end
|
192
200
|
|
193
|
-
# < Not implimented yet > ---
|
194
|
-
|
195
|
-
# option(s) required
|
196
|
-
# - index
|
197
|
-
|
198
|
-
# Returns other than value
|
199
|
-
# - mode
|
200
|
-
# - tdigest
|
201
|
-
|
202
|
-
# Functions with numerical range check (unary)
|
203
|
-
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
204
|
-
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
205
|
-
|
206
|
-
# Functions with numerical range check (binary)
|
207
|
-
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
208
|
-
# shift_left_checked shift_right_checked
|
209
|
-
|
210
|
-
# (array functions)
|
211
|
-
# dictionary_encode,
|
212
|
-
# partition_nth_indices,
|
213
|
-
# quarter, quarters_between,
|
214
|
-
|
215
|
-
# (strings)
|
216
|
-
# ascii_capitalize, ascii_center, ascii_is_alnum, ascii_is_alpha, ascii_is_decimal,
|
217
|
-
# ascii_is_lower, ascii_is_printable, ascii_is_space, ascii_is_title, ascii_is_upper,
|
218
|
-
# ascii_lower, ascii_lpad, ascii_ltrim, ascii_ltrim_whitespace, ascii_reverse,
|
219
|
-
# ascii_rpad, ascii_rtrim, ascii_rtrim_whitespace, ascii_split_whitespace,
|
220
|
-
# ascii_swapcase, ascii_title, ascii_trim, ascii_trim_whitespace, ascii_upper,
|
221
|
-
# binary_join, binary_join_element_wise, binary_length, binary_repeat,
|
222
|
-
# binary_replace_slice, binary_reverse, count_substring, count_substring_regex,
|
223
|
-
# ends_with, extract_regex, find_substring, find_substring_regex,
|
224
|
-
# match_like, match_substring, match_substring_regex, replace_substring,
|
225
|
-
# replace_substring_regex, split_pattern, split_pattern_regex, starts_with,
|
226
|
-
# string_is_ascii, utf8_capitalize, utf8_center, utf8_is_alnum, utf8_is_alpha,
|
227
|
-
# utf8_is_decimal, utf8_is_digit, utf8_is_lower, utf8_is_numeric, utf8_is_printable,
|
228
|
-
# utf8_is_space, utf8_is_title, utf8_is_upper, utf8_length, utf8_lower, utf8_lpad,
|
229
|
-
# utf8_ltrim, utf8_ltrim_whitespace, utf8_normalize, utf8_replace_slice, utf8_reverse,
|
230
|
-
# utf8_rpad, utf8_rtrim, utf8_rtrim_whitespace, utf8_slice_codeunits, utf8_split_whitespace,
|
231
|
-
# utf8_swapcase, utf8_title, utf8_trim, utf8_trim_whitespace, utf8_upper
|
232
|
-
|
233
|
-
# (temporal)
|
234
|
-
# assume_timezone, ceil_temporal, day, day_of_week, day_of_year, day_time_interval_between,
|
235
|
-
# days_between, floor_temporal, hour, hours_between, iso_calendar, iso_week, iso_year,
|
236
|
-
# microsecond, microseconds_between, millisecond, milliseconds_between, minute,
|
237
|
-
# minutes_between, month, month_day_nano_interval_between, month_interval_between,
|
238
|
-
# nanosecond, nanoseconds_between, round_temporal, second, seconds_between, strftime,
|
239
|
-
# strptime, subsecond, us_week, week, weeks_between, year, year_month_day, years_between
|
240
|
-
|
241
|
-
# (onditional)
|
242
|
-
# case_when, cast,
|
243
|
-
|
244
|
-
# (indices)
|
245
|
-
# choose, index_in, index_in_meta_binary, indices_nonzero
|
246
|
-
|
247
|
-
# (others)
|
248
|
-
# coalesce,
|
249
|
-
# is_in_meta_binary,
|
250
|
-
# list_element, list_flatten, list_parent_indices, list_value_length, make_struct,
|
251
|
-
# max_element_wise, min_element_wise, random, select_k_unstable,
|
252
|
-
# struct_field,
|
253
|
-
|
254
201
|
private # =======
|
255
202
|
|
256
203
|
def exec_func_unary(function, options)
|
@@ -263,7 +210,8 @@ module RedAmber
|
|
263
210
|
case other
|
264
211
|
when Vector
|
265
212
|
find(function).execute([data, other.data], options)
|
266
|
-
when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar,
|
213
|
+
when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar,
|
214
|
+
Array, Numeric, String, TrueClass, FalseClass
|
267
215
|
find(function).execute([data, other], options)
|
268
216
|
end
|
269
217
|
end
|
@@ -4,91 +4,122 @@
|
|
4
4
|
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
5
|
|
6
6
|
module RedAmber
|
7
|
-
# mix-
|
8
|
-
#
|
7
|
+
# mix-in for class Vector
|
8
|
+
# Functions to select some data.
|
9
9
|
module VectorSelectable
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
using RefineArray
|
11
|
+
using RefineArrayLike
|
12
|
+
|
13
|
+
# Select elements in the self by indices.
|
14
|
+
#
|
15
|
+
# @param indices [Array<Numeric>, Vector] indices.
|
16
|
+
# @yield [Array<Numeric>, Vector] indices.
|
17
|
+
# @return [Vector] Vector by selected elements.
|
18
|
+
#
|
19
|
+
# TODO: support for the option `boundscheck: true`
|
20
|
+
def take(*indices, &block)
|
21
|
+
if block
|
22
|
+
unless indices.empty?
|
23
|
+
raise VectorArgumentError, 'Must not specify both arguments and block.'
|
24
|
+
end
|
14
25
|
|
15
|
-
|
16
|
-
|
17
|
-
def take(*indices)
|
18
|
-
indices.flatten!
|
19
|
-
return Vector.new([]) if indices.empty?
|
26
|
+
indices = [yield]
|
27
|
+
end
|
20
28
|
|
21
|
-
|
22
|
-
|
29
|
+
vector =
|
30
|
+
case indices
|
31
|
+
in [Vector => v] if v.numeric?
|
32
|
+
return Vector.create(take_by_vector(v))
|
33
|
+
in []
|
34
|
+
return Vector.new
|
35
|
+
in [(Arrow::Array | Arrow::ChunkedArray) => aa]
|
36
|
+
Vector.create(aa)
|
37
|
+
else
|
38
|
+
Vector.new(indices.flatten)
|
39
|
+
end
|
23
40
|
|
24
|
-
|
41
|
+
unless vector.numeric?
|
42
|
+
raise VectorArgumentError, "argument must be a integers: #{indices}"
|
43
|
+
end
|
44
|
+
|
45
|
+
Vector.create(take_by_vector(vector))
|
25
46
|
end
|
26
47
|
|
27
|
-
#
|
48
|
+
# Select elements in the self by booleans.
|
49
|
+
#
|
50
|
+
# @param booleans [Array<true, false, nil>, Vector] booleans.
|
51
|
+
# @yield [Array<true, false, nil>, Vector] booleans.
|
52
|
+
# @return [Vector] Vector by selected elements.
|
53
|
+
#
|
54
|
+
# TODO: support for the option `null_selection_behavior: :drop`
|
28
55
|
def filter(*booleans, &block)
|
29
56
|
if block
|
30
|
-
|
57
|
+
unless booleans.empty?
|
58
|
+
raise VectorArgumentError, 'Must not specify both arguments and block.'
|
59
|
+
end
|
31
60
|
|
32
61
|
booleans = [yield]
|
33
62
|
end
|
34
63
|
|
35
|
-
booleans
|
36
|
-
|
37
|
-
|
38
|
-
b = booleans[0]
|
39
|
-
boolean_array =
|
40
|
-
case b
|
41
|
-
when Vector
|
42
|
-
raise VectorTypeError, 'Argument is not a boolean.' unless b.boolean?
|
64
|
+
case booleans
|
65
|
+
in [Vector => v]
|
66
|
+
raise VectorTypeError, 'Argument is not a boolean.' unless v.boolean?
|
43
67
|
|
44
|
-
|
45
|
-
|
46
|
-
|
68
|
+
Vector.create(filter_by_array(v.data))
|
69
|
+
in [Arrow::BooleanArray => ba]
|
70
|
+
Vector.create(filter_by_array(ba))
|
71
|
+
in []
|
72
|
+
Vector.new
|
73
|
+
else
|
74
|
+
booleans.flatten!
|
75
|
+
a = Arrow::Array.new(booleans)
|
76
|
+
if a.boolean?
|
77
|
+
Vector.create(filter_by_array(a))
|
78
|
+
elsif booleans.compact.empty? # [nil, nil] becomes string array
|
79
|
+
Vector.new
|
47
80
|
else
|
48
|
-
raise VectorTypeError,
|
49
|
-
|
50
|
-
Arrow::BooleanArray.new(booleans)
|
81
|
+
raise VectorTypeError, "Argument is not a boolean: #{booleans}"
|
51
82
|
end
|
52
|
-
|
53
|
-
filter_by_array(boolean_array) # returns sub Vector
|
83
|
+
end
|
54
84
|
end
|
55
85
|
alias_method :select, :filter
|
56
86
|
alias_method :find_all, :filter
|
57
87
|
|
58
|
-
#
|
59
|
-
#
|
88
|
+
# Select elements in the self by indices or booleans.
|
89
|
+
#
|
90
|
+
# @param args [Array<Numeric, true, false, nil>, Vector] specifier.
|
91
|
+
# @yield [Array<Numeric, true, false, nil>, Vector] specifier.
|
92
|
+
# @return [scalar, Array] returns scalar or array.
|
93
|
+
#
|
60
94
|
def [](*args)
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
unless arg.is_a?(Numeric) || booleans?([arg])
|
79
|
-
raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
|
95
|
+
array =
|
96
|
+
case args
|
97
|
+
in [Vector => v]
|
98
|
+
return scalar_or_array(take_by_vector(v)) if v.numeric?
|
99
|
+
return scalar_or_array(filter_by_array(v.data)) if v.boolean?
|
100
|
+
|
101
|
+
raise VectorTypeError, "Argument must be numeric or boolean: #{args}"
|
102
|
+
in [Arrow::BooleanArray => ba]
|
103
|
+
return scalar_or_array(filter_by_array(ba))
|
104
|
+
in []
|
105
|
+
return nil
|
106
|
+
in [Arrow::Array => arrow_array]
|
107
|
+
arrow_array
|
108
|
+
in [Range => r]
|
109
|
+
Arrow::Array.new(parse_range(r, size))
|
110
|
+
else
|
111
|
+
Arrow::Array.new(args.flatten)
|
80
112
|
end
|
81
|
-
|
82
|
-
array
|
83
|
-
return filter_by_array(array) if array.is_a?(Arrow::BooleanArray)
|
113
|
+
|
114
|
+
return scalar_or_array(filter_by_array(array)) if array.boolean?
|
84
115
|
|
85
116
|
vector = Vector.new(array)
|
86
|
-
return take_by_vector(vector) if vector.numeric?
|
117
|
+
return scalar_or_array(take_by_vector(vector)) if vector.numeric?
|
87
118
|
|
88
119
|
raise VectorArgumentError, "Invalid argument: #{args}"
|
89
120
|
end
|
90
121
|
|
91
|
-
#
|
122
|
+
# @param values [Array, Arrow::Array, Vector]
|
92
123
|
def is_in(*values)
|
93
124
|
self_data = chunked? ? data.pack : data
|
94
125
|
|
@@ -100,7 +131,7 @@ module RedAmber
|
|
100
131
|
Array(values).flatten
|
101
132
|
end
|
102
133
|
|
103
|
-
Vector.
|
134
|
+
Vector.create(self_data.is_in(array))
|
104
135
|
end
|
105
136
|
|
106
137
|
# Arrow's support required
|
@@ -108,28 +139,44 @@ module RedAmber
|
|
108
139
|
to_a.index(element)
|
109
140
|
end
|
110
141
|
|
142
|
+
def drop_nil
|
143
|
+
datum = find(:drop_null).execute([data])
|
144
|
+
Vector.create(datum.value)
|
145
|
+
end
|
146
|
+
|
111
147
|
private
|
112
148
|
|
113
149
|
# Accepts indices by numeric Vector
|
114
150
|
def take_by_vector(indices)
|
115
|
-
|
116
|
-
raise VectorArgumentError, "Index out of range: #{indices.min}" if indices.min <= -size - 1
|
151
|
+
indices = (indices < 0).if_else(indices + size, indices) if (indices < 0).any?
|
117
152
|
|
118
|
-
|
119
|
-
raise VectorArgumentError, "Index out of range: #{
|
153
|
+
min, max = indices.min_max
|
154
|
+
raise VectorArgumentError, "Index out of range: #{min}" if min < 0
|
155
|
+
raise VectorArgumentError, "Index out of range: #{max}" if max >= size
|
120
156
|
|
121
|
-
index_array =
|
157
|
+
index_array =
|
158
|
+
if indices.float?
|
159
|
+
Arrow::UInt64ArrayBuilder.build(indices.data)
|
160
|
+
else
|
161
|
+
indices.data
|
162
|
+
end
|
122
163
|
|
123
|
-
|
124
|
-
|
164
|
+
# :array_take will fail with ChunkedArray
|
165
|
+
find(:take).execute([data, index_array]).value
|
125
166
|
end
|
126
167
|
|
127
168
|
# Accepts booleans by Arrow::BooleanArray
|
128
169
|
def filter_by_array(boolean_array)
|
129
|
-
|
170
|
+
unless boolean_array.length == size
|
171
|
+
raise VectorArgumentError, 'Booleans must be same size as self.'
|
172
|
+
end
|
173
|
+
|
174
|
+
find(:array_filter).execute([data, boolean_array]).value
|
175
|
+
end
|
130
176
|
|
131
|
-
|
132
|
-
|
177
|
+
def scalar_or_array(arrow_array)
|
178
|
+
a = arrow_array.to_a
|
179
|
+
a.size > 1 ? a : a[0]
|
133
180
|
end
|
134
181
|
end
|
135
182
|
end
|