red_amber 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +111 -48
- data/CHANGELOG.md +90 -1
- data/Gemfile +1 -0
- data/README.md +42 -25
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +60 -0
- data/doc/DataFrame.md +3 -0
- data/doc/Vector.md +88 -0
- data/lib/red_amber/data_frame.rb +161 -46
- data/lib/red_amber/data_frame_combinable.rb +304 -101
- data/lib/red_amber/data_frame_displayable.rb +4 -4
- data/lib/red_amber/data_frame_indexable.rb +2 -2
- data/lib/red_amber/data_frame_loadsave.rb +4 -1
- data/lib/red_amber/data_frame_reshaping.rb +35 -10
- data/lib/red_amber/data_frame_selectable.rb +221 -116
- data/lib/red_amber/data_frame_variable_operation.rb +146 -82
- data/lib/red_amber/group.rb +16 -7
- data/lib/red_amber/helper.rb +53 -31
- data/lib/red_amber/refinements.rb +199 -0
- data/lib/red_amber/vector.rb +55 -52
- data/lib/red_amber/vector_functions.rb +23 -75
- data/lib/red_amber/vector_selectable.rb +116 -69
- data/lib/red_amber/vector_updatable.rb +136 -7
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +3 -2
- metadata +11 -8
data/lib/red_amber/vector.rb
CHANGED
@@ -10,39 +10,37 @@ module RedAmber
|
|
10
10
|
include VectorSelectable
|
11
11
|
include Helper
|
12
12
|
|
13
|
+
using RefineArrayLike
|
14
|
+
|
15
|
+
# Quicker constructor of Vector.
|
16
|
+
#
|
17
|
+
def self.create(arrow_array)
|
18
|
+
instance = allocate
|
19
|
+
instance.instance_variable_set(:@data, arrow_array)
|
20
|
+
instance
|
21
|
+
end
|
22
|
+
|
23
|
+
# Create a Vector.
|
24
|
+
#
|
25
|
+
# @note default is headless Vector and '@key == nil'
|
13
26
|
def initialize(*array)
|
14
|
-
@
|
15
|
-
|
16
|
-
Vector
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
in [arrow_array_like] if arrow_array_like.respond_to?(:to_arrow_array)
|
28
|
-
arrow_array_like.to_arrow_array
|
29
|
-
in [Range => r]
|
30
|
-
Arrow::Array.new(Array(r))
|
31
|
-
else
|
32
|
-
begin
|
33
|
-
Arrow::Array.new(Array(array))
|
34
|
-
rescue Error
|
35
|
-
raise VectorArgumentError, "Invalid argument: #{array}"
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
27
|
+
@data =
|
28
|
+
case array
|
29
|
+
in [Vector => v]
|
30
|
+
v.data
|
31
|
+
in [Range => r]
|
32
|
+
Arrow::Array.new(Array(r))
|
33
|
+
in [Arrow::Array | Arrow::ChunkedArray]
|
34
|
+
array[0]
|
35
|
+
in [arrow_array_like] if arrow_array_like.respond_to?(:to_arrow_array)
|
36
|
+
arrow_array_like.to_arrow_array
|
37
|
+
else
|
38
|
+
Arrow::Array.new(array.flatten)
|
39
|
+
end
|
39
40
|
end
|
40
41
|
|
41
42
|
attr_reader :data
|
42
|
-
|
43
|
-
def to_arrow_array
|
44
|
-
@data
|
45
|
-
end
|
43
|
+
alias_method :to_arrow_array, :data
|
46
44
|
|
47
45
|
attr_accessor :key
|
48
46
|
|
@@ -52,45 +50,46 @@ module RedAmber
|
|
52
50
|
|
53
51
|
def inspect(limit: 80)
|
54
52
|
if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table').casecmp('MINIMUM').zero?
|
55
|
-
# Better performance than `.upcase == 'MINIMUM'
|
53
|
+
# Better performance than `.upcase == 'MINIMUM'`
|
56
54
|
"#{self.class}(:#{type}, size=#{size})"
|
57
55
|
else
|
58
56
|
sio = StringIO.new << '['
|
59
|
-
|
60
|
-
next_str = "#{
|
61
|
-
if (
|
62
|
-
|
57
|
+
each.with_index do |e, i|
|
58
|
+
next_str = "#{sio.size > 1 ? ', ' : ''}#{e.inspect}"
|
59
|
+
if (sio.size + next_str.size) < limit
|
60
|
+
sio << next_str
|
63
61
|
else
|
64
|
-
|
62
|
+
sio << ', ... ' if i < size
|
65
63
|
break
|
66
64
|
end
|
67
65
|
end
|
68
66
|
sio << ']'
|
69
67
|
|
70
|
-
format "#<#{self.class}(:#{type}, size=#{size}):0x%016x>\n%s\n",
|
68
|
+
format "#<#{self.class}(:#{type}, size=#{size}):0x%016x>\n%s\n",
|
69
|
+
object_id, sio.string
|
71
70
|
end
|
72
71
|
end
|
73
72
|
|
74
|
-
def
|
73
|
+
def to_ary
|
75
74
|
@data.values
|
76
75
|
end
|
77
|
-
|
78
|
-
alias_method :
|
76
|
+
|
77
|
+
alias_method :to_a, :to_ary
|
78
|
+
alias_method :values, :to_ary
|
79
|
+
alias_method :entries, :to_ary
|
79
80
|
|
80
81
|
def indices
|
81
82
|
(0...size).to_a
|
82
83
|
end
|
84
|
+
|
83
85
|
alias_method :indexes, :indices
|
84
86
|
alias_method :indeces, :indices
|
85
87
|
|
86
|
-
def to_ary
|
87
|
-
values
|
88
|
-
end
|
89
|
-
|
90
88
|
def size
|
91
89
|
# only defined :length in Arrow?
|
92
90
|
@data.length
|
93
91
|
end
|
92
|
+
|
94
93
|
alias_method :length, :size
|
95
94
|
alias_method :n_rows, :size
|
96
95
|
alias_method :nrow, :size
|
@@ -100,39 +99,43 @@ module RedAmber
|
|
100
99
|
end
|
101
100
|
|
102
101
|
def type
|
103
|
-
@data.value_type.nick.to_sym
|
102
|
+
list? ? :list : @data.value_type.nick.to_sym
|
104
103
|
end
|
105
104
|
|
106
105
|
def boolean?
|
107
|
-
|
106
|
+
@data.boolean?
|
108
107
|
end
|
109
108
|
|
110
109
|
def numeric?
|
111
|
-
|
110
|
+
@data.numeric?
|
112
111
|
end
|
113
112
|
|
114
113
|
def float?
|
115
|
-
|
114
|
+
@data.float?
|
116
115
|
end
|
117
116
|
|
118
117
|
def integer?
|
119
|
-
|
118
|
+
@data.integer?
|
120
119
|
end
|
121
120
|
|
122
121
|
def string?
|
123
|
-
|
122
|
+
@data.string?
|
124
123
|
end
|
125
124
|
|
126
125
|
def dictionary?
|
127
|
-
|
126
|
+
@data.dictionary?
|
128
127
|
end
|
129
128
|
|
130
129
|
def temporal?
|
131
|
-
|
130
|
+
@data.temporal?
|
131
|
+
end
|
132
|
+
|
133
|
+
def list?
|
134
|
+
@data.list?
|
132
135
|
end
|
133
136
|
|
134
137
|
def type_class
|
135
|
-
@data.
|
138
|
+
@data.type_class
|
136
139
|
end
|
137
140
|
|
138
141
|
def each
|
@@ -12,7 +12,8 @@ module RedAmber
|
|
12
12
|
module VectorFunctions
|
13
13
|
# [Unary aggregations]: vector.func => scalar
|
14
14
|
unary_aggregations =
|
15
|
-
%i[all any approximate_median count count_distinct max mean min min_max
|
15
|
+
%i[all any approximate_median count count_distinct max mean min min_max
|
16
|
+
product stddev sum variance]
|
16
17
|
unary_aggregations.each do |function|
|
17
18
|
define_method(function) do |**options|
|
18
19
|
datum = exec_func_unary(function, options)
|
@@ -54,7 +55,10 @@ module RedAmber
|
|
54
55
|
# @param min_count [Integer] min count.
|
55
56
|
# @return [Float] quantile.
|
56
57
|
def quantile(prob = 0.5, interpolation: :linear, skip_nils: true, min_count: 0)
|
57
|
-
|
58
|
+
unless (0..1).cover? prob
|
59
|
+
raise VectorArgumentError,
|
60
|
+
"Invalid: probability #{prob} must be between 0 and 1"
|
61
|
+
end
|
58
62
|
|
59
63
|
datum = find(:quantile).execute([data],
|
60
64
|
q: prob,
|
@@ -66,7 +70,8 @@ module RedAmber
|
|
66
70
|
|
67
71
|
# Return quantiles in a DataFrame
|
68
72
|
#
|
69
|
-
def quantiles(probs = [1.0, 0.75, 0.5, 0.25, 0.0],
|
73
|
+
def quantiles(probs = [1.0, 0.75, 0.5, 0.25, 0.0],
|
74
|
+
interpolation: :linear, skip_nils: true, min_count: 0)
|
70
75
|
if probs.empty? || !probs.all? { |q| (0..1).cover?(q) }
|
71
76
|
raise VectorArgumentError, "Invarid probavilities #{probs}"
|
72
77
|
end
|
@@ -74,20 +79,23 @@ module RedAmber
|
|
74
79
|
DataFrame.new(
|
75
80
|
probs: probs,
|
76
81
|
quantiles: probs.map do |q|
|
77
|
-
quantile(q,
|
82
|
+
quantile(q,
|
83
|
+
interpolation: interpolation, skip_nils: skip_nils,
|
84
|
+
min_count: min_count)
|
78
85
|
end
|
79
86
|
)
|
80
87
|
end
|
81
88
|
|
82
89
|
# [Unary element-wise]: vector.func => vector
|
83
90
|
unary_element_wise =
|
84
|
-
%i[abs acos asin array_sort_indices atan bit_wise_not ceil cos
|
85
|
-
fill_null_forward floor
|
91
|
+
%i[abs acos asin array_sort_indices atan bit_wise_not ceil cos
|
92
|
+
fill_null_backward fill_null_forward floor
|
93
|
+
is_finite is_inf is_nan is_null is_valid ln log10 log1p log2
|
86
94
|
round round_to_multiple sign sin tan trunc unique]
|
87
95
|
unary_element_wise.each do |function|
|
88
96
|
define_method(function) do |**options|
|
89
97
|
datum = exec_func_unary(function, options)
|
90
|
-
Vector.
|
98
|
+
Vector.create(datum.value)
|
91
99
|
end
|
92
100
|
end
|
93
101
|
alias_method :is_nil, :is_null
|
@@ -113,12 +121,12 @@ module RedAmber
|
|
113
121
|
unary_element_wise_op.each do |function, operator|
|
114
122
|
define_method(function) do |**options|
|
115
123
|
datum = exec_func_unary(function, options)
|
116
|
-
Vector.
|
124
|
+
Vector.create(datum.value)
|
117
125
|
end
|
118
126
|
|
119
127
|
define_method(operator) do |**options|
|
120
128
|
datum = exec_func_unary(function, options)
|
121
|
-
Vector.
|
129
|
+
Vector.create(datum.value)
|
122
130
|
end
|
123
131
|
end
|
124
132
|
alias_method :not, :invert
|
@@ -129,7 +137,7 @@ module RedAmber
|
|
129
137
|
binary_element_wise.each do |function|
|
130
138
|
define_method(function) do |other, **options|
|
131
139
|
datum = exec_func_binary(function, other, options)
|
132
|
-
Vector.
|
140
|
+
Vector.create(datum.value)
|
133
141
|
end
|
134
142
|
end
|
135
143
|
|
@@ -145,7 +153,7 @@ module RedAmber
|
|
145
153
|
logical_binary_element_wise.each do |method, function|
|
146
154
|
define_method(method) do |other, **options|
|
147
155
|
datum = exec_func_binary(function, other, options)
|
148
|
-
Vector.
|
156
|
+
Vector.create(datum.value)
|
149
157
|
end
|
150
158
|
end
|
151
159
|
|
@@ -171,12 +179,12 @@ module RedAmber
|
|
171
179
|
binary_element_wise_op.each do |function, operator|
|
172
180
|
define_method(function) do |other, **options|
|
173
181
|
datum = exec_func_binary(function, other, options)
|
174
|
-
Vector.
|
182
|
+
Vector.create(datum.value)
|
175
183
|
end
|
176
184
|
|
177
185
|
define_method(operator) do |other, **options|
|
178
186
|
datum = exec_func_binary(function, other, options)
|
179
|
-
Vector.
|
187
|
+
Vector.create(datum.value)
|
180
188
|
end
|
181
189
|
end
|
182
190
|
alias_method :eq, :equal
|
@@ -190,67 +198,6 @@ module RedAmber
|
|
190
198
|
[Vector.new(Array(other) * size), self]
|
191
199
|
end
|
192
200
|
|
193
|
-
# < Not implimented yet > ---
|
194
|
-
|
195
|
-
# option(s) required
|
196
|
-
# - index
|
197
|
-
|
198
|
-
# Returns other than value
|
199
|
-
# - mode
|
200
|
-
# - tdigest
|
201
|
-
|
202
|
-
# Functions with numerical range check (unary)
|
203
|
-
# - abs_checked acos_checked asin_checked cos_checked ln_checked
|
204
|
-
# log10_checked log1p_checked log2_checked sin_checked tan_checked
|
205
|
-
|
206
|
-
# Functions with numerical range check (binary)
|
207
|
-
# - add_checked divide_checked logb_checked multiply_checked power_checked subtract_checked
|
208
|
-
# shift_left_checked shift_right_checked
|
209
|
-
|
210
|
-
# (array functions)
|
211
|
-
# dictionary_encode,
|
212
|
-
# partition_nth_indices,
|
213
|
-
# quarter, quarters_between,
|
214
|
-
|
215
|
-
# (strings)
|
216
|
-
# ascii_capitalize, ascii_center, ascii_is_alnum, ascii_is_alpha, ascii_is_decimal,
|
217
|
-
# ascii_is_lower, ascii_is_printable, ascii_is_space, ascii_is_title, ascii_is_upper,
|
218
|
-
# ascii_lower, ascii_lpad, ascii_ltrim, ascii_ltrim_whitespace, ascii_reverse,
|
219
|
-
# ascii_rpad, ascii_rtrim, ascii_rtrim_whitespace, ascii_split_whitespace,
|
220
|
-
# ascii_swapcase, ascii_title, ascii_trim, ascii_trim_whitespace, ascii_upper,
|
221
|
-
# binary_join, binary_join_element_wise, binary_length, binary_repeat,
|
222
|
-
# binary_replace_slice, binary_reverse, count_substring, count_substring_regex,
|
223
|
-
# ends_with, extract_regex, find_substring, find_substring_regex,
|
224
|
-
# match_like, match_substring, match_substring_regex, replace_substring,
|
225
|
-
# replace_substring_regex, split_pattern, split_pattern_regex, starts_with,
|
226
|
-
# string_is_ascii, utf8_capitalize, utf8_center, utf8_is_alnum, utf8_is_alpha,
|
227
|
-
# utf8_is_decimal, utf8_is_digit, utf8_is_lower, utf8_is_numeric, utf8_is_printable,
|
228
|
-
# utf8_is_space, utf8_is_title, utf8_is_upper, utf8_length, utf8_lower, utf8_lpad,
|
229
|
-
# utf8_ltrim, utf8_ltrim_whitespace, utf8_normalize, utf8_replace_slice, utf8_reverse,
|
230
|
-
# utf8_rpad, utf8_rtrim, utf8_rtrim_whitespace, utf8_slice_codeunits, utf8_split_whitespace,
|
231
|
-
# utf8_swapcase, utf8_title, utf8_trim, utf8_trim_whitespace, utf8_upper
|
232
|
-
|
233
|
-
# (temporal)
|
234
|
-
# assume_timezone, ceil_temporal, day, day_of_week, day_of_year, day_time_interval_between,
|
235
|
-
# days_between, floor_temporal, hour, hours_between, iso_calendar, iso_week, iso_year,
|
236
|
-
# microsecond, microseconds_between, millisecond, milliseconds_between, minute,
|
237
|
-
# minutes_between, month, month_day_nano_interval_between, month_interval_between,
|
238
|
-
# nanosecond, nanoseconds_between, round_temporal, second, seconds_between, strftime,
|
239
|
-
# strptime, subsecond, us_week, week, weeks_between, year, year_month_day, years_between
|
240
|
-
|
241
|
-
# (onditional)
|
242
|
-
# case_when, cast,
|
243
|
-
|
244
|
-
# (indices)
|
245
|
-
# choose, index_in, index_in_meta_binary, indices_nonzero
|
246
|
-
|
247
|
-
# (others)
|
248
|
-
# coalesce,
|
249
|
-
# is_in_meta_binary,
|
250
|
-
# list_element, list_flatten, list_parent_indices, list_value_length, make_struct,
|
251
|
-
# max_element_wise, min_element_wise, random, select_k_unstable,
|
252
|
-
# struct_field,
|
253
|
-
|
254
201
|
private # =======
|
255
202
|
|
256
203
|
def exec_func_unary(function, options)
|
@@ -263,7 +210,8 @@ module RedAmber
|
|
263
210
|
case other
|
264
211
|
when Vector
|
265
212
|
find(function).execute([data, other.data], options)
|
266
|
-
when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar,
|
213
|
+
when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar,
|
214
|
+
Array, Numeric, String, TrueClass, FalseClass
|
267
215
|
find(function).execute([data, other], options)
|
268
216
|
end
|
269
217
|
end
|
@@ -4,91 +4,122 @@
|
|
4
4
|
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
5
|
|
6
6
|
module RedAmber
|
7
|
-
# mix-
|
8
|
-
#
|
7
|
+
# mix-in for class Vector
|
8
|
+
# Functions to select some data.
|
9
9
|
module VectorSelectable
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
using RefineArray
|
11
|
+
using RefineArrayLike
|
12
|
+
|
13
|
+
# Select elements in the self by indices.
|
14
|
+
#
|
15
|
+
# @param indices [Array<Numeric>, Vector] indices.
|
16
|
+
# @yield [Array<Numeric>, Vector] indices.
|
17
|
+
# @return [Vector] Vector by selected elements.
|
18
|
+
#
|
19
|
+
# TODO: support for the option `boundscheck: true`
|
20
|
+
def take(*indices, &block)
|
21
|
+
if block
|
22
|
+
unless indices.empty?
|
23
|
+
raise VectorArgumentError, 'Must not specify both arguments and block.'
|
24
|
+
end
|
14
25
|
|
15
|
-
|
16
|
-
|
17
|
-
def take(*indices)
|
18
|
-
indices.flatten!
|
19
|
-
return Vector.new([]) if indices.empty?
|
26
|
+
indices = [yield]
|
27
|
+
end
|
20
28
|
|
21
|
-
|
22
|
-
|
29
|
+
vector =
|
30
|
+
case indices
|
31
|
+
in [Vector => v] if v.numeric?
|
32
|
+
return Vector.create(take_by_vector(v))
|
33
|
+
in []
|
34
|
+
return Vector.new
|
35
|
+
in [(Arrow::Array | Arrow::ChunkedArray) => aa]
|
36
|
+
Vector.create(aa)
|
37
|
+
else
|
38
|
+
Vector.new(indices.flatten)
|
39
|
+
end
|
23
40
|
|
24
|
-
|
41
|
+
unless vector.numeric?
|
42
|
+
raise VectorArgumentError, "argument must be a integers: #{indices}"
|
43
|
+
end
|
44
|
+
|
45
|
+
Vector.create(take_by_vector(vector))
|
25
46
|
end
|
26
47
|
|
27
|
-
#
|
48
|
+
# Select elements in the self by booleans.
|
49
|
+
#
|
50
|
+
# @param booleans [Array<true, false, nil>, Vector] booleans.
|
51
|
+
# @yield [Array<true, false, nil>, Vector] booleans.
|
52
|
+
# @return [Vector] Vector by selected elements.
|
53
|
+
#
|
54
|
+
# TODO: support for the option `null_selection_behavior: :drop`
|
28
55
|
def filter(*booleans, &block)
|
29
56
|
if block
|
30
|
-
|
57
|
+
unless booleans.empty?
|
58
|
+
raise VectorArgumentError, 'Must not specify both arguments and block.'
|
59
|
+
end
|
31
60
|
|
32
61
|
booleans = [yield]
|
33
62
|
end
|
34
63
|
|
35
|
-
booleans
|
36
|
-
|
37
|
-
|
38
|
-
b = booleans[0]
|
39
|
-
boolean_array =
|
40
|
-
case b
|
41
|
-
when Vector
|
42
|
-
raise VectorTypeError, 'Argument is not a boolean.' unless b.boolean?
|
64
|
+
case booleans
|
65
|
+
in [Vector => v]
|
66
|
+
raise VectorTypeError, 'Argument is not a boolean.' unless v.boolean?
|
43
67
|
|
44
|
-
|
45
|
-
|
46
|
-
|
68
|
+
Vector.create(filter_by_array(v.data))
|
69
|
+
in [Arrow::BooleanArray => ba]
|
70
|
+
Vector.create(filter_by_array(ba))
|
71
|
+
in []
|
72
|
+
Vector.new
|
73
|
+
else
|
74
|
+
booleans.flatten!
|
75
|
+
a = Arrow::Array.new(booleans)
|
76
|
+
if a.boolean?
|
77
|
+
Vector.create(filter_by_array(a))
|
78
|
+
elsif booleans.compact.empty? # [nil, nil] becomes string array
|
79
|
+
Vector.new
|
47
80
|
else
|
48
|
-
raise VectorTypeError,
|
49
|
-
|
50
|
-
Arrow::BooleanArray.new(booleans)
|
81
|
+
raise VectorTypeError, "Argument is not a boolean: #{booleans}"
|
51
82
|
end
|
52
|
-
|
53
|
-
filter_by_array(boolean_array) # returns sub Vector
|
83
|
+
end
|
54
84
|
end
|
55
85
|
alias_method :select, :filter
|
56
86
|
alias_method :find_all, :filter
|
57
87
|
|
58
|
-
#
|
59
|
-
#
|
88
|
+
# Select elements in the self by indices or booleans.
|
89
|
+
#
|
90
|
+
# @param args [Array<Numeric, true, false, nil>, Vector] specifier.
|
91
|
+
# @yield [Array<Numeric, true, false, nil>, Vector] specifier.
|
92
|
+
# @return [scalar, Array] returns scalar or array.
|
93
|
+
#
|
60
94
|
def [](*args)
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
unless arg.is_a?(Numeric) || booleans?([arg])
|
79
|
-
raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
|
95
|
+
array =
|
96
|
+
case args
|
97
|
+
in [Vector => v]
|
98
|
+
return scalar_or_array(take_by_vector(v)) if v.numeric?
|
99
|
+
return scalar_or_array(filter_by_array(v.data)) if v.boolean?
|
100
|
+
|
101
|
+
raise VectorTypeError, "Argument must be numeric or boolean: #{args}"
|
102
|
+
in [Arrow::BooleanArray => ba]
|
103
|
+
return scalar_or_array(filter_by_array(ba))
|
104
|
+
in []
|
105
|
+
return nil
|
106
|
+
in [Arrow::Array => arrow_array]
|
107
|
+
arrow_array
|
108
|
+
in [Range => r]
|
109
|
+
Arrow::Array.new(parse_range(r, size))
|
110
|
+
else
|
111
|
+
Arrow::Array.new(args.flatten)
|
80
112
|
end
|
81
|
-
|
82
|
-
array
|
83
|
-
return filter_by_array(array) if array.is_a?(Arrow::BooleanArray)
|
113
|
+
|
114
|
+
return scalar_or_array(filter_by_array(array)) if array.boolean?
|
84
115
|
|
85
116
|
vector = Vector.new(array)
|
86
|
-
return take_by_vector(vector) if vector.numeric?
|
117
|
+
return scalar_or_array(take_by_vector(vector)) if vector.numeric?
|
87
118
|
|
88
119
|
raise VectorArgumentError, "Invalid argument: #{args}"
|
89
120
|
end
|
90
121
|
|
91
|
-
#
|
122
|
+
# @param values [Array, Arrow::Array, Vector]
|
92
123
|
def is_in(*values)
|
93
124
|
self_data = chunked? ? data.pack : data
|
94
125
|
|
@@ -100,7 +131,7 @@ module RedAmber
|
|
100
131
|
Array(values).flatten
|
101
132
|
end
|
102
133
|
|
103
|
-
Vector.
|
134
|
+
Vector.create(self_data.is_in(array))
|
104
135
|
end
|
105
136
|
|
106
137
|
# Arrow's support required
|
@@ -108,28 +139,44 @@ module RedAmber
|
|
108
139
|
to_a.index(element)
|
109
140
|
end
|
110
141
|
|
142
|
+
def drop_nil
|
143
|
+
datum = find(:drop_null).execute([data])
|
144
|
+
Vector.create(datum.value)
|
145
|
+
end
|
146
|
+
|
111
147
|
private
|
112
148
|
|
113
149
|
# Accepts indices by numeric Vector
|
114
150
|
def take_by_vector(indices)
|
115
|
-
|
116
|
-
raise VectorArgumentError, "Index out of range: #{indices.min}" if indices.min <= -size - 1
|
151
|
+
indices = (indices < 0).if_else(indices + size, indices) if (indices < 0).any?
|
117
152
|
|
118
|
-
|
119
|
-
raise VectorArgumentError, "Index out of range: #{
|
153
|
+
min, max = indices.min_max
|
154
|
+
raise VectorArgumentError, "Index out of range: #{min}" if min < 0
|
155
|
+
raise VectorArgumentError, "Index out of range: #{max}" if max >= size
|
120
156
|
|
121
|
-
index_array =
|
157
|
+
index_array =
|
158
|
+
if indices.float?
|
159
|
+
Arrow::UInt64ArrayBuilder.build(indices.data)
|
160
|
+
else
|
161
|
+
indices.data
|
162
|
+
end
|
122
163
|
|
123
|
-
|
124
|
-
|
164
|
+
# :array_take will fail with ChunkedArray
|
165
|
+
find(:take).execute([data, index_array]).value
|
125
166
|
end
|
126
167
|
|
127
168
|
# Accepts booleans by Arrow::BooleanArray
|
128
169
|
def filter_by_array(boolean_array)
|
129
|
-
|
170
|
+
unless boolean_array.length == size
|
171
|
+
raise VectorArgumentError, 'Booleans must be same size as self.'
|
172
|
+
end
|
173
|
+
|
174
|
+
find(:array_filter).execute([data, boolean_array]).value
|
175
|
+
end
|
130
176
|
|
131
|
-
|
132
|
-
|
177
|
+
def scalar_or_array(arrow_array)
|
178
|
+
a = arrow_array.to_a
|
179
|
+
a.size > 1 ? a : a[0]
|
133
180
|
end
|
134
181
|
end
|
135
182
|
end
|