red_amber 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +24 -5
- data/CHANGELOG.md +98 -13
- data/Gemfile +1 -0
- data/README.md +55 -6
- data/doc/DataFrame.md +23 -9
- data/doc/Vector.md +156 -24
- data/lib/red-amber.rb +27 -0
- data/lib/red_amber/data_frame.rb +39 -7
- data/lib/red_amber/data_frame_displayable.rb +8 -8
- data/lib/red_amber/data_frame_observation_operation.rb +0 -72
- data/lib/red_amber/data_frame_selectable.rb +151 -32
- data/lib/red_amber/data_frame_variable_operation.rb +4 -0
- data/lib/red_amber/helper.rb +61 -0
- data/lib/red_amber/vector.rb +42 -12
- data/lib/red_amber/vector_functions.rb +25 -18
- data/lib/red_amber/vector_selectable.rb +124 -0
- data/lib/red_amber/{vector_compensable.rb → vector_updatable.rb} +52 -16
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -24
- metadata +6 -4
- data/lib/red_amber/data_frame_helper.rb +0 -64
@@ -3,35 +3,94 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# mix-in for the class DataFrame
|
5
5
|
module DataFrameSelectable
|
6
|
-
# select
|
7
|
-
# select
|
6
|
+
# select variables: [symbol] or [string]
|
7
|
+
# select observations: [array of index], [range]
|
8
8
|
# Select variables or observations, depending on the kind of selector given.
#
# The arguments are flattened and parsed into one Vector, then dispatched on
# that Vector's type:
# - boolean             -> filter observations (size must equal self's size)
# - numeric             -> take observations by index
# - string / dictionary -> select variables by key
# An empty argument list, or a leading nil, returns a DataFrame with the same
# keys and no values.
#
# @param args [Array] indices, ranges, booleans, or keys (Symbol / String).
# @return [DataFrame] the selected sub DataFrame.
# @raise [DataFrameArgumentError] if self is empty, if a boolean selector has
#   a different size than self, or if the selector type is not supported.
def [](*args)
  args.flatten!
  raise DataFrameArgumentError, 'Empty dataframe' if empty?
  return remove_all_values if args.empty? || args[0].nil?

  selector = parse_to_vector(args)
  if selector.boolean?
    raise DataFrameArgumentError, "Size is not match in booleans: #{args}" unless selector.size == size

    filter_by_vector(selector.data)
  elsif selector.numeric?
    take_by_array(selector)
  elsif selector.string? || selector.type == :dictionary
    select_vars_by_keys(selector.to_a.map(&:to_sym))
  else
    raise DataFrameArgumentError, "Invalid argument: #{args}"
  end
end
|
24
|
+
|
25
|
+
# slice and select some observations to create sub DataFrame
|
26
|
+
# Slice out some observations to create a sub DataFrame.
#
# The selector comes either from the arguments or from the value of the block
# (evaluated with instance_eval, i.e. in the context of self), never both.
#
# @param args [Array] indices, ranges or booleans.
# @yield evaluated in the context of self; its value is used as the selector.
# @return [DataFrame] the selected observations; a DataFrame without values
#   when the selector is empty or starts with nil.
# @raise [DataFrameArgumentError] if both arguments and a block are given,
#   if self is empty, if a boolean selector has a different size than self,
#   or if the selector is neither boolean nor numeric.
def slice(*args, &block)
  if block && !args.empty?
    raise DataFrameArgumentError, 'Must not specify both arguments and block.'
  end

  picked = block ? instance_eval(&block) : args
  picked = [picked].flatten

  raise DataFrameArgumentError, 'Empty dataframe' if empty?
  return remove_all_values if picked.empty? || picked[0].nil?

  selector = parse_to_vector(picked)
  if selector.boolean?
    raise DataFrameArgumentError, "Size is not match in booleans: #{picked}" unless selector.size == size

    filter_by_vector(selector.data)
  elsif selector.numeric?
    take_by_array(selector)
  else
    raise DataFrameArgumentError, "Invalid argument #{picked}"
  end
end
|
48
|
+
|
49
|
+
# remove selected observations to create sub DataFrame
|
50
|
+
# Remove the selected observations to create a sub DataFrame.
#
# The selector comes either from the arguments or from the value of the block
# (evaluated with instance_eval, i.e. in the context of self), never both.
# A boolean selector removes the observations marked true; a numeric selector
# removes the observations at those indices (negative indices count from the
# tail).
#
# @param args [Array] indices, ranges or booleans.
# @yield evaluated in the context of self; its value is used as the selector.
# @return [DataFrame] self when the selector is empty or starts with nil,
#   otherwise a DataFrame without the selected observations.
# @raise [DataFrameArgumentError] if both arguments and a block are given,
#   if self is empty, if a boolean selector has a different size than self,
#   if an index is out of range, or if the selector type is not supported.
def remove(*args, &block)
  remover = args
  if block
    raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?

    remover = instance_eval(&block)
  end
  remover = [remover].flatten

  raise DataFrameArgumentError, 'Empty dataframe' if empty?
  return self if remover.empty? || remover[0].nil?

  vector = parse_to_vector(remover)
  if vector.boolean?
    return filter_by_vector(vector.primitive_invert.data) if vector.size == size

    raise DataFrameArgumentError, "Size is not match in booleans: #{remover}"
  end
  raise DataFrameArgumentError, "Invalid argument #{remover}" unless vector.numeric?

  raise DataFrameArgumentError, "Index out of range: #{vector.min}" if vector.min <= -size - 1

  normalized_indices = (vector < 0).if_else(vector + size, vector) # normalize index from tail
  if normalized_indices.max >= size
    raise DataFrameArgumentError, "Index out of range: #{normalized_indices.max}"
  end

  normalized_indices = normalized_indices.floor.to_a.map(&:to_i) # round to integer array
  return self if normalized_indices.empty?

  # Decide "everything was removed" from what is left, not from an exact match
  # with `indices`: the selector may list every row in any order or with
  # duplicates, and an empty Ruby array must not be handed to Arrow's take.
  index_array = indices - normalized_indices
  return remove_all_values if index_array.empty?

  datum = Arrow::Function.find(:take).execute([table, index_array])
  DataFrame.new(datum.value)
end
|
88
|
+
|
89
|
+
# Apply Arrow's `drop_null` compute function to the table and wrap the
# result in a new DataFrame.
#
# @return [DataFrame]
def remove_nil
  datum = Arrow::Function.find(:drop_null).execute([table])
  DataFrame.new(datum.value)
end
alias_method :drop_nil, :remove_nil
|
35
94
|
|
36
95
|
# Select a variable by a key in String or Symbol
|
37
96
|
def v(key)
|
@@ -43,24 +102,57 @@ module RedAmber
|
|
43
102
|
variables[key.to_sym]
|
44
103
|
end
|
45
104
|
|
46
|
-
def head(
|
47
|
-
raise DataFrameArgumentError, "Index is out of range #{
|
105
|
+
# Select the leading observations.
#
# @param n_obs [Integer] number of observations wanted; capped at `size`.
# @return [DataFrame] result of `self[0...limit]`.
# @raise [DataFrameArgumentError] if n_obs is negative.
def head(n_obs = 5)
  raise DataFrameArgumentError, "Index is out of range #{n_obs}" if n_obs.negative?

  limit = [n_obs, size].min
  self[0...limit]
end
|
51
110
|
|
52
|
-
def tail(
|
53
|
-
raise DataFrameArgumentError, "Index is out of range #{
|
111
|
+
# Select the trailing observations.
#
# @param n_obs [Integer] number of observations wanted; capped at `size`.
# @return [DataFrame] the last n_obs observations; a DataFrame without values
#   when n_obs is 0.
# @raise [DataFrameArgumentError] if n_obs is negative (or, via `[]`, if self
#   is empty).
def tail(n_obs = 5)
  raise DataFrameArgumentError, "Index is out of range #{n_obs}" if n_obs.negative?

  # `-0..` is the very same Range as `0..`, which would select every
  # observation instead of none. Empty frames still go through `[]` so the
  # 'Empty dataframe' error is preserved.
  return remove_all_values if n_obs.zero? && !empty?

  self[-[n_obs, size].min..]
end
|
57
116
|
|
58
|
-
def first(
|
59
|
-
head(
|
117
|
+
# Same as #head, but defaults to a single observation.
#
# @param n_obs [Integer] number of leading observations.
# @return [DataFrame] whatever `head(n_obs)` returns.
def first(n_obs = 1)
  head(n_obs)
end
|
61
120
|
|
62
|
-
def last(
|
63
|
-
tail(
|
121
|
+
# Same as #tail, but defaults to a single observation.
#
# @param n_obs [Integer] number of trailing observations.
# @return [DataFrame] whatever `tail(n_obs)` returns.
def last(n_obs = 1)
  tail(n_obs)
end
|
124
|
+
|
125
|
+
# Undocumented
|
126
|
+
# TODO: support for option {boundscheck: true}
|
127
|
+
# Undocumented
# TODO: support for option {boundscheck: true}
#
# Select observations by index.
#
# @param indices [Array] numeric indices, or a single non-numeric object
#   (e.g. a Vector) that is used as the index container itself.
# @return [DataFrame] a DataFrame without values when no index is given,
#   otherwise the result of `take_by_array`.
def take(*indices)
  indices.flatten!
  return remove_all_values if indices.empty?

  candidate = indices
  # A lone non-numeric argument is unwrapped and used as-is.
  candidate = candidate[0] if candidate.one? && !candidate[0].is_a?(Numeric)
  index_vector = candidate.is_a?(Vector) ? candidate : Vector.new(candidate)

  take_by_array(index_vector)
end
|
136
|
+
|
137
|
+
# Undocumented
|
138
|
+
# TODO: support for option {null_selection_behavior: :drop}
|
139
|
+
# Undocumented
# TODO: support for option {null_selection_behavior: :drop}
#
# Select observations by a boolean mask.
#
# @param booleans [Array] a boolean Vector, an Arrow::BooleanArray, or plain
#   true / false / nil values.
# @return [DataFrame] a DataFrame without values when no argument is given,
#   otherwise the result of `filter_by_vector`.
# @raise [DataFrameArgumentError] if the argument is not boolean.
def filter(*booleans)
  booleans.flatten!
  return remove_all_values if booleans.empty?

  first_arg = booleans[0]
  mask =
    case first_arg
    when Vector
      raise DataFrameArgumentError, 'Argument is not a boolean.' unless first_arg.boolean?

      first_arg.data
    when Arrow::BooleanArray
      first_arg
    else
      raise DataFrameArgumentError, 'Argument is not a boolean.' unless booleans?(booleans)

      Arrow::BooleanArray.new(booleans)
    end

  filter_by_vector(mask)
end
|
65
157
|
|
66
158
|
private
|
@@ -75,5 +167,32 @@ module RedAmber
|
|
75
167
|
DataFrame.new(@table[keys])
|
76
168
|
end
|
77
169
|
end
|
170
|
+
|
171
|
+
# Accepts indices by numeric Vector
|
172
|
+
# Accepts indices by numeric Vector.
#
# Negative indices are shifted by `size` so that they count from the tail,
# then the table rows are picked with Arrow's `take` function.
#
# @param indices [Vector] numeric Vector of indices.
# @return [DataFrame]
# @raise [DataFrameArgumentError] if indices is not numeric or any index is
#   out of range.
def take_by_array(indices)
  raise DataFrameArgumentError, "Indices must be a numeric Vector: #{indices}" unless indices.numeric?

  lowest = indices.min
  raise DataFrameArgumentError, "Index out of range: #{lowest}" if lowest <= -size - 1

  wrapped = (indices < 0).if_else(indices + size, indices) # normalize index from tail
  highest = wrapped.max
  raise DataFrameArgumentError, "Index out of range: #{highest}" if highest >= size

  index_array = Arrow::UInt64ArrayBuilder.build(wrapped.data) # round to integer array

  DataFrame.new(Arrow::Function.find(:take).execute([table, index_array]).value)
end
|
184
|
+
|
185
|
+
# Accepts booleans by Arrow::BooleanArray
|
186
|
+
# Accepts booleans by Arrow::BooleanArray.
#
# @param boolean_array [Arrow::BooleanArray] mask; its length must equal `size`.
# @return [DataFrame] result of Arrow's `filter` function applied to the table.
# @raise [DataFrameArgumentError] if the mask length differs from `size`.
def filter_by_vector(boolean_array)
  unless boolean_array.length == size
    raise DataFrameArgumentError, 'Booleans must be same size as self.'
  end

  filtered = Arrow::Function.find(:filter).execute([table, boolean_array])
  DataFrame.new(filtered.value)
end
|
192
|
+
|
193
|
+
# return a DataFrame with same keys as self without values
|
194
|
+
# Return a DataFrame with the same keys as self but without values, by
# filtering with an all-false mask of length `size`.
#
# @return [DataFrame]
def remove_all_values
  all_false = Array.new(size, false)
  filter_by_vector(Arrow::BooleanArray.new(all_false))
end
|
78
197
|
end
|
79
198
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
  # Private helper mix-in (predicates and selector parsing).
  # Several helpers call `size`, so the including class must provide it.
  module Helper
    private

    # Plural suffix for a count.
    #
    # @param num [Numeric]
    # @return [String] 's' when num is greater than 1, otherwise ''.
    def pl(num)
      num > 1 ? 's' : ''
    end

    # @param indices [#max, #min] collection of indices.
    # @return [Boolean] true if any index lies outside -size...size.
    def out_of_range?(indices)
      indices.max >= size || indices.min < -size
    end

    # @return [Boolean] true if every element is an Integer.
    def integers?(enum)
      enum.all?(Integer)
    end

    # @return [Boolean] true if every element is a Symbol or a String.
    def sym_or_str?(enum)
      enum.all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
    end

    # @return [Boolean] true if every element is true, false or nil.
    def booleans?(enum)
      enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
    end

    # Build a one-variable DataFrame from a key and a Vector's data.
    def create_dataframe_from_vector(key, vector)
      DataFrame.new(key => vector.data)
    end

    # Normalize every argument to an Array and build one Vector from the
    # concatenation.
    #
    # @param args [Array] selector elements (scalars, ranges, array-likes).
    # @return [Vector]
    def parse_to_vector(args)
      Vector.new(args.flat_map { |elem| normalize_element(elem) })
    end

    # Convert one selector element to an Array.
    #
    # - scalars (Numeric, String, Symbol, true, false, nil) are wrapped;
    # - a Range with an Integer end point, or with both ends nil, is treated
    #   as an index range and expanded against 0...size;
    # - any other Range is expanded with `to_a`;
    # - everything else goes through Kernel#Array.
    #
    # @raise [DataFrameArgumentError] if an index range reaches outside
    #   -size...size.
    def normalize_element(elem)
      case elem
      when Numeric, String, Symbol, TrueClass, FalseClass, NilClass
        [elem]
      when Range
        both_end = [elem.begin, elem.end]
        # Compare the last *included* index of an exclusive Integer range.
        both_end[1] -= 1 if elem.exclude_end? && elem.end.is_a?(Integer)

        return elem.to_a unless both_end.any?(Integer) || both_end.all?(&:nil?)

        # nil end points (beginless / endless ranges) are skipped by `&.`.
        if both_end.any? { |e| e&.>=(size) || e&.<(-size) }
          raise DataFrameArgumentError, "Index out of range: #{elem} for 0..#{size - 1}"
        end

        (0...size).to_a[elem]
      else
        Array(elem)
      end
    end
  end
end
|
data/lib/red_amber/vector.rb
CHANGED
@@ -1,25 +1,37 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Values in variable (columnar) data object
|
5
5
|
# @data : holds Arrow::ChunkedArray
|
6
6
|
class Vector
|
7
7
|
# mix-in
|
8
|
-
include VectorCompensable
|
9
8
|
include VectorFunctions
|
9
|
+
include VectorUpdatable
|
10
|
+
include VectorSelectable
|
11
|
+
include Helper
|
10
12
|
|
11
|
-
|
12
|
-
def initialize(array)
|
13
|
+
# Create a Vector from values.
#
# The arguments are flattened first; the first element then decides how the
# data is built:
# - Vector                          -> shares that Vector's data
# - Arrow::Array / ChunkedArray     -> used as the data directly
# - Range                           -> expanded to an Arrow::Array
# - anything else (including none)  -> Arrow::Array built from all arguments
#
# Previously an empty argument list or a leading nil created a throw-away
# `Vector.new([])` and left @data unset; those inputs now build the Arrow
# array like any other values.
#
# @param array [Array] values, or a single Vector / Arrow array / Range.
# @raise [VectorArgumentError] if building the Arrow array raises `Error`
#   (NOTE(review): presumably RedAmber::Error -- confirm that Arrow failures
#   are actually covered by it).
def initialize(*array)
  @key = nil # default is 'headless'
  array.flatten!

  source = array[0]
  @data =
    case source
    when Vector
      source.data
    when Arrow::Array, Arrow::ChunkedArray
      source
    when Range
      Arrow::Array.new(Array(source))
    else
      begin
        Arrow::Array.new(Array(array))
      rescue Error
        raise VectorArgumentError, "Invalid argument: #{array}"
      end
    end
end
|
25
37
|
|
@@ -52,6 +64,16 @@ module RedAmber
|
|
52
64
|
alias_method :to_a, :values
|
53
65
|
alias_method :entries, :values
|
54
66
|
|
67
|
+
# @return [Array<Integer>] every valid non-negative index, 0 up to size - 1.
def indices
  Array(0...size)
end
alias_method :indexes, :indices
alias_method :indeces, :indices
|
72
|
+
|
73
|
+
# Implicit Array conversion; returns the same values as #to_a.
# Because this is defined, Ruby treats a Vector as array-like (for example
# Array#flatten expands it into its elements).
def to_ary
  to_a
end
|
76
|
+
|
55
77
|
def size
|
56
78
|
# only defined :length in Arrow?
|
57
79
|
@data.length
|
@@ -60,6 +82,10 @@ module RedAmber
|
|
60
82
|
alias_method :n_rows, :size
|
61
83
|
alias_method :nrow, :size
|
62
84
|
|
85
|
+
# @return [Boolean] true when the Vector holds no elements (size is zero).
def empty?
  size.zero?
end
|
88
|
+
|
63
89
|
def type
|
64
90
|
@data.value_type.nick.to_sym
|
65
91
|
end
|
@@ -124,5 +150,9 @@ module RedAmber
|
|
124
150
|
def n_nans
|
125
151
|
numeric? ? is_nan.to_a.count(true) : 0
|
126
152
|
end
|
153
|
+
|
154
|
+
# @return result of `any` on the `is_nil` mask, i.e. whether at least one
#   element is nil.
def has_nil?
  is_nil.any
end
|
127
157
|
end
|
128
158
|
end
|
@@ -16,11 +16,13 @@ module RedAmber
|
|
16
16
|
# Define one instance method per unary aggregation function; each runs the
# function through exec_func_unary and unwraps the result with get_scalar.
unary_aggregations.each do |function|
  define_method(function) do |opts: nil|
    get_scalar(exec_func_unary(function, options: opts))
  end
end
alias_method :median, :approximate_median
alias_method :count_uniq, :count_distinct
alias_method :all?, :all
alias_method :any?, :any
|
24
26
|
|
25
27
|
def unbiased_variance
|
26
28
|
variance(opts: { ddof: 1 })
|
@@ -47,7 +49,7 @@ module RedAmber
|
|
47
49
|
# Define one instance method per unary element-wise function; each returns
# the function's result wrapped in a new Vector.
unary_element_wise.each do |function|
  define_method(function) do |opts: nil|
    Vector.new(exec_func_unary(function, options: opts).value)
  end
end
alias_method :is_nil, :is_null
|
@@ -72,12 +74,12 @@ module RedAmber
|
|
72
74
|
# For every (function, operator) pair define two methods with the same body:
# one named after the function and one named after the operator.
unary_element_wise_op.each do |function, operator|
  [function, operator].each do |method_name|
    define_method(method_name) do |opts: nil|
      Vector.new(exec_func_unary(function, options: opts).value)
    end
  end
end
alias_method :not, :invert
|
@@ -95,7 +97,7 @@ module RedAmber
|
|
95
97
|
# Define one instance method per binary element-wise function; each returns
# the function's result wrapped in a new Vector.
binary_element_wise.each do |function|
  define_method(function) do |other, opts: nil|
    Vector.new(exec_func_binary(function, other, options: opts).value)
  end
end
|
101
103
|
|
@@ -111,7 +113,7 @@ module RedAmber
|
|
111
113
|
# Define logical binary methods: `method` is the Ruby-side name, `function`
# is the function name handed to exec_func_binary.
logical_binary_element_wise.each do |method, function|
  define_method(method) do |other, opts: nil|
    Vector.new(exec_func_binary(function, other, options: opts).value)
  end
end
|
117
119
|
|
@@ -144,12 +146,12 @@ module RedAmber
|
|
144
146
|
# For every (function, operator) pair define two methods with the same body:
# one named after the function and one named after the operator.
binary_element_wise_op.each do |function, operator|
  [function, operator].each do |method_name|
    define_method(method_name) do |other, opts: nil|
      Vector.new(exec_func_binary(function, other, options: opts).value)
    end
  end
end
|
155
157
|
alias_method :eq, :equal
|
@@ -159,8 +161,17 @@ module RedAmber
|
|
159
161
|
alias_method :lt, :less
|
160
162
|
alias_method :ne, :not_equal
|
161
163
|
|
164
|
+
# Coercion hook used when a Vector is the right-hand operand
# (e.g. `2 * vector`).
#
# - An array-like operand (Vector, Array, Arrow::Array) must have the same
#   length as self and is converted element for element.
# - Any other operand is repeated `size` times.
#
# The array-like branch used to compute its result and then fall through to
# the scalar path, so a matching array was repeated `size` times; the `case`
# is now the method's return value.
#
# @param other [Object] left-hand operand.
# @return [Array(Vector, Vector)] `[other as a Vector, self]`.
# @raise [VectorArgumentError] if an array-like operand has a different length.
def coerce(other)
  case other
  when Vector, Array, Arrow::Array
    raise VectorArgumentError, "Size unmatch: #{size} != #{other.length}" unless size == other.length

    [Vector.new(Array(other)), self]
  else
    [Vector.new(Array(other) * size), self]
  end
end
|
173
|
+
|
162
174
|
# (array functions)
|
163
|
-
# array_filter, array_take
|
164
175
|
# dictionary_encode,
|
165
176
|
# partition_nth_indices,
|
166
177
|
# quarter, quarters_between,
|
@@ -192,17 +203,17 @@ module RedAmber
|
|
192
203
|
# strptime, subsecond, us_week, week, weeks_between, year, year_month_day, years_between
|
193
204
|
|
194
205
|
# (conditional)
|
195
|
-
# case_when, cast,
|
206
|
+
# case_when, cast,
|
196
207
|
|
197
208
|
# (indices)
|
198
209
|
# choose, index_in, index_in_meta_binary, indices_nonzero
|
199
210
|
|
200
211
|
# (others)
|
201
|
-
# coalesce,
|
202
|
-
#
|
212
|
+
# coalesce,
|
213
|
+
# is_in_meta_binary,
|
203
214
|
# list_element, list_flatten, list_parent_indices, list_value_length, make_struct,
|
204
215
|
# max_element_wise, min_element_wise, random, select_k_unstable,
|
205
|
-
#
|
216
|
+
# struct_field,
|
206
217
|
|
207
218
|
private # =======
|
208
219
|
|
@@ -221,7 +232,7 @@ module RedAmber
|
|
221
232
|
end
|
222
233
|
end
|
223
234
|
|
224
|
-
def
|
235
|
+
def get_scalar(datum)
|
225
236
|
output = datum.value
|
226
237
|
case output
|
227
238
|
when Arrow::StringScalar then output.to_s
|
@@ -232,10 +243,6 @@ module RedAmber
|
|
232
243
|
end
|
233
244
|
end
|
234
245
|
|
235
|
-
def take_out_element_wise(datum)
|
236
|
-
Vector.new(datum.value)
|
237
|
-
end
|
238
|
-
|
239
246
|
module_function # ======
|
240
247
|
|
241
248
|
def find(function_name)
|
@@ -0,0 +1,124 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
|
4
|
+
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
|
+
|
6
|
+
module RedAmber
  # mix-ins for class Vector
  # Functions to select some data.
  module VectorSelectable
    # @return [Vector] result of Arrow's `drop_null` applied to the data.
    def drop_nil
      Vector.new(find(:drop_null).execute([data]).value)
    end

    # vector calculation version of selection by indices
    # TODO: support for option {boundscheck: true}
    #
    # @param indices [Array] numeric indices, or a single non-numeric object
    #   (e.g. a Vector) that is used as the index container itself.
    # @return [Vector] an empty Vector when no index is given.
    def take(*indices)
      indices.flatten!
      return Vector.new([]) if indices.empty?

      candidate = indices
      # A lone non-numeric argument is unwrapped and used as-is.
      candidate = candidate[0] if candidate.one? && !candidate[0].is_a?(Numeric)
      index_vector = candidate.is_a?(Vector) ? candidate : Vector.new(candidate)

      take_by_vector(index_vector) # returns sub Vector
    end

    # TODO: support for option {null_selection_behavior: :drop}
    #
    # @param booleans [Array] a boolean Vector, an Arrow::BooleanArray, or
    #   plain true / false / nil values.
    # @return [Vector] an empty Vector when no argument is given.
    # @raise [VectorTypeError] if the argument is not boolean.
    def filter(*booleans)
      booleans.flatten!
      return Vector.new([]) if booleans.empty?

      first_arg = booleans[0]
      if first_arg.is_a?(Vector)
        raise VectorTypeError, 'Argument is not a boolean.' unless first_arg.boolean?

        mask = first_arg.data
      elsif first_arg.is_a?(Arrow::BooleanArray)
        mask = first_arg
      else
        raise VectorTypeError, 'Argument is not a boolean.' unless booleans?(booleans)

        mask = Arrow::BooleanArray.new(booleans)
      end

      filter_by_array(mask) # returns sub Vector
    end

    # Select elements by indices (numeric) or by a boolean mask.
    #
    # @param args [Array] indices or booleans, given as plain values, a
    #   Vector, or an Arrow array.
    # @return [Vector] an empty Vector when no argument is given.
    # @raise [VectorTypeError] if a Vector argument is neither numeric nor
    #   boolean.
    # @raise [VectorArgumentError] for any other unsupported argument.
    def [](*args)
      args.flatten!
      return Vector.new([]) if args.empty?

      arg = args[0]
      if arg.is_a?(Vector)
        return take_by_vector(arg) if arg.numeric?
        return filter_by_array(arg.data) if arg.boolean?

        raise VectorTypeError, "Argument must be numeric or boolean: #{arg}"
      end
      return filter_by_array(arg) if arg.is_a?(Arrow::BooleanArray)

      array =
        if arg.is_a?(Arrow::Array)
          arg
        elsif arg.is_a?(Numeric) || booleans?([arg])
          Arrow::Array.new(args)
        else
          raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
        end
      return filter_by_array(array) if array.is_a?(Arrow::BooleanArray)

      vector = Vector.new(array)
      return take_by_vector(vector) if vector.numeric?

      raise VectorArgumentError, "Invalid argument: #{args}"
    end

    # @param values [Array, Arrow::Array, Vector]
    # @return [Vector] result of `data.is_in` against the given values.
    def is_in(*values)
      values.flatten!
      first_value = values[0]
      lookup =
        if first_value.is_a?(Vector)
          first_value.data
        elsif first_value.is_a?(Arrow::Array)
          first_value
        end
      # Plain values are wrapped in an array of the same class as our data.
      lookup ||= data.class.new(values)

      Vector.new(data.is_in(lookup))
    end

    # Arrow's support required
    #
    # @return [Integer, nil] position of the first matching element in #to_a.
    def index(element)
      to_a.index(element)
    end

    private

    # Accepts indices by numeric Vector
    #
    # Negative indices are shifted by `size` so that they count from the tail.
    #
    # @raise [VectorTypeError] if indices is not numeric.
    # @raise [VectorArgumentError] if any index is out of range.
    def take_by_vector(indices)
      raise VectorTypeError, "Indices must be numeric Vector: #{indices}" unless indices.numeric?

      lowest = indices.min
      raise VectorArgumentError, "Index out of range: #{lowest}" if lowest <= -size - 1

      wrapped = (indices < 0).if_else(indices + size, indices) # normalize index from tail
      highest = wrapped.max
      raise VectorArgumentError, "Index out of range: #{highest}" if highest >= size

      index_array = Arrow::UInt64ArrayBuilder.build(wrapped.data) # round to integer array

      Vector.new(find(:array_take).execute([data, index_array]).value)
    end

    # Accepts booleans by Arrow::BooleanArray
    #
    # @raise [VectorArgumentError] if the mask length differs from `size`.
    def filter_by_array(boolean_array)
      unless boolean_array.length == size
        raise VectorArgumentError, 'Booleans must be same size as self.'
      end

      Vector.new(find(:array_filter).execute([data, boolean_array]).value)
    end
  end
end
|