red_amber 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +24 -5
- data/CHANGELOG.md +98 -13
- data/Gemfile +1 -0
- data/README.md +55 -6
- data/doc/DataFrame.md +23 -9
- data/doc/Vector.md +156 -24
- data/lib/red-amber.rb +27 -0
- data/lib/red_amber/data_frame.rb +39 -7
- data/lib/red_amber/data_frame_displayable.rb +8 -8
- data/lib/red_amber/data_frame_observation_operation.rb +0 -72
- data/lib/red_amber/data_frame_selectable.rb +151 -32
- data/lib/red_amber/data_frame_variable_operation.rb +4 -0
- data/lib/red_amber/helper.rb +61 -0
- data/lib/red_amber/vector.rb +42 -12
- data/lib/red_amber/vector_functions.rb +25 -18
- data/lib/red_amber/vector_selectable.rb +124 -0
- data/lib/red_amber/{vector_compensable.rb → vector_updatable.rb} +52 -16
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -24
- metadata +6 -4
- data/lib/red_amber/data_frame_helper.rb +0 -64
@@ -3,35 +3,94 @@
|
|
3
3
|
module RedAmber
|
4
4
|
# mix-in for the class DataFrame
|
5
5
|
module DataFrameSelectable
|
6
|
-
# select
|
7
|
-
# select
|
6
|
+
# select variables: [symbol] or [string]
|
7
|
+
# select observations: [array of index], [range]
|
8
8
|
# Select variables or observations, depending on what the arguments parse to.
#
# - booleans (same length as self) keep the observations flagged true
# - numerics (indices, ranges) take observations by position
# - strings / symbols / dictionary values select variables by key
#
# No argument, or a leading nil, yields a DataFrame with the same keys and no values.
#
# @raise [DataFrameArgumentError] if self is empty, the boolean size differs
#   from self, or the arguments are of an unsupported kind
def [](*args)
  args.flatten!
  raise DataFrameArgumentError, 'Empty dataframe' if empty?
  return remove_all_values if args.empty? || args[0].nil?

  selector = parse_to_vector(args)
  if selector.boolean?
    raise DataFrameArgumentError, "Size is not match in booleans: #{args}" unless selector.size == size

    filter_by_vector(selector.data)
  elsif selector.numeric?
    take_by_array(selector)
  elsif selector.string? || selector.type == :dictionary
    select_vars_by_keys(selector.to_a.map(&:to_sym))
  else
    raise DataFrameArgumentError, "Invalid argument: #{args}"
  end
end
|
24
|
+
|
25
|
+
# slice and select some observations to create sub DataFrame
|
26
|
+
# Build a sub DataFrame from the selected observations.
#
# The selection is given either as arguments or as the result of a block
# evaluated in the context of self (instance_eval), never both.
# Booleans filter observations; numerics take them by index.
#
# @raise [DataFrameArgumentError] if both arguments and a block are given,
#   self is empty, the boolean size differs from self, or the selection is
#   neither boolean nor numeric
def slice(*args, &block)
  if block && !args.empty?
    raise DataFrameArgumentError, 'Must not specify both arguments and block.'
  end

  slicer = [block ? instance_eval(&block) : args].flatten

  raise DataFrameArgumentError, 'Empty dataframe' if empty?
  return remove_all_values if slicer.empty? || slicer[0].nil?

  selector = parse_to_vector(slicer)
  if selector.boolean?
    raise DataFrameArgumentError, "Size is not match in booleans: #{slicer}" unless selector.size == size

    filter_by_vector(selector.data)
  elsif selector.numeric?
    take_by_array(selector)
  else
    raise DataFrameArgumentError, "Invalid argument #{slicer}"
  end
end
|
48
|
+
|
49
|
+
# remove selected observations to create sub DataFrame
|
50
|
+
# Build a sub DataFrame without the selected observations.
#
# The selection is given either as arguments or as the result of a block
# evaluated in the context of self (instance_eval), never both.
# An empty selection (or a leading nil) returns self unchanged.
#
# @raise [DataFrameArgumentError] if both arguments and a block are given,
#   self is empty, the boolean size differs from self, an index is out of
#   range, or the selection is neither boolean nor numeric
def remove(*args, &block)
  if block && !args.empty?
    raise DataFrameArgumentError, 'Must not specify both arguments and block.'
  end

  remover = [block ? instance_eval(&block) : args].flatten

  raise DataFrameArgumentError, 'Empty dataframe' if empty?
  return self if remover.empty? || remover[0].nil?

  selector = parse_to_vector(remover)
  if selector.boolean?
    raise DataFrameArgumentError, "Size is not match in booleans: #{remover}" unless selector.size == size

    # keep the observations that are NOT flagged
    return filter_by_vector(selector.primitive_invert.data)
  end
  raise DataFrameArgumentError, "Invalid argument #{remover}" unless selector.numeric?

  raise DataFrameArgumentError, "Index out of range: #{selector.min}" if selector.min <= -size - 1

  # negative indices count from the tail
  from_head = (selector < 0).if_else(selector + size, selector)
  raise DataFrameArgumentError, "Index out of range: #{from_head.max}" if from_head.max >= size

  # round down to a plain Integer array
  dropped = from_head.floor.to_a.map(&:to_i)
  return remove_all_values if dropped == indices
  return self if dropped.empty?

  kept = indices - dropped
  datum = Arrow::Function.find(:take).execute([table, kept])
  DataFrame.new(datum.value)
end
|
88
|
+
|
89
|
+
# Return a new DataFrame built from the result of Arrow's :drop_null
# function applied to the underlying table.
def remove_nil
  datum = Arrow::Function.find(:drop_null).execute([table])
  DataFrame.new(datum.value)
end
|
93
|
+
alias_method :drop_nil, :remove_nil
|
35
94
|
|
36
95
|
# Select a variable by a key in String or Symbol
|
37
96
|
def v(key)
|
@@ -43,24 +102,57 @@ module RedAmber
|
|
43
102
|
variables[key.to_sym]
|
44
103
|
end
|
45
104
|
|
46
|
-
def head(
|
47
|
-
raise DataFrameArgumentError, "Index is out of range #{
|
105
|
+
# Select the leading observations.
#
# @param n_obs [Integer] number of observations wanted (capped at size)
# @raise [DataFrameArgumentError] if n_obs is negative
def head(n_obs = 5)
  raise DataFrameArgumentError, "Index is out of range #{n_obs}" if n_obs.negative?

  upper = [n_obs, size].min
  self[0...upper]
end
|
51
110
|
|
52
|
-
def tail(
|
53
|
-
raise DataFrameArgumentError, "Index is out of range #{
|
111
|
+
# Select the trailing observations.
#
# @param n_obs [Integer] number of observations wanted (capped at size)
# @return a sub DataFrame; with n_obs == 0 a DataFrame without values
# @raise [DataFrameArgumentError] if n_obs is negative
def tail(n_obs = 5)
  raise DataFrameArgumentError, "Index is out of range #{n_obs}" if n_obs.negative?
  # -0 is 0, so the range below would become `0..` and select every
  # observation; zero requested observations must yield none.
  return remove_all_values if n_obs.zero?

  self[-[n_obs, size].min..]
end
|
57
116
|
|
58
|
-
def first(
|
59
|
-
head(
|
117
|
+
# Same as #head, but the default is a single observation.
#
# @param n_obs [Integer] number of observations, forwarded to #head
def first(n_obs = 1)
  head(n_obs)
end
|
61
120
|
|
62
|
-
def last(
|
63
|
-
tail(
|
121
|
+
# Same as #tail, but the default is a single observation.
#
# @param n_obs [Integer] number of observations, forwarded to #tail
def last(n_obs = 1)
  tail(n_obs)
end
|
124
|
+
|
125
|
+
# Undocumented
|
126
|
+
# TODO: support for option {boundscheck: true}
|
127
|
+
# Select observations by index.
#
# Accepts indices as separate arguments, an Array, or a single
# non-numeric object (e.g. a Vector), which is used as is.
# With no indices a DataFrame without values is returned.
def take(*indices)
  indices.flatten!
  return remove_all_values if indices.empty?

  picked =
    if indices.one? && !indices[0].is_a?(Numeric)
      indices[0] # lone non-numeric argument stands for the whole selection
    else
      indices
    end
  picked = Vector.new(picked) unless picked.is_a?(Vector)

  take_by_array(picked)
end
|
136
|
+
|
137
|
+
# Undocumented
|
138
|
+
# TODO: support for option {null_selection_behavior: :drop}
|
139
|
+
# Select the observations flagged by booleans.
#
# Accepts a boolean Vector, an Arrow::BooleanArray, or plain
# true/false/nil values. With no argument a DataFrame without values
# is returned.
#
# @raise [DataFrameArgumentError] if the argument is not boolean
def filter(*booleans)
  booleans.flatten!
  return remove_all_values if booleans.empty?

  leading = booleans.first
  mask =
    case leading
    when Vector
      raise DataFrameArgumentError, 'Argument is not a boolean.' unless leading.boolean?

      leading.data
    when Arrow::BooleanArray
      leading
    else
      raise DataFrameArgumentError, 'Argument is not a boolean.' unless booleans?(booleans)

      Arrow::BooleanArray.new(booleans)
    end

  filter_by_vector(mask)
end
|
65
157
|
|
66
158
|
private
|
@@ -75,5 +167,32 @@ module RedAmber
|
|
75
167
|
DataFrame.new(@table[keys])
|
76
168
|
end
|
77
169
|
end
|
170
|
+
|
171
|
+
# Accepts indices by numeric Vector
|
172
|
+
# Take observations by a numeric Vector of indices.
# Negative indices count from the tail.
#
# @raise [DataFrameArgumentError] if indices is not numeric or any
#   index falls outside of self
def take_by_array(indices)
  unless indices.numeric?
    raise DataFrameArgumentError, "Indices must be a numeric Vector: #{indices}"
  end
  if indices.min <= -size - 1
    raise DataFrameArgumentError, "Index out of range: #{indices.min}"
  end

  # shift negative indices so that every index counts from the head
  from_head = (indices < 0).if_else(indices + size, indices)
  if from_head.max >= size
    raise DataFrameArgumentError, "Index out of range: #{from_head.max}"
  end

  # build an unsigned integer index array for Arrow
  positions = Arrow::UInt64ArrayBuilder.build(from_head.data)

  taken = Arrow::Function.find(:take).execute([table, positions])
  DataFrame.new(taken.value)
end
|
184
|
+
|
185
|
+
# Accepts booleans by Arrow::BooleanArray
|
186
|
+
# Filter observations by an Arrow::BooleanArray.
#
# @raise [DataFrameArgumentError] if the array length differs from size
def filter_by_vector(boolean_array)
  if boolean_array.length != size
    raise DataFrameArgumentError, 'Booleans must be same size as self.'
  end

  filtered = Arrow::Function.find(:filter).execute([table, boolean_array])
  DataFrame.new(filtered.value)
end
|
192
|
+
|
193
|
+
# return a DataFrame with same keys as self without values
|
194
|
+
# Filter with an all-false mask, which drops every observation.
def remove_all_values
  all_false = Array.new(size, false)
  filter_by_vector(Arrow::BooleanArray.new(all_false))
end
|
78
197
|
end
|
79
198
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RedAmber
  # Private helper methods shared by the classes that mix this module in.
  # Several helpers call #size on the including object.
  module Helper
    private

    # Plural suffix: 's' when num is greater than 1, otherwise ''.
    def pl(num)
      num > 1 ? 's' : ''
    end

    # True if any index lies outside of -size...size.
    # An empty collection has no index out of range.
    def out_of_range?(indeces)
      upper = indeces.max
      lower = indeces.min
      # max/min are nil for an empty collection; comparing nil would raise
      return false if upper.nil? || lower.nil?

      upper >= size || lower < -size
    end

    # True if every element is an Integer.
    def integers?(enum)
      enum.all?(Integer)
    end

    # True if every element is a Symbol or a String.
    def sym_or_str?(enum)
      enum.all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
    end

    # True if every element is true, false or nil.
    def booleans?(enum)
      enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
    end

    # Build a one-variable DataFrame from a key and a Vector's data.
    def create_dataframe_from_vector(key, vector)
      DataFrame.new(key => vector.data)
    end

    # Normalize every argument to an Array and build one Vector
    # from the concatenated elements.
    def parse_to_vector(args)
      Vector.new(args.flat_map { |elem| normalize_element(elem) })
    end

    # Convert one selector element to an Array.
    #
    # Scalars are wrapped. Ranges with an Integer end point (or with both
    # ends open) are resolved against 0...size, so negative and endless
    # ranges work as they do for Array#[]. Other ranges and objects are
    # converted with to_a / Array().
    #
    # @raise [DataFrameArgumentError] if a range end point is outside of self
    def normalize_element(elem)
      case elem
      when Numeric, String, Symbol, TrueClass, FalseClass, NilClass
        [elem]
      when Range
        both_end = [elem.begin, elem.end]
        # make the end inclusive so that the bounds check below is exact
        both_end[1] -= 1 if elem.exclude_end? && elem.end.is_a?(Integer)

        return elem.to_a unless both_end.any?(Integer) || both_end.all?(&:nil?)

        if both_end.any? { |e| e && (e >= size || e < -size) }
          raise DataFrameArgumentError, "Index out of range: #{elem} for 0..#{size - 1}"
        end

        (0...size).to_a[elem]
      else
        Array(elem)
      end
    end
  end
end
|
data/lib/red_amber/vector.rb
CHANGED
@@ -1,25 +1,37 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Values in variable (columnar) data object
|
5
5
|
# @data : holds Arrow::ChunkedArray
|
6
6
|
class Vector
|
7
7
|
# mix-in
|
8
|
-
include VectorCompensable
|
9
8
|
include VectorFunctions
|
9
|
+
include VectorUpdatable
|
10
|
+
include VectorSelectable
|
11
|
+
include Helper
|
10
12
|
|
11
|
-
|
12
|
-
def initialize(array)
|
13
|
+
# Create a Vector.
#
# Accepts a Vector, an Arrow::Array / Arrow::ChunkedArray, a Range, or
# values given as an Array or as separate arguments. With no argument, or
# a leading nil, an empty Vector is created.
#
# @raise [VectorArgumentError] if Arrow::Array.new raises an Error
#   for the given values
def initialize(*array)
  @key = nil # default is 'headless'

  if array.empty? || array[0].nil?
    # Assign the empty array directly: `Vector.new([])` here would build a
    # separate object and leave @data of this instance nil.
    @data = Arrow::Array.new([])
    return
  end

  array.flatten!
  case array[0]
  when Vector
    @data = array[0].data
    return
  when Arrow::Array, Arrow::ChunkedArray
    @data = array[0]
    return
  when Range
    @data = Arrow::Array.new(Array(array[0]))
    return
  end

  begin
    @data = Arrow::Array.new(Array(array))
  rescue Error # NOTE(review): presumably RedAmber::Error - confirm it covers Arrow's build errors
    raise VectorArgumentError, "Invalid argument: #{array}"
  end
end
|
25
37
|
|
@@ -52,6 +64,16 @@ module RedAmber
|
|
52
64
|
alias_method :to_a, :values
|
53
65
|
alias_method :entries, :values
|
54
66
|
|
67
|
+
# Array of the positions 0 up to size - 1.
def indices
  Array.new(size) { |position| position }
end
|
70
|
+
alias_method :indexes, :indices
|
71
|
+
alias_method :indeces, :indices
|
72
|
+
|
73
|
+
# Implicit Array conversion; returns the same Array as #to_a.
def to_ary
  to_a
end
|
76
|
+
|
55
77
|
def size
|
56
78
|
# only defined :length in Arrow?
|
57
79
|
@data.length
|
@@ -60,6 +82,10 @@ module RedAmber
|
|
60
82
|
alias_method :n_rows, :size
|
61
83
|
alias_method :nrow, :size
|
62
84
|
|
85
|
+
# True if the Vector has no elements.
def empty?
  size == 0 # rubocop:disable Style/NumericPredicate
end
|
88
|
+
|
63
89
|
# Nick name of the value type of @data, as a Symbol.
def type
  value_type = @data.value_type
  value_type.nick.to_sym
end
|
@@ -124,5 +150,9 @@ module RedAmber
|
|
124
150
|
# Number of NaN elements; 0 for a non-numeric Vector.
def n_nans
  return 0 unless numeric?

  is_nan.to_a.count(true)
end
|
153
|
+
|
154
|
+
# Result of #any on the #is_nil flags of this Vector.
def has_nil?
  nil_flags = is_nil
  nil_flags.any
end
|
127
157
|
end
|
128
158
|
end
|
@@ -16,11 +16,13 @@ module RedAmber
|
|
16
16
|
# Define one method per unary aggregation; each returns a scalar value.
unary_aggregations.each do |aggregation|
  define_method(aggregation) do |opts: nil|
    get_scalar(exec_func_unary(aggregation, options: opts))
  end
end
|
22
22
|
alias_method :median, :approximate_median
|
23
23
|
alias_method :count_uniq, :count_distinct
|
24
|
+
alias_method :all?, :all
|
25
|
+
alias_method :any?, :any
|
24
26
|
|
25
27
|
def unbiased_variance
|
26
28
|
variance(opts: { ddof: 1 })
|
@@ -47,7 +49,7 @@ module RedAmber
|
|
47
49
|
# Define one method per unary element-wise function; each returns a Vector.
unary_element_wise.each do |element_wise|
  define_method(element_wise) do |opts: nil|
    Vector.new(exec_func_unary(element_wise, options: opts).value)
  end
end
|
53
55
|
alias_method :is_nil, :is_null
|
@@ -72,12 +74,12 @@ module RedAmber
|
|
72
74
|
# Define each unary element-wise function under both its function name
# and its operator name; both run the same function and return a Vector.
unary_element_wise_op.each do |function, operator|
  [function, operator].each do |method_name|
    define_method(method_name) do |opts: nil|
      Vector.new(exec_func_unary(function, options: opts).value)
    end
  end
end
|
83
85
|
alias_method :not, :invert
|
@@ -95,7 +97,7 @@ module RedAmber
|
|
95
97
|
# Define one method per binary element-wise function; each returns a Vector.
binary_element_wise.each do |element_wise|
  define_method(element_wise) do |other, opts: nil|
    Vector.new(exec_func_binary(element_wise, other, options: opts).value)
  end
end
|
101
103
|
|
@@ -111,7 +113,7 @@ module RedAmber
|
|
111
113
|
# Define one method per logical binary function under the given method
# name; each returns a Vector.
logical_binary_element_wise.each do |method_name, function|
  define_method(method_name) do |other, opts: nil|
    Vector.new(exec_func_binary(function, other, options: opts).value)
  end
end
|
117
119
|
|
@@ -144,12 +146,12 @@ module RedAmber
|
|
144
146
|
# Define each binary element-wise function under both its function name
# and its operator name; both run the same function and return a Vector.
binary_element_wise_op.each do |function, operator|
  [function, operator].each do |method_name|
    define_method(method_name) do |other, opts: nil|
      Vector.new(exec_func_binary(function, other, options: opts).value)
    end
  end
end
|
155
157
|
alias_method :eq, :equal
|
@@ -159,8 +161,17 @@ module RedAmber
|
|
159
161
|
alias_method :lt, :less
|
160
162
|
alias_method :ne, :not_equal
|
161
163
|
|
164
|
+
# Convert other to a Vector so it can be combined with self.
# The result has the shape used by Ruby's coerce protocol:
# [converted_other, self].
#
# - Vector, Array, Arrow::Array: must have the same length as self and is
#   converted element by element
# - anything else: treated as a scalar and repeated size times
#
# @raise [VectorArgumentError] if a collection differs in length from self
def coerce(other)
  case other
  when Vector, Array, Arrow::Array
    raise VectorArgumentError, "Size unmatch: #{size} != #{other.length}" unless size == other.length

    [Vector.new(Array(other)), self]
  else
    # Must be an else branch: as a trailing statement it would also run for
    # collections and repeat them size times.
    [Vector.new(Array(other) * size), self]
  end
end
|
173
|
+
|
162
174
|
# (array functions)
|
163
|
-
# array_filter, array_take
|
164
175
|
# dictionary_encode,
|
165
176
|
# partition_nth_indices,
|
166
177
|
# quarter, quarters_between,
|
@@ -192,17 +203,17 @@ module RedAmber
|
|
192
203
|
# strptime, subsecond, us_week, week, weeks_between, year, year_month_day, years_between
|
193
204
|
|
194
205
|
# (conditional)
|
195
|
-
# case_when, cast,
|
206
|
+
# case_when, cast,
|
196
207
|
|
197
208
|
# (indices)
|
198
209
|
# choose, index_in, index_in_meta_binary, indices_nonzero
|
199
210
|
|
200
211
|
# (others)
|
201
|
-
# coalesce,
|
202
|
-
#
|
212
|
+
# coalesce,
|
213
|
+
# is_in_meta_binary,
|
203
214
|
# list_element, list_flatten, list_parent_indices, list_value_length, make_struct,
|
204
215
|
# max_element_wise, min_element_wise, random, select_k_unstable,
|
205
|
-
#
|
216
|
+
# struct_field,
|
206
217
|
|
207
218
|
private # =======
|
208
219
|
|
@@ -221,7 +232,7 @@ module RedAmber
|
|
221
232
|
end
|
222
233
|
end
|
223
234
|
|
224
|
-
def
|
235
|
+
def get_scalar(datum)
|
225
236
|
output = datum.value
|
226
237
|
case output
|
227
238
|
when Arrow::StringScalar then output.to_s
|
@@ -232,10 +243,6 @@ module RedAmber
|
|
232
243
|
end
|
233
244
|
end
|
234
245
|
|
235
|
-
def take_out_element_wise(datum)
|
236
|
-
Vector.new(datum.value)
|
237
|
-
end
|
238
|
-
|
239
246
|
module_function # ======
|
240
247
|
|
241
248
|
def find(function_name)
|
@@ -0,0 +1,124 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
|
4
|
+
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
|
+
|
6
|
+
module RedAmber
  # Mix-in for the class Vector.
  # Methods to select a part of the elements.
  module VectorSelectable
    # Vector built from the result of Arrow's :drop_null function.
    def drop_nil
      Vector.new(find(:drop_null).execute([data]).value)
    end

    # Select elements by index.
    # Accepts indices as separate arguments, an Array, or a single
    # non-numeric object (e.g. a Vector), which is used as is.
    # TODO: support for option {boundscheck: true}
    def take(*indices)
      indices.flatten!
      return Vector.new([]) if indices.empty?

      picked =
        if indices.one? && !indices[0].is_a?(Numeric)
          indices[0]
        else
          indices
        end
      picked = Vector.new(picked) unless picked.is_a?(Vector)

      take_by_vector(picked) # returns sub Vector
    end

    # Select the elements flagged by booleans.
    # Accepts a boolean Vector, an Arrow::BooleanArray or true/false/nil values.
    # TODO: support for option {null_selection_behavior: :drop}
    def filter(*booleans)
      booleans.flatten!
      return Vector.new([]) if booleans.empty?

      leading = booleans.first
      mask =
        case leading
        when Vector
          raise VectorTypeError, 'Argument is not a boolean.' unless leading.boolean?

          leading.data
        when Arrow::BooleanArray
          leading
        else
          raise VectorTypeError, 'Argument is not a boolean.' unless booleans?(booleans)

          Arrow::BooleanArray.new(booleans)
        end

      filter_by_array(mask) # returns sub Vector
    end

    # Select elements by indices (numeric) or by booleans.
    #
    # @raise [VectorTypeError] if a Vector argument is neither numeric nor boolean
    # @raise [VectorArgumentError] if the arguments are of an unsupported kind
    def [](*args)
      args.flatten!
      return Vector.new([]) if args.empty?

      leading = args.first
      array =
        case leading
        when Vector
          return take_by_vector(leading) if leading.numeric?
          return filter_by_array(leading.data) if leading.boolean?

          raise VectorTypeError, "Argument must be numeric or boolean: #{leading}"
        when Arrow::BooleanArray
          return filter_by_array(leading)
        when Arrow::Array
          leading
        else
          unless leading.is_a?(Numeric) || booleans?([leading])
            raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
          end

          Arrow::Array.new(args)
        end
      return filter_by_array(array) if array.is_a?(Arrow::BooleanArray)

      selector = Vector.new(array)
      raise VectorArgumentError, "Invalid argument: #{args}" unless selector.numeric?

      take_by_vector(selector)
    end

    # Vector wrapping the result of data.is_in for the given values.
    #
    # @param values [Array, Arrow::Array, Vector]
    def is_in(*values)
      values.flatten!
      leading = values.first
      given =
        case leading
        when Vector then leading.data
        when Arrow::Array then leading
        end
      # plain values are wrapped in the same array class as self's data
      lookup = given || data.class.new(values)
      Vector.new(data.is_in(lookup))
    end

    # Position of the first element equal to element, or nil.
    # Works on the Ruby Array (to_a); Arrow's support required.
    def index(element)
      to_a.index(element)
    end

    private

    # Take elements by a numeric Vector of indices.
    # Negative indices count from the tail.
    def take_by_vector(indices)
      unless indices.numeric?
        raise VectorTypeError, "Indices must be numeric Vector: #{indices}"
      end
      if indices.min <= -size - 1
        raise VectorArgumentError, "Index out of range: #{indices.min}"
      end

      # shift negative indices so that every index counts from the head
      from_head = (indices < 0).if_else(indices + size, indices)
      if from_head.max >= size
        raise VectorArgumentError, "Index out of range: #{from_head.max}"
      end

      # build an unsigned integer index array for Arrow
      positions = Arrow::UInt64ArrayBuilder.build(from_head.data)

      taken = find(:array_take).execute([data, positions])
      Vector.new(taken.value)
    end

    # Filter elements by an Arrow::BooleanArray of the same length as self.
    def filter_by_array(boolean_array)
      if boolean_array.length != size
        raise VectorArgumentError, 'Booleans must be same size as self.'
      end

      filtered = find(:array_filter).execute([data, boolean_array])
      Vector.new(filtered.value)
    end
  end
end
|