red_amber 0.1.5 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +33 -5
- data/.rubocop_todo.yml +2 -15
- data/.yardopts +1 -0
- data/CHANGELOG.md +164 -18
- data/Gemfile +6 -1
- data/README.md +247 -33
- data/Rakefile +1 -0
- data/benchmark/csv_load_penguins.yml +1 -1
- data/doc/DataFrame.md +383 -219
- data/doc/Vector.md +247 -37
- data/doc/examples_of_red_amber.ipynb +5454 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red-amber.rb +3 -0
- data/lib/red_amber/data_frame.rb +62 -10
- data/lib/red_amber/data_frame_displayable.rb +86 -9
- data/lib/red_amber/data_frame_selectable.rb +151 -32
- data/lib/red_amber/data_frame_variable_operation.rb +4 -0
- data/lib/red_amber/group.rb +59 -0
- data/lib/red_amber/helper.rb +61 -0
- data/lib/red_amber/vector.rb +59 -15
- data/lib/red_amber/vector_functions.rb +47 -38
- data/lib/red_amber/vector_selectable.rb +126 -0
- data/lib/red_amber/vector_updatable.rb +125 -0
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +6 -3
- data/red_amber.gemspec +0 -2
- metadata +9 -33
- data/lib/red_amber/data_frame_helper.rb +0 -64
- data/lib/red_amber/data_frame_observation_operation.rb +0 -83
- data/lib/red_amber/vector_compensable.rb +0 -68
data/lib/red_amber/vector.rb
CHANGED
@@ -1,25 +1,37 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
#
|
4
|
+
# Values in variable (columnar) data object
|
5
5
|
# @data : holds Arrow::ChunkedArray
|
6
6
|
class Vector
|
7
7
|
# mix-in
|
8
|
-
include VectorCompensable
|
9
8
|
include VectorFunctions
|
9
|
+
include VectorUpdatable
|
10
|
+
include VectorSelectable
|
11
|
+
include Helper
|
10
12
|
|
11
|
-
|
12
|
-
def initialize(array)
|
13
|
+
def initialize(*array)
|
13
14
|
@key = nil # default is 'headless'
|
14
|
-
|
15
|
-
|
16
|
-
@data = array.data
|
17
|
-
when Arrow::Array, Arrow::ChunkedArray
|
18
|
-
@data = array
|
19
|
-
when Array
|
20
|
-
@data = Arrow::Array.new(array)
|
15
|
+
if array.empty? || array[0].nil?
|
16
|
+
Vector.new([])
|
21
17
|
else
|
22
|
-
|
18
|
+
array.flatten!
|
19
|
+
case array[0]
|
20
|
+
when Vector
|
21
|
+
@data = array[0].data
|
22
|
+
return
|
23
|
+
when Arrow::Array, Arrow::ChunkedArray
|
24
|
+
@data = array[0]
|
25
|
+
return
|
26
|
+
when Range
|
27
|
+
@data = Arrow::Array.new(Array(array[0]))
|
28
|
+
return
|
29
|
+
end
|
30
|
+
begin
|
31
|
+
@data = Arrow::Array.new(Array(array))
|
32
|
+
rescue Error
|
33
|
+
raise VectorArgumentError, "Invalid argument: #{array}"
|
34
|
+
end
|
23
35
|
end
|
24
36
|
end
|
25
37
|
|
@@ -52,6 +64,16 @@ module RedAmber
|
|
52
64
|
alias_method :to_a, :values
|
53
65
|
alias_method :entries, :values
|
54
66
|
|
67
|
+
def indices
|
68
|
+
(0...size).to_a
|
69
|
+
end
|
70
|
+
alias_method :indexes, :indices
|
71
|
+
alias_method :indeces, :indices
|
72
|
+
|
73
|
+
def to_ary
|
74
|
+
to_a
|
75
|
+
end
|
76
|
+
|
55
77
|
def size
|
56
78
|
# only defined :length in Arrow?
|
57
79
|
@data.length
|
@@ -60,20 +82,32 @@ module RedAmber
|
|
60
82
|
alias_method :n_rows, :size
|
61
83
|
alias_method :nrow, :size
|
62
84
|
|
85
|
+
def empty?
|
86
|
+
size.zero?
|
87
|
+
end
|
88
|
+
|
63
89
|
def type
|
64
90
|
@data.value_type.nick.to_sym
|
65
91
|
end
|
66
92
|
|
67
93
|
def boolean?
|
68
|
-
|
94
|
+
type_class == Arrow::BooleanDataType
|
69
95
|
end
|
70
96
|
|
71
97
|
def numeric?
|
72
98
|
type_class < Arrow::NumericDataType
|
73
99
|
end
|
74
100
|
|
101
|
+
def float?
|
102
|
+
type_class < Arrow::FloatingPointDataType
|
103
|
+
end
|
104
|
+
|
105
|
+
def integer?
|
106
|
+
type_class < Arrow::IntegerDataType
|
107
|
+
end
|
108
|
+
|
75
109
|
def string?
|
76
|
-
|
110
|
+
type_class == Arrow::StringDataType
|
77
111
|
end
|
78
112
|
|
79
113
|
def temporal?
|
@@ -84,7 +118,13 @@ module RedAmber
|
|
84
118
|
@data.value_data_type.class
|
85
119
|
end
|
86
120
|
|
87
|
-
|
121
|
+
def each
|
122
|
+
return enum_for(:each) unless block_given?
|
123
|
+
|
124
|
+
size.times do |i|
|
125
|
+
yield self[i]
|
126
|
+
end
|
127
|
+
end
|
88
128
|
|
89
129
|
def chunked?
|
90
130
|
@data.is_a? Arrow::ChunkedArray
|
@@ -124,5 +164,9 @@ module RedAmber
|
|
124
164
|
def n_nans
|
125
165
|
numeric? ? is_nan.to_a.count(true) : 0
|
126
166
|
end
|
167
|
+
|
168
|
+
def has_nil?
|
169
|
+
is_nil.any
|
170
|
+
end
|
127
171
|
end
|
128
172
|
end
|
@@ -14,21 +14,23 @@ module RedAmber
|
|
14
14
|
unary_aggregations =
|
15
15
|
%i[all any approximate_median count count_distinct max mean min min_max product stddev sum variance]
|
16
16
|
unary_aggregations.each do |function|
|
17
|
-
define_method(function) do |
|
18
|
-
datum = exec_func_unary(function, options
|
19
|
-
|
17
|
+
define_method(function) do |**options|
|
18
|
+
datum = exec_func_unary(function, options)
|
19
|
+
get_scalar(datum)
|
20
20
|
end
|
21
21
|
end
|
22
22
|
alias_method :median, :approximate_median
|
23
23
|
alias_method :count_uniq, :count_distinct
|
24
|
+
alias_method :all?, :all
|
25
|
+
alias_method :any?, :any
|
24
26
|
|
25
27
|
def unbiased_variance
|
26
|
-
variance(
|
28
|
+
variance(ddof: 1)
|
27
29
|
end
|
28
30
|
alias_method :var, :unbiased_variance
|
29
31
|
|
30
32
|
def sd
|
31
|
-
stddev(
|
33
|
+
stddev(ddof: 1)
|
32
34
|
end
|
33
35
|
alias_method :std, :sd
|
34
36
|
|
@@ -45,9 +47,9 @@ module RedAmber
|
|
45
47
|
%i[abs array_sort_indices atan bit_wise_not ceil cos fill_null_backward fill_null_forward floor is_finite
|
46
48
|
is_inf is_nan is_null is_valid round round_to_multiple sign sin tan trunc unique]
|
47
49
|
unary_element_wise.each do |function|
|
48
|
-
define_method(function) do |
|
49
|
-
datum = exec_func_unary(function, options
|
50
|
-
|
50
|
+
define_method(function) do |**options|
|
51
|
+
datum = exec_func_unary(function, options)
|
52
|
+
Vector.new(datum.value)
|
51
53
|
end
|
52
54
|
end
|
53
55
|
alias_method :is_nil, :is_null
|
@@ -70,14 +72,14 @@ module RedAmber
|
|
70
72
|
negate: '-@',
|
71
73
|
}
|
72
74
|
unary_element_wise_op.each do |function, operator|
|
73
|
-
define_method(function) do |
|
74
|
-
datum = exec_func_unary(function, options
|
75
|
-
|
75
|
+
define_method(function) do |**options|
|
76
|
+
datum = exec_func_unary(function, options)
|
77
|
+
Vector.new(datum.value)
|
76
78
|
end
|
77
79
|
|
78
|
-
define_method(operator) do |
|
79
|
-
datum = exec_func_unary(function, options
|
80
|
-
|
80
|
+
define_method(operator) do |**options|
|
81
|
+
datum = exec_func_unary(function, options)
|
82
|
+
Vector.new(datum.value)
|
81
83
|
end
|
82
84
|
end
|
83
85
|
alias_method :not, :invert
|
@@ -93,9 +95,9 @@ module RedAmber
|
|
93
95
|
binary_element_wise =
|
94
96
|
%i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor]
|
95
97
|
binary_element_wise.each do |function|
|
96
|
-
define_method(function) do |other,
|
97
|
-
datum = exec_func_binary(function, other, options
|
98
|
-
|
98
|
+
define_method(function) do |other, **options|
|
99
|
+
datum = exec_func_binary(function, other, options)
|
100
|
+
Vector.new(datum.value)
|
99
101
|
end
|
100
102
|
end
|
101
103
|
|
@@ -109,9 +111,9 @@ module RedAmber
|
|
109
111
|
or_org: :or,
|
110
112
|
}
|
111
113
|
logical_binary_element_wise.each do |method, function|
|
112
|
-
define_method(method) do |other,
|
113
|
-
datum = exec_func_binary(function, other, options
|
114
|
-
|
114
|
+
define_method(method) do |other, **options|
|
115
|
+
datum = exec_func_binary(function, other, options)
|
116
|
+
Vector.new(datum.value)
|
115
117
|
end
|
116
118
|
end
|
117
119
|
|
@@ -142,14 +144,14 @@ module RedAmber
|
|
142
144
|
not_equal: '!=',
|
143
145
|
}
|
144
146
|
binary_element_wise_op.each do |function, operator|
|
145
|
-
define_method(function) do |other,
|
146
|
-
datum = exec_func_binary(function, other, options
|
147
|
-
|
147
|
+
define_method(function) do |other, **options|
|
148
|
+
datum = exec_func_binary(function, other, options)
|
149
|
+
Vector.new(datum.value)
|
148
150
|
end
|
149
151
|
|
150
|
-
define_method(operator) do |other,
|
151
|
-
datum = exec_func_binary(function, other, options
|
152
|
-
|
152
|
+
define_method(operator) do |other, **options|
|
153
|
+
datum = exec_func_binary(function, other, options)
|
154
|
+
Vector.new(datum.value)
|
153
155
|
end
|
154
156
|
end
|
155
157
|
alias_method :eq, :equal
|
@@ -159,8 +161,17 @@ module RedAmber
|
|
159
161
|
alias_method :lt, :less
|
160
162
|
alias_method :ne, :not_equal
|
161
163
|
|
164
|
+
def coerce(other)
|
165
|
+
case other
|
166
|
+
when Vector, Array, Arrow::Array
|
167
|
+
raise VectorArgumentError, "Size unmatch: #{size} != #{other.length}" unless size == other.length
|
168
|
+
|
169
|
+
[Vector.new(Array(other)), self]
|
170
|
+
end
|
171
|
+
[Vector.new(Array(other) * size), self]
|
172
|
+
end
|
173
|
+
|
162
174
|
# (array functions)
|
163
|
-
# array_filter, array_take
|
164
175
|
# dictionary_encode,
|
165
176
|
# partition_nth_indices,
|
166
177
|
# quarter, quarters_between,
|
@@ -192,25 +203,27 @@ module RedAmber
|
|
192
203
|
# strptime, subsecond, us_week, week, weeks_between, year, year_month_day, years_between
|
193
204
|
|
194
205
|
# (conditional)
|
195
|
-
# case_when, cast,
|
206
|
+
# case_when, cast,
|
196
207
|
|
197
208
|
# (indices)
|
198
209
|
# choose, index_in, index_in_meta_binary, indices_nonzero
|
199
210
|
|
200
211
|
# (others)
|
201
|
-
# coalesce,
|
202
|
-
#
|
212
|
+
# coalesce,
|
213
|
+
# is_in_meta_binary,
|
203
214
|
# list_element, list_flatten, list_parent_indices, list_value_length, make_struct,
|
204
215
|
# max_element_wise, min_element_wise, random, select_k_unstable,
|
205
|
-
#
|
216
|
+
# struct_field,
|
206
217
|
|
207
218
|
private # =======
|
208
219
|
|
209
|
-
def exec_func_unary(function, options
|
220
|
+
def exec_func_unary(function, options)
|
221
|
+
options = nil if options.empty?
|
210
222
|
find(function).execute([data], options)
|
211
223
|
end
|
212
224
|
|
213
|
-
def exec_func_binary(function, other, options
|
225
|
+
def exec_func_binary(function, other, options)
|
226
|
+
options = nil if options.empty?
|
214
227
|
case other
|
215
228
|
when Vector
|
216
229
|
find(function).execute([data, other.data], options)
|
@@ -221,7 +234,7 @@ module RedAmber
|
|
221
234
|
end
|
222
235
|
end
|
223
236
|
|
224
|
-
def
|
237
|
+
def get_scalar(datum)
|
225
238
|
output = datum.value
|
226
239
|
case output
|
227
240
|
when Arrow::StringScalar then output.to_s
|
@@ -232,10 +245,6 @@ module RedAmber
|
|
232
245
|
end
|
233
246
|
end
|
234
247
|
|
235
|
-
def take_out_element_wise(datum)
|
236
|
-
Vector.new(datum.value)
|
237
|
-
end
|
238
|
-
|
239
248
|
module_function # ======
|
240
249
|
|
241
250
|
def find(function_name)
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
|
4
|
+
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
|
+
|
6
|
+
module RedAmber
|
7
|
+
# mix-ins for class Vector
|
8
|
+
# Functions to select some data.
|
9
|
+
module VectorSelectable
|
10
|
+
def drop_nil
|
11
|
+
datum = find(:drop_null).execute([data])
|
12
|
+
Vector.new(datum.value)
|
13
|
+
end
|
14
|
+
|
15
|
+
# vector calculation version of selection by indices
|
16
|
+
# TODO: support for option {boundscheck: true}
|
17
|
+
def take(*indices)
|
18
|
+
indices.flatten!
|
19
|
+
return Vector.new([]) if indices.empty?
|
20
|
+
|
21
|
+
indices = indices[0] if indices.one? && !indices[0].is_a?(Numeric)
|
22
|
+
indices = Vector.new(indices) unless indices.is_a?(Vector)
|
23
|
+
|
24
|
+
take_by_vector(indices) # returns sub Vector
|
25
|
+
end
|
26
|
+
|
27
|
+
# TODO: support for option {null_selection_behavior: :drop}
|
28
|
+
def filter(*booleans)
|
29
|
+
booleans.flatten!
|
30
|
+
return Vector.new([]) if booleans.empty?
|
31
|
+
|
32
|
+
b = booleans[0]
|
33
|
+
boolean_array =
|
34
|
+
case b
|
35
|
+
when Vector
|
36
|
+
raise VectorTypeError, 'Argument is not a boolean.' unless b.boolean?
|
37
|
+
|
38
|
+
b.data
|
39
|
+
when Arrow::BooleanArray
|
40
|
+
b
|
41
|
+
else
|
42
|
+
raise VectorTypeError, 'Argument is not a boolean.' unless booleans?(booleans)
|
43
|
+
|
44
|
+
Arrow::BooleanArray.new(booleans)
|
45
|
+
end
|
46
|
+
|
47
|
+
filter_by_array(boolean_array) # returns sub Vector
|
48
|
+
end
|
49
|
+
|
50
|
+
# @param indices
|
51
|
+
# @param booleans
|
52
|
+
def [](*args)
|
53
|
+
args.flatten!
|
54
|
+
return Vector.new([]) if args.empty?
|
55
|
+
|
56
|
+
arg = args[0]
|
57
|
+
case arg
|
58
|
+
when Vector
|
59
|
+
return take_by_vector(arg) if arg.numeric?
|
60
|
+
return filter_by_array(arg.data) if arg.boolean?
|
61
|
+
|
62
|
+
raise VectorTypeError, "Argument must be numeric or boolean: #{arg}"
|
63
|
+
when Arrow::BooleanArray
|
64
|
+
return filter_by_array(arg)
|
65
|
+
when Arrow::Array
|
66
|
+
array = arg
|
67
|
+
when Range
|
68
|
+
array = normalize_element(arg)
|
69
|
+
else
|
70
|
+
unless arg.is_a?(Numeric) || booleans?([arg])
|
71
|
+
raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
array ||= Arrow::Array.new(args)
|
75
|
+
return filter_by_array(array) if array.is_a?(Arrow::BooleanArray)
|
76
|
+
|
77
|
+
vector = Vector.new(array)
|
78
|
+
return take_by_vector(vector) if vector.numeric?
|
79
|
+
|
80
|
+
raise VectorArgumentError, "Invalid argument: #{args}"
|
81
|
+
end
|
82
|
+
|
83
|
+
# @param values [Array, Arrow::Array, Vector]
|
84
|
+
def is_in(*values)
|
85
|
+
values.flatten!
|
86
|
+
array =
|
87
|
+
case values[0]
|
88
|
+
when Vector
|
89
|
+
values[0].data
|
90
|
+
when Arrow::Array
|
91
|
+
values[0]
|
92
|
+
end
|
93
|
+
array ||= data.class.new(values)
|
94
|
+
Vector.new(data.is_in(array))
|
95
|
+
end
|
96
|
+
|
97
|
+
# Arrow's support required
|
98
|
+
def index(element)
|
99
|
+
to_a.index(element)
|
100
|
+
end
|
101
|
+
|
102
|
+
private
|
103
|
+
|
104
|
+
# Accepts indices by numeric Vector
|
105
|
+
def take_by_vector(indices)
|
106
|
+
raise VectorTypeError, "Indices must be numeric Vector: #{indices}" unless indices.numeric?
|
107
|
+
raise VectorArgumentError, "Index out of range: #{indices.min}" if indices.min <= -size - 1
|
108
|
+
|
109
|
+
normalized_indices = (indices < 0).if_else(indices + size, indices) # normalize index from tail
|
110
|
+
raise VectorArgumentError, "Index out of range: #{normalized_indices.max}" if normalized_indices.max >= size
|
111
|
+
|
112
|
+
index_array = Arrow::UInt64ArrayBuilder.build(normalized_indices.data) # round to integer array
|
113
|
+
|
114
|
+
datum = find(:array_take).execute([data, index_array])
|
115
|
+
Vector.new(datum.value)
|
116
|
+
end
|
117
|
+
|
118
|
+
# Accepts booleans by Arrow::BooleanArray
|
119
|
+
def filter_by_array(boolean_array)
|
120
|
+
raise VectorArgumentError, 'Booleans must be same size as self.' unless boolean_array.length == size
|
121
|
+
|
122
|
+
datum = find(:array_filter).execute([data, boolean_array])
|
123
|
+
Vector.new(datum.value)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
|
4
|
+
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
|
+
|
6
|
+
module RedAmber
|
7
|
+
# mix-ins for class Vector
|
8
|
+
# Functions to make up some data (especially missing) for new data.
|
9
|
+
module VectorUpdatable
|
10
|
+
# Replace data
|
11
|
+
# @param args [Array, Vector, Arrow::Array] index specifier
|
12
|
+
# @param replacer [Array, Vector, Arrow::Array] new data to replace with.
|
13
|
+
# @return [Vector] Replaced new Vector
|
14
|
+
def replace(args, replacer)
|
15
|
+
args =
|
16
|
+
case args
|
17
|
+
when Array
|
18
|
+
args
|
19
|
+
when Range
|
20
|
+
normalize_element(args)
|
21
|
+
else
|
22
|
+
Array(args)
|
23
|
+
end
|
24
|
+
replacer = Array(replacer)
|
25
|
+
return self if args.empty? || args[0].nil?
|
26
|
+
|
27
|
+
replacer = nil if replacer.empty?
|
28
|
+
vector = parse_to_vector(args)
|
29
|
+
booleans =
|
30
|
+
if vector.boolean?
|
31
|
+
vector
|
32
|
+
elsif vector.numeric?
|
33
|
+
replacer.sort_by! { |x| args[replacer.index(x)] } if replacer # rubocop:disable Style/SafeNavigation
|
34
|
+
Vector.new(indices).is_in(vector)
|
35
|
+
else
|
36
|
+
raise VectorArgumentError, "Invalid data type #{args}"
|
37
|
+
end
|
38
|
+
replace_with(booleans, replacer)
|
39
|
+
end
|
40
|
+
|
41
|
+
# (related functions)
|
42
|
+
# fill_null_backward, fill_null_forward
|
43
|
+
|
44
|
+
# [Ternary element-wise]: boolean_vector.func(if_true, else) => vector
|
45
|
+
def if_else(true_choice, false_choice)
|
46
|
+
true_choice = true_choice.data if true_choice.is_a? Vector
|
47
|
+
false_choice = false_choice.data if false_choice.is_a? Vector
|
48
|
+
raise VectorTypeError, 'Reciever must be a boolean' unless boolean?
|
49
|
+
|
50
|
+
datum = find(:if_else).execute([data, true_choice, false_choice])
|
51
|
+
Vector.new(datum.value)
|
52
|
+
end
|
53
|
+
|
54
|
+
# same behavior as Ruby's `!` applied to each element (nil is treated as false)
|
55
|
+
# ![true, false, nil] #=> [false, true, true]
|
56
|
+
def primitive_invert
|
57
|
+
raise VectorTypeError, "Not a boolean Vector: #{self}" unless boolean?
|
58
|
+
|
59
|
+
is_nil.if_else(false, self).invert
|
60
|
+
end
|
61
|
+
|
62
|
+
def shift(amount = 1, fill: nil)
|
63
|
+
raise VectorArgumentError, 'Shift amount is too large' if amount.abs > size
|
64
|
+
|
65
|
+
if amount.positive?
|
66
|
+
replace(amount..-1, self[0...-amount]).replace(0...amount, fill)
|
67
|
+
elsif amount.negative?
|
68
|
+
replace(0...amount, self[-amount..]).replace(amount..-1, fill)
|
69
|
+
else # amount == 0
|
70
|
+
self
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
private
|
75
|
+
|
76
|
+
# [Ternary]: replace_with(booleans, replacements) => vector
|
77
|
+
# Replace items selected with a boolean mask
|
78
|
+
#
|
79
|
+
# (from Arrow C++ inline doc.)
|
80
|
+
# Given an array and a boolean mask (either scalar or of equal length),
|
81
|
+
# along with replacement values (either scalar or array),
|
82
|
+
# each element of the array for which the corresponding mask element is
|
83
|
+
# true will be replaced by the next value from the replacements,
|
84
|
+
# or with null if the mask is null.
|
85
|
+
# Hence, for replacement arrays, len(replacements) == sum(mask == true).
|
86
|
+
|
87
|
+
def replace_with(booleans, replacer = nil)
|
88
|
+
specifier =
|
89
|
+
if booleans.is_a?(Arrow::BooleanArray)
|
90
|
+
booleans
|
91
|
+
elsif booleans.is_a?(Vector) && booleans.boolean?
|
92
|
+
booleans.data
|
93
|
+
elsif booleans.is_a?(Array) && booleans?(booleans)
|
94
|
+
Arrow::BooleanArray.new(booleans)
|
95
|
+
else
|
96
|
+
raise VectorTypeError, 'Not a valid type'
|
97
|
+
end
|
98
|
+
raise VectorArgumentError, 'Booleans size unmatch' if specifier.length != size
|
99
|
+
raise VectorArgumentError, 'Booleans not have any `true`' unless specifier.any?
|
100
|
+
|
101
|
+
r = Array(replacer) # scalar to [scalar]
|
102
|
+
r = [nil] if r.empty?
|
103
|
+
|
104
|
+
replacer =
|
105
|
+
if r.size == 1
|
106
|
+
case replacer
|
107
|
+
when Arrow::Array then replacer
|
108
|
+
when Vector then replacer.data
|
109
|
+
else
|
110
|
+
Arrow::Array.new(r * specifier.to_a.count(true)) # broadcast
|
111
|
+
end
|
112
|
+
else
|
113
|
+
Arrow::Array.new(r)
|
114
|
+
end
|
115
|
+
replacer = data.class.new(replacer) if replacer.uniq == [nil]
|
116
|
+
|
117
|
+
raise VectorArgumentError, 'Replacements size unmatch' if Array(specifier).count(true) != replacer.length
|
118
|
+
|
119
|
+
values = replacer.class.new(data)
|
120
|
+
|
121
|
+
datum = find('replace_with_mask').execute([values, specifier, replacer])
|
122
|
+
Vector.new(datum.value)
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
data/lib/red_amber/version.rb
CHANGED
data/lib/red_amber.rb
CHANGED
@@ -3,15 +3,16 @@
|
|
3
3
|
require 'arrow'
|
4
4
|
require 'rover-df'
|
5
5
|
|
6
|
+
require_relative 'red_amber/helper'
|
6
7
|
require_relative 'red_amber/data_frame_displayable'
|
7
|
-
require_relative 'red_amber/data_frame_helper'
|
8
8
|
require_relative 'red_amber/data_frame_indexable'
|
9
9
|
require_relative 'red_amber/data_frame_selectable'
|
10
|
-
require_relative 'red_amber/data_frame_observation_operation'
|
11
10
|
require_relative 'red_amber/data_frame_variable_operation'
|
12
11
|
require_relative 'red_amber/data_frame'
|
13
|
-
require_relative 'red_amber/
|
12
|
+
require_relative 'red_amber/group'
|
14
13
|
require_relative 'red_amber/vector_functions'
|
14
|
+
require_relative 'red_amber/vector_updatable'
|
15
|
+
require_relative 'red_amber/vector_selectable'
|
15
16
|
require_relative 'red_amber/vector'
|
16
17
|
require_relative 'red_amber/version'
|
17
18
|
|
@@ -23,4 +24,6 @@ module RedAmber
|
|
23
24
|
|
24
25
|
class VectorArgumentError < ArgumentError; end
|
25
26
|
class VectorTypeError < TypeError; end
|
27
|
+
|
28
|
+
class GroupArgumentError < ArgumentError; end
|
26
29
|
end
|
data/red_amber.gemspec
CHANGED
@@ -31,8 +31,6 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.require_paths = ['lib']
|
32
32
|
|
33
33
|
spec.add_dependency 'red-arrow', '>= 8.0.0'
|
34
|
-
spec.add_dependency 'red-parquet', '>= 8.0.0'
|
35
|
-
spec.add_dependency 'rover-df', '~> 0.3.0'
|
36
34
|
|
37
35
|
# Development dependency has gone to the Gemfile (rubygems/bundler#7237)
|
38
36
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red_amber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hirokazu SUZUKI (heronshoes)
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-08-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: red-arrow
|
@@ -24,34 +24,6 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 8.0.0
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: red-parquet
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 8.0.0
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: 8.0.0
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rover-df
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: 0.3.0
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: 0.3.0
|
55
27
|
description: RedAmber is a simple dataframe library inspired by Rover-df and powered
|
56
28
|
by Red Arrow.
|
57
29
|
email:
|
@@ -62,6 +34,7 @@ extra_rdoc_files: []
|
|
62
34
|
files:
|
63
35
|
- ".rubocop.yml"
|
64
36
|
- ".rubocop_todo.yml"
|
37
|
+
- ".yardopts"
|
65
38
|
- CHANGELOG.md
|
66
39
|
- Gemfile
|
67
40
|
- LICENSE
|
@@ -72,6 +45,7 @@ files:
|
|
72
45
|
- doc/CODE_OF_CONDUCT.md
|
73
46
|
- doc/DataFrame.md
|
74
47
|
- doc/Vector.md
|
48
|
+
- doc/examples_of_red_amber.ipynb
|
75
49
|
- doc/image/arrow_table_new.png
|
76
50
|
- doc/image/dataframe/assign.png
|
77
51
|
- doc/image/dataframe/drop.png
|
@@ -90,17 +64,19 @@ files:
|
|
90
64
|
- doc/image/vector/unary_element_wise.png
|
91
65
|
- doc/tdr.md
|
92
66
|
- doc/tdr_ja.md
|
67
|
+
- lib/red-amber.rb
|
93
68
|
- lib/red_amber.rb
|
94
69
|
- lib/red_amber/data_frame.rb
|
95
70
|
- lib/red_amber/data_frame_displayable.rb
|
96
|
-
- lib/red_amber/data_frame_helper.rb
|
97
71
|
- lib/red_amber/data_frame_indexable.rb
|
98
|
-
- lib/red_amber/data_frame_observation_operation.rb
|
99
72
|
- lib/red_amber/data_frame_selectable.rb
|
100
73
|
- lib/red_amber/data_frame_variable_operation.rb
|
74
|
+
- lib/red_amber/group.rb
|
75
|
+
- lib/red_amber/helper.rb
|
101
76
|
- lib/red_amber/vector.rb
|
102
|
-
- lib/red_amber/vector_compensable.rb
|
103
77
|
- lib/red_amber/vector_functions.rb
|
78
|
+
- lib/red_amber/vector_selectable.rb
|
79
|
+
- lib/red_amber/vector_updatable.rb
|
104
80
|
- lib/red_amber/version.rb
|
105
81
|
- red_amber.gemspec
|
106
82
|
- sig/red_amber.rbs
|