red_amber 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +56 -22
- data/.yardopts +2 -0
- data/CHANGELOG.md +178 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +29 -30
- data/benchmark/basic.yml +7 -7
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -3
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +454 -85
- data/lib/red_amber/data_frame_combinable.rb +609 -115
- data/lib/red_amber/data_frame_displayable.rb +313 -34
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +623 -70
- data/lib/red_amber/data_frame_variable_operation.rb +452 -35
- data/lib/red_amber/group.rb +186 -22
- data/lib/red_amber/helper.rb +74 -14
- data/lib/red_amber/refinements.rb +26 -6
- data/lib/red_amber/subframes.rb +1101 -0
- data/lib/red_amber/vector.rb +362 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +506 -0
- data/lib/red_amber/vector_selectable.rb +265 -23
- data/lib/red_amber/vector_unary_element_wise.rb +529 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
@@ -1,242 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
|
4
|
-
# reference: https://arrow.apache.org/docs/cpp/compute.html
|
5
|
-
|
6
|
-
# Not implemented in Red Arrow 8.0.0
|
7
|
-
# divmod, # '%',
|
8
|
-
# true_unless_null
|
9
|
-
|
10
|
-
module RedAmber
  # Mix-in for class Vector that exposes Arrow compute functions as methods.
  #
  # Every public method looks up an Arrow compute function by name (see
  # {#find}) and executes it against the receiver's `data`.
  # The including class must provide `data`, `size` and `numeric?`.
  module VectorFunctions
    # [Unary aggregations]: vector.func => scalar
    unary_aggregations =
      %i[all any approximate_median count count_distinct max mean min min_max
         product stddev sum variance]
    unary_aggregations.each do |function|
      define_method(function) do |**options|
        datum = exec_func_unary(function, options)
        get_scalar(datum)
      end
    end
    alias_method :median, :approximate_median
    alias_method :count_uniq, :count_distinct
    alias_method :all?, :all
    alias_method :any?, :any

    # Variance computed with `ddof: 1`.
    #
    # @return the result of `variance(ddof: 1)`.
    def unbiased_variance
      variance(ddof: 1)
    end
    alias_method :var, :unbiased_variance

    # Standard deviation computed with `ddof: 1`.
    #
    # @return the result of `stddev(ddof: 1)`.
    def sd
      stddev(ddof: 1)
    end
    alias_method :std, :sd

    # Return quantile
    #   0.5 quantile (median) is returned by default.
    #   Or return quantile for specified probability (prob).
    #   If quantile lies between two data points, interpolated value is
    #   returned based on selected interpolation method.
    #   Nils and NaNs are ignored.
    #   Nil is returned if there are no valid data points.
    #
    # @param prob [Float] probability, must be within 0..1.
    # @param interpolation [Symbol] specifies interpolation method to use,
    #   when the quantile lies between the data i and j.
    #   - Default value is :linear, which returns i + (j - i) * fraction.
    #   - :lower returns i.
    #   - :higher returns j.
    #   - :nearest returns i or j, whichever is closer.
    #   - :midpoint returns (i + j) / 2.
    # @param skip_nils [Boolean] whether to ignore nil.
    # @param min_count [Integer] min count.
    # @return [Float] quantile.
    # @raise [VectorArgumentError] if prob is outside 0..1.
    def quantile(prob = 0.5, interpolation: :linear, skip_nils: true, min_count: 0)
      unless (0..1).cover?(prob)
        raise VectorArgumentError,
              "Invalid: probability #{prob} must be between 0 and 1"
      end

      # Arrow names the option `skip_nulls`; this API exposes it as `skip_nils`.
      datum = find(:quantile).execute([data],
                                      q: prob,
                                      interpolation: interpolation,
                                      skip_nulls: skip_nils,
                                      min_count: min_count)
      datum.value.to_a.first
    end

    # Return quantiles in a DataFrame
    #
    # @param probs [Array<Float>] probabilities, each must be within 0..1.
    # @param interpolation [Symbol] passed through to {#quantile}.
    # @param skip_nils [Boolean] passed through to {#quantile}.
    # @param min_count [Integer] passed through to {#quantile}.
    # @return [DataFrame] a DataFrame built from the keys `probs` and `quantiles`.
    # @raise [VectorArgumentError] if probs is empty or has a value outside 0..1.
    def quantiles(probs = [1.0, 0.75, 0.5, 0.25, 0.0],
                  interpolation: :linear, skip_nils: true, min_count: 0)
      if probs.empty? || !probs.all? { |q| (0..1).cover?(q) }
        raise VectorArgumentError, "Invalid probabilities #{probs}"
      end

      values =
        probs.map do |q|
          quantile(q,
                   interpolation: interpolation, skip_nils: skip_nils,
                   min_count: min_count)
        end
      DataFrame.new(probs: probs, quantiles: values)
    end

    # [Unary element-wise]: vector.func => vector
    unary_element_wise =
      %i[abs acos asin array_sort_indices atan bit_wise_not ceil cos
         fill_null_backward fill_null_forward floor
         is_finite is_inf is_nan is_null is_valid ln log10 log1p log2
         round round_to_multiple sign sin tan trunc unique]
    unary_element_wise.each do |function|
      define_method(function) do |**options|
        datum = exec_func_unary(function, options)
        Vector.create(datum.value)
      end
    end
    alias_method :is_nil, :is_null

    # Mark elements that are nil, and also NaN when the vector is numeric.
    #
    # @return [Vector] `is_nil | is_nan` if `numeric?`, otherwise `is_nil`.
    def is_na
      numeric? ? (is_nil | is_nan) : is_nil
    end

    alias_method :fill_nil_backward, :fill_null_backward
    alias_method :fill_nil_forward, :fill_null_forward

    alias_method :sort_indexes, :array_sort_indices
    alias_method :sort_indices, :array_sort_indices
    alias_method :sort_index, :array_sort_indices

    alias_method :uniq, :unique

    # [Unary element-wise with operator]: vector.func => vector, op vector
    unary_element_wise_op = {
      invert: '!',
      negate: '-@',
    }
    unary_element_wise_op.each do |function, operator|
      # The named method and its operator form share one implementation.
      [function, operator].each do |method_name|
        define_method(method_name) do |**options|
          datum = exec_func_unary(function, options)
          Vector.create(datum.value)
        end
      end
    end
    alias_method :not, :invert

    # [Binary element-wise]: vector.func(other) => vector
    binary_element_wise =
      %i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor logb]
    binary_element_wise.each do |function|
      define_method(function) do |other, **options|
        datum = exec_func_binary(function, other, options)
        Vector.create(datum.value)
      end
    end

    # [Logical binary element-wise]: vector.func(other) => vector
    # Keys are the method names defined here, values are the Arrow functions.
    logical_binary_element_wise = {
      '&': :and_kleene,
      and_kleene: :and_kleene,
      and_org: :and,
      '|': :or_kleene,
      or_kleene: :or_kleene,
      or_org: :or,
    }
    logical_binary_element_wise.each do |method, function|
      define_method(method) do |other, **options|
        datum = exec_func_binary(function, other, options)
        Vector.create(datum.value)
      end
    end

    # [Binary element-wise with operator]: vector.func(other) => vector
    binary_element_wise_op = {
      add: '+',
      divide: '/',
      multiply: '*',
      power: '**',
      subtract: '-',

      xor: '^',
      shift_left: '<<',
      shift_right: '>>',

      equal: '==',
      greater: '>',
      greater_equal: '>=',
      less: '<',
      less_equal: '<=',
      not_equal: '!=',
    }
    binary_element_wise_op.each do |function, operator|
      # The named method and its operator form share one implementation.
      [function, operator].each do |method_name|
        define_method(method_name) do |other, **options|
          datum = exec_func_binary(function, other, options)
          Vector.create(datum.value)
        end
      end
    end
    alias_method :eq, :equal
    alias_method :ge, :greater_equal
    alias_method :gt, :greater
    alias_method :le, :less_equal
    alias_method :lt, :less
    alias_method :ne, :not_equal

    # Support `other op vector` by repeating `other` to the receiver's size.
    #
    # @param other the left-hand operand.
    # @return [Array] a pair of a new Vector built from `other` and self.
    def coerce(other)
      [Vector.new(Array(other) * size), self]
    end

    private # =======

    # Execute the named Arrow function with the receiver's data only.
    # An empty options hash is passed to Arrow as nil.
    def exec_func_unary(function, options)
      options = nil if options.empty?
      find(function).execute([data], options)
    end

    # Execute the named Arrow function with the receiver's data and `other`.
    # A Vector operand is unwrapped to its `data`; the other accepted types
    # are handed to Arrow as they are.
    #
    # @raise [VectorArgumentError] if `other` is none of the accepted types.
    def exec_func_binary(function, other, options)
      options = nil if options.empty?
      case other
      when Vector
        find(function).execute([data, other.data], options)
      when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar,
           Array, Numeric, String, TrueClass, FalseClass
        find(function).execute([data, other], options)
      else
        # Without this branch nil was returned and the caller failed later
        # with an unrelated NoMethodError on `nil.value`.
        raise VectorArgumentError,
              "Unsupported operand for #{function}: #{other.inspect} (#{other.class})"
      end
    end

    # Convert the datum returned by an aggregation into a Ruby value.
    # A StructScalar becomes an Array of its converted members.
    def get_scalar(datum)
      output = datum.value
      case output
      when Arrow::StringScalar then output.to_s
      when Arrow::StructScalar
        output.value.map { |s| s.is_a?(Arrow::StringScalar) ? s.to_s : s.value }
      else
        output.value
      end
    end

    module_function # ======

    # Look up an Arrow compute function by name.
    #
    # @param function_name [Symbol, String] name of the Arrow function.
    # @return the value of `Arrow::Function.find(function_name)`.
    def find(function_name)
      Arrow::Function.find(function_name)
    end

    # temporary API until RedAmber document prepared.
    #
    # @param function_name [Symbol, String] name of the Arrow function.
    # @return [String] the function, an underline as long as the name,
    #   and the description from the function's doc.
    def arrow_doc(function_name)
      f = find(function_name)
      "#{f}\n#{'-' * function_name.size}\n#{f.doc.description}"
    end
  end
end
|