red_amber 0.3.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +56 -22
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +29 -30
  8. data/benchmark/basic.yml +7 -7
  9. data/benchmark/combine.yml +3 -3
  10. data/benchmark/dataframe.yml +15 -9
  11. data/benchmark/group.yml +6 -6
  12. data/benchmark/reshape.yml +6 -6
  13. data/benchmark/vector.yml +6 -3
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +454 -85
  20. data/lib/red_amber/data_frame_combinable.rb +609 -115
  21. data/lib/red_amber/data_frame_displayable.rb +313 -34
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +623 -70
  26. data/lib/red_amber/data_frame_variable_operation.rb +452 -35
  27. data/lib/red_amber/group.rb +186 -22
  28. data/lib/red_amber/helper.rb +74 -14
  29. data/lib/red_amber/refinements.rb +26 -6
  30. data/lib/red_amber/subframes.rb +1101 -0
  31. data/lib/red_amber/vector.rb +362 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +506 -0
  34. data/lib/red_amber/vector_selectable.rb +265 -23
  35. data/lib/red_amber/vector_unary_element_wise.rb +529 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -1,242 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
4
- # reference: https://arrow.apache.org/docs/cpp/compute.html
5
-
6
- # Not implemented in Red Arrow 8.0.0
7
- # divmod, # '%',
8
- # true_unless_null
9
-
10
- module RedAmber
11
- # mix-ins for class Vector
12
- module VectorFunctions
13
- # [Unary aggregations]: vector.func => scalar
14
- unary_aggregations =
15
- %i[all any approximate_median count count_distinct max mean min min_max
16
- product stddev sum variance]
17
- unary_aggregations.each do |function|
18
- define_method(function) do |**options|
19
- datum = exec_func_unary(function, options)
20
- get_scalar(datum)
21
- end
22
- end
23
- alias_method :median, :approximate_median
24
- alias_method :count_uniq, :count_distinct
25
- alias_method :all?, :all
26
- alias_method :any?, :any
27
-
28
- def unbiased_variance
29
- variance(ddof: 1)
30
- end
31
- alias_method :var, :unbiased_variance
32
-
33
- def sd
34
- stddev(ddof: 1)
35
- end
36
- alias_method :std, :sd
37
-
38
- # Return quantile
39
- # 0.5 quantile (median) is returned by default.
40
- # Or return quantile for specified probability (prob).
41
- # If quantile lies between two data points, interpolated value is
42
- # returned based on selected interpolation method.
43
- # Nils and NaNs are ignored.
44
- # Nil is returned if there is no valid data point.
45
- #
46
- # @param prob [Float] probability.
47
- # @param interpolation [Symbol] specifies interpolation method to use,
48
- # when the quantile lies between the data i and j.
49
- # - Default value is :linear, which returns i + (j - i) * fraction.
50
- # - :lower returns i.
51
- # - :higher returns j.
52
- # - :nearest returns i or j, whichever is closer.
53
- # - :midpoint returns (i + j) / 2.
54
- # @param skip_nils [Boolean] whether to ignore nil.
55
- # @param min_count [Integer] min count.
56
- # @return [Float] quantile.
57
- def quantile(prob = 0.5, interpolation: :linear, skip_nils: true, min_count: 0)
58
- unless (0..1).cover? prob
59
- raise VectorArgumentError,
60
- "Invalid: probability #{prob} must be between 0 and 1"
61
- end
62
-
63
- datum = find(:quantile).execute([data],
64
- q: prob,
65
- interpolation: interpolation,
66
- skip_nulls: skip_nils,
67
- min_count: min_count)
68
- datum.value.to_a.first
69
- end
70
-
71
- # Return quantiles in a DataFrame
72
- #
73
- def quantiles(probs = [1.0, 0.75, 0.5, 0.25, 0.0],
74
- interpolation: :linear, skip_nils: true, min_count: 0)
75
- if probs.empty? || !probs.all? { |q| (0..1).cover?(q) }
76
- raise VectorArgumentError, "Invarid probavilities #{probs}"
77
- end
78
-
79
- DataFrame.new(
80
- probs: probs,
81
- quantiles: probs.map do |q|
82
- quantile(q,
83
- interpolation: interpolation, skip_nils: skip_nils,
84
- min_count: min_count)
85
- end
86
- )
87
- end
88
-
89
- # [Unary element-wise]: vector.func => vector
90
- unary_element_wise =
91
- %i[abs acos asin array_sort_indices atan bit_wise_not ceil cos
92
- fill_null_backward fill_null_forward floor
93
- is_finite is_inf is_nan is_null is_valid ln log10 log1p log2
94
- round round_to_multiple sign sin tan trunc unique]
95
- unary_element_wise.each do |function|
96
- define_method(function) do |**options|
97
- datum = exec_func_unary(function, options)
98
- Vector.create(datum.value)
99
- end
100
- end
101
- alias_method :is_nil, :is_null
102
-
103
- def is_na
104
- numeric? ? (is_nil | is_nan) : is_nil
105
- end
106
-
107
- alias_method :fill_nil_backward, :fill_null_backward
108
- alias_method :fill_nil_forward, :fill_null_forward
109
-
110
- alias_method :sort_indexes, :array_sort_indices
111
- alias_method :sort_indices, :array_sort_indices
112
- alias_method :sort_index, :array_sort_indices
113
-
114
- alias_method :uniq, :unique
115
-
116
- # [Unary element-wise with operator]: vector.func => vector, op vector
117
- unary_element_wise_op = {
118
- invert: '!',
119
- negate: '-@',
120
- }
121
- unary_element_wise_op.each do |function, operator|
122
- define_method(function) do |**options|
123
- datum = exec_func_unary(function, options)
124
- Vector.create(datum.value)
125
- end
126
-
127
- define_method(operator) do |**options|
128
- datum = exec_func_unary(function, options)
129
- Vector.create(datum.value)
130
- end
131
- end
132
- alias_method :not, :invert
133
-
134
- # [Binary element-wise]: vector.func(other) => vector
135
- binary_element_wise =
136
- %i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor logb]
137
- binary_element_wise.each do |function|
138
- define_method(function) do |other, **options|
139
- datum = exec_func_binary(function, other, options)
140
- Vector.create(datum.value)
141
- end
142
- end
143
-
144
- # [Logical binary element-wise]: vector.func(other) => vector
145
- logical_binary_element_wise = {
146
- '&': :and_kleene,
147
- and_kleene: :and_kleene,
148
- and_org: :and,
149
- '|': :or_kleene,
150
- or_kleene: :or_kleene,
151
- or_org: :or,
152
- }
153
- logical_binary_element_wise.each do |method, function|
154
- define_method(method) do |other, **options|
155
- datum = exec_func_binary(function, other, options)
156
- Vector.create(datum.value)
157
- end
158
- end
159
-
160
- # [Binary element-wise with operator]: vector.func(other) => vector
161
- binary_element_wise_op = {
162
- add: '+',
163
- divide: '/',
164
- multiply: '*',
165
- power: '**',
166
- subtract: '-',
167
-
168
- xor: '^',
169
- shift_left: '<<',
170
- shift_right: '>>',
171
-
172
- equal: '==',
173
- greater: '>',
174
- greater_equal: '>=',
175
- less: '<',
176
- less_equal: '<=',
177
- not_equal: '!=',
178
- }
179
- binary_element_wise_op.each do |function, operator|
180
- define_method(function) do |other, **options|
181
- datum = exec_func_binary(function, other, options)
182
- Vector.create(datum.value)
183
- end
184
-
185
- define_method(operator) do |other, **options|
186
- datum = exec_func_binary(function, other, options)
187
- Vector.create(datum.value)
188
- end
189
- end
190
- alias_method :eq, :equal
191
- alias_method :ge, :greater_equal
192
- alias_method :gt, :greater
193
- alias_method :le, :less_equal
194
- alias_method :lt, :less
195
- alias_method :ne, :not_equal
196
-
197
- def coerce(other)
198
- [Vector.new(Array(other) * size), self]
199
- end
200
-
201
- private # =======
202
-
203
- def exec_func_unary(function, options)
204
- options = nil if options.empty?
205
- find(function).execute([data], options)
206
- end
207
-
208
- def exec_func_binary(function, other, options)
209
- options = nil if options.empty?
210
- case other
211
- when Vector
212
- find(function).execute([data, other.data], options)
213
- when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar,
214
- Array, Numeric, String, TrueClass, FalseClass
215
- find(function).execute([data, other], options)
216
- end
217
- end
218
-
219
- def get_scalar(datum)
220
- output = datum.value
221
- case output
222
- when Arrow::StringScalar then output.to_s
223
- when Arrow::StructScalar
224
- output.value.map { |s| s.is_a?(Arrow::StringScalar) ? s.to_s : s.value }
225
- else
226
- output.value
227
- end
228
- end
229
-
230
- module_function # ======
231
-
232
- def find(function_name)
233
- Arrow::Function.find(function_name)
234
- end
235
-
236
- # temporary API until RedAmber document prepared.
237
- def arrow_doc(function_name)
238
- f = find(function_name)
239
- "#{f}\n#{'-' * function_name.size}\n#{f.doc.description}"
240
- end
241
- end
242
- end