red_amber 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +56 -22
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +178 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE +1 -1
  7. data/README.md +29 -30
  8. data/benchmark/basic.yml +7 -7
  9. data/benchmark/combine.yml +3 -3
  10. data/benchmark/dataframe.yml +15 -9
  11. data/benchmark/group.yml +6 -6
  12. data/benchmark/reshape.yml +6 -6
  13. data/benchmark/vector.yml +6 -3
  14. data/doc/DataFrame.md +32 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +207 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +454 -85
  20. data/lib/red_amber/data_frame_combinable.rb +609 -115
  21. data/lib/red_amber/data_frame_displayable.rb +313 -34
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +78 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +184 -14
  25. data/lib/red_amber/data_frame_selectable.rb +623 -70
  26. data/lib/red_amber/data_frame_variable_operation.rb +452 -35
  27. data/lib/red_amber/group.rb +186 -22
  28. data/lib/red_amber/helper.rb +74 -14
  29. data/lib/red_amber/refinements.rb +26 -6
  30. data/lib/red_amber/subframes.rb +1101 -0
  31. data/lib/red_amber/vector.rb +362 -11
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +506 -0
  34. data/lib/red_amber/vector_selectable.rb +265 -23
  35. data/lib/red_amber/vector_unary_element_wise.rb +529 -0
  36. data/lib/red_amber/vector_updatable.rb +278 -34
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +13 -1
  39. data/red_amber.gemspec +2 -2
  40. metadata +13 -8
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -242
@@ -1,242 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
4
- # reference: https://arrow.apache.org/docs/cpp/compute.html
5
-
6
- # Not implemented in Red Arrow 8.0.0
7
- # divmod, # '%',
8
- # true_unless_null
9
-
10
- module RedAmber
11
- # mix-ins for class Vector
12
- module VectorFunctions
13
- # [Unary aggregations]: vector.func => scalar
14
- unary_aggregations =
15
- %i[all any approximate_median count count_distinct max mean min min_max
16
- product stddev sum variance]
17
- unary_aggregations.each do |function|
18
- define_method(function) do |**options|
19
- datum = exec_func_unary(function, options)
20
- get_scalar(datum)
21
- end
22
- end
23
- alias_method :median, :approximate_median
24
- alias_method :count_uniq, :count_distinct
25
- alias_method :all?, :all
26
- alias_method :any?, :any
27
-
28
- def unbiased_variance
29
- variance(ddof: 1)
30
- end
31
- alias_method :var, :unbiased_variance
32
-
33
- def sd
34
- stddev(ddof: 1)
35
- end
36
- alias_method :std, :sd
37
-
38
- # Return quantile
39
- # 0.5 quantile (median) is returned by default.
40
- # Or return quantile for specified probability (prob).
41
- # If quantile lies between two data points, interpolated value is
42
- # returned based on selected interpolation method.
43
- # Nils and NaNs are ignored.
44
- # Nil is returned if there are no valid data point.
45
- #
46
- # @param prob [Float] probability.
47
- # @param interpolation [Symbol] specifies interpolation method to use,
48
- # when the quantile lies between the data i and j.
49
- # - Default value is :linear, which returns i + (j - i) * fraction.
50
- # - :lower returns i.
51
- # - :higher returns j.
52
- # - :nearest returns i or j, whichever is closer.
53
- # - :midpoint returns (i + j) / 2.
54
- # @param skip_nils [Boolean] wheather to ignore nil.
55
- # @param min_count [Integer] min count.
56
- # @return [Float] quantile.
57
- def quantile(prob = 0.5, interpolation: :linear, skip_nils: true, min_count: 0)
58
- unless (0..1).cover? prob
59
- raise VectorArgumentError,
60
- "Invalid: probability #{prob} must be between 0 and 1"
61
- end
62
-
63
- datum = find(:quantile).execute([data],
64
- q: prob,
65
- interpolation: interpolation,
66
- skip_nulls: skip_nils,
67
- min_count: min_count)
68
- datum.value.to_a.first
69
- end
70
-
71
- # Return quantiles in a DataFrame
72
- #
73
- def quantiles(probs = [1.0, 0.75, 0.5, 0.25, 0.0],
74
- interpolation: :linear, skip_nils: true, min_count: 0)
75
- if probs.empty? || !probs.all? { |q| (0..1).cover?(q) }
76
- raise VectorArgumentError, "Invarid probavilities #{probs}"
77
- end
78
-
79
- DataFrame.new(
80
- probs: probs,
81
- quantiles: probs.map do |q|
82
- quantile(q,
83
- interpolation: interpolation, skip_nils: skip_nils,
84
- min_count: min_count)
85
- end
86
- )
87
- end
88
-
89
- # [Unary element-wise]: vector.func => vector
90
- unary_element_wise =
91
- %i[abs acos asin array_sort_indices atan bit_wise_not ceil cos
92
- fill_null_backward fill_null_forward floor
93
- is_finite is_inf is_nan is_null is_valid ln log10 log1p log2
94
- round round_to_multiple sign sin tan trunc unique]
95
- unary_element_wise.each do |function|
96
- define_method(function) do |**options|
97
- datum = exec_func_unary(function, options)
98
- Vector.create(datum.value)
99
- end
100
- end
101
- alias_method :is_nil, :is_null
102
-
103
- def is_na
104
- numeric? ? (is_nil | is_nan) : is_nil
105
- end
106
-
107
- alias_method :fill_nil_backward, :fill_null_backward
108
- alias_method :fill_nil_forward, :fill_null_forward
109
-
110
- alias_method :sort_indexes, :array_sort_indices
111
- alias_method :sort_indices, :array_sort_indices
112
- alias_method :sort_index, :array_sort_indices
113
-
114
- alias_method :uniq, :unique
115
-
116
- # [Unary element-wise with operator]: vector.func => vector, op vector
117
- unary_element_wise_op = {
118
- invert: '!',
119
- negate: '-@',
120
- }
121
- unary_element_wise_op.each do |function, operator|
122
- define_method(function) do |**options|
123
- datum = exec_func_unary(function, options)
124
- Vector.create(datum.value)
125
- end
126
-
127
- define_method(operator) do |**options|
128
- datum = exec_func_unary(function, options)
129
- Vector.create(datum.value)
130
- end
131
- end
132
- alias_method :not, :invert
133
-
134
- # [Binary element-wise]: vector.func(other) => vector
135
- binary_element_wise =
136
- %i[atan2 and_not and_not_kleene bit_wise_and bit_wise_or bit_wise_xor logb]
137
- binary_element_wise.each do |function|
138
- define_method(function) do |other, **options|
139
- datum = exec_func_binary(function, other, options)
140
- Vector.create(datum.value)
141
- end
142
- end
143
-
144
- # [Logical binary element-wise]: vector.func(other) => vector
145
- logical_binary_element_wise = {
146
- '&': :and_kleene,
147
- and_kleene: :and_kleene,
148
- and_org: :and,
149
- '|': :or_kleene,
150
- or_kleene: :or_kleene,
151
- or_org: :or,
152
- }
153
- logical_binary_element_wise.each do |method, function|
154
- define_method(method) do |other, **options|
155
- datum = exec_func_binary(function, other, options)
156
- Vector.create(datum.value)
157
- end
158
- end
159
-
160
- # [Binary element-wise with operator]: vector.func(other) => vector
161
- binary_element_wise_op = {
162
- add: '+',
163
- divide: '/',
164
- multiply: '*',
165
- power: '**',
166
- subtract: '-',
167
-
168
- xor: '^',
169
- shift_left: '<<',
170
- shift_right: '>>',
171
-
172
- equal: '==',
173
- greater: '>',
174
- greater_equal: '>=',
175
- less: '<',
176
- less_equal: '<=',
177
- not_equal: '!=',
178
- }
179
- binary_element_wise_op.each do |function, operator|
180
- define_method(function) do |other, **options|
181
- datum = exec_func_binary(function, other, options)
182
- Vector.create(datum.value)
183
- end
184
-
185
- define_method(operator) do |other, **options|
186
- datum = exec_func_binary(function, other, options)
187
- Vector.create(datum.value)
188
- end
189
- end
190
- alias_method :eq, :equal
191
- alias_method :ge, :greater_equal
192
- alias_method :gt, :greater
193
- alias_method :le, :less_equal
194
- alias_method :lt, :less
195
- alias_method :ne, :not_equal
196
-
197
- def coerce(other)
198
- [Vector.new(Array(other) * size), self]
199
- end
200
-
201
- private # =======
202
-
203
- def exec_func_unary(function, options)
204
- options = nil if options.empty?
205
- find(function).execute([data], options)
206
- end
207
-
208
- def exec_func_binary(function, other, options)
209
- options = nil if options.empty?
210
- case other
211
- when Vector
212
- find(function).execute([data, other.data], options)
213
- when Arrow::Array, Arrow::ChunkedArray, Arrow::Scalar,
214
- Array, Numeric, String, TrueClass, FalseClass
215
- find(function).execute([data, other], options)
216
- end
217
- end
218
-
219
- def get_scalar(datum)
220
- output = datum.value
221
- case output
222
- when Arrow::StringScalar then output.to_s
223
- when Arrow::StructScalar
224
- output.value.map { |s| s.is_a?(Arrow::StringScalar) ? s.to_s : s.value }
225
- else
226
- output.value
227
- end
228
- end
229
-
230
- module_function # ======
231
-
232
- def find(function_name)
233
- Arrow::Function.find(function_name)
234
- end
235
-
236
- # temporary API until RedAmber document prepared.
237
- def arrow_doc(function_name)
238
- f = find(function_name)
239
- "#{f}\n#{'-' * function_name.size}\n#{f.doc.description}"
240
- end
241
- end
242
- end