red_amber 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.devcontainer/Dockerfile +75 -0
- data/.devcontainer/devcontainer.json +38 -0
- data/.devcontainer/onCreateCommand.sh +26 -0
- data/.rubocop.yml +3 -3
- data/CHANGELOG.md +102 -18
- data/Gemfile +1 -1
- data/README.ja.md +51 -32
- data/README.md +46 -30
- data/Rakefile +55 -0
- data/doc/DataFrame_Comparison.md +9 -13
- data/doc/DataFrame_Comparison_ja.md +61 -0
- data/doc/Dev_Containers.ja.md +290 -0
- data/doc/Dev_Containers.md +292 -0
- data/doc/qmd/examples_of_red_amber.qmd +4596 -0
- data/doc/qmd/red-amber.qmd +90 -0
- data/docker/Dockerfile +2 -2
- data/docker/Gemfile +1 -1
- data/docker/docker-compose.yml +1 -1
- data/docker/readme.md +5 -5
- data/lib/red_amber/data_frame_displayable.rb +1 -1
- data/lib/red_amber/data_frame_loadsave.rb +1 -1
- data/lib/red_amber/data_frame_selectable.rb +2 -2
- data/lib/red_amber/data_frame_variable_operation.rb +6 -6
- data/lib/red_amber/group.rb +287 -39
- data/lib/red_amber/subframes.rb +6 -6
- data/lib/red_amber/vector.rb +2 -1
- data/lib/red_amber/vector_selectable.rb +68 -35
- data/lib/red_amber/vector_string_function.rb +81 -13
- data/lib/red_amber/version.rb +1 -1
- data/red_amber.gemspec +3 -3
- metadata +15 -11
- data/docker/Gemfile.lock +0 -118
- data/docker/example +0 -86
- data/docker/notebook/examples_of_red_amber.ipynb +0 -8562
- data/docker/notebook/red-amber.ipynb +0 -188
@@ -0,0 +1,90 @@
|
|
1
|
+
---
|
2
|
+
title: RedAmber Examples
|
3
|
+
date: 2023-08-06
|
4
|
+
author: heronshoes
|
5
|
+
jupyter: ruby
|
6
|
+
format:
|
7
|
+
pdf:
|
8
|
+
toc: true
|
9
|
+
---
|
10
|
+
|
11
|
+
This notebook walks through the [README of RedAmber](https://github.com/heronshoes/red_amber#readme).
|
12
|
+
|
13
|
+
## `RedAmber::DataFrame`
|
14
|
+
|
15
|
+
```{ruby}
|
16
|
+
#| tags: []
|
17
|
+
require 'red_amber'
|
18
|
+
include RedAmber
|
19
|
+
require 'datasets-arrow'
|
20
|
+
|
21
|
+
{RedAmber: VERSION, Datasets: Datasets::VERSION}
|
22
|
+
```
|
23
|
+
|
24
|
+
## Example: diamonds dataset
|
25
|
+
|
26
|
+
The first time Datasets::Diamonds is loaded, it will take some time to download the data.
|
27
|
+
|
28
|
+
```{ruby}
|
29
|
+
#| tags: []
|
30
|
+
dataset = Datasets::Diamonds.new
|
31
|
+
diamonds = DataFrame.new(dataset)
|
32
|
+
```
|
33
|
+
|
34
|
+
```{ruby}
|
35
|
+
#| tags: []
|
36
|
+
df = diamonds
|
37
|
+
.slice { carat > 1 } # or use #filter instead of #slice
|
38
|
+
.group(:cut)
|
39
|
+
.mean(:price) # `pick` prior to `group` is not required if `:price` is specified here.
|
40
|
+
.sort('-mean(price)')
|
41
|
+
```
|
42
|
+
|
43
|
+
```{ruby}
|
44
|
+
#| tags: []
|
45
|
+
usdjpy = 110.0 # when the yen was stronger
|
46
|
+
|
47
|
+
df.rename('mean(price)': :mean_price_USD)
|
48
|
+
.assign(:mean_price_JPY) { mean_price_USD * usdjpy }
|
49
|
+
```
|
50
|
+
|
51
|
+
## Example: starwars dataset
|
52
|
+
|
53
|
+
```{ruby}
|
54
|
+
#| tags: []
|
55
|
+
uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')
|
56
|
+
|
57
|
+
starwars = DataFrame.load(uri)
|
58
|
+
```
|
59
|
+
|
60
|
+
```{ruby}
|
61
|
+
#| tags: []
|
62
|
+
starwars
|
63
|
+
.drop(0) # delete unnecessary index column
|
64
|
+
.remove { species == "NA" } # delete unnecessary rows
|
65
|
+
.group(:species) { [count(:species), mean(:height, :mass)] }
|
66
|
+
.slice { count > 1 } # or use #filter instead of #slice
|
67
|
+
```
|
68
|
+
|
69
|
+
## `RedAmber::Vector`
|
70
|
+
|
71
|
+
```{ruby}
|
72
|
+
#| tags: []
|
73
|
+
penguins = DataFrame.new(Datasets::Penguins.new)
|
74
|
+
```
|
75
|
+
|
76
|
+
```{ruby}
|
77
|
+
#| tags: []
|
78
|
+
penguins[:bill_length_mm]
|
79
|
+
```
|
80
|
+
|
81
|
+
```{ruby}
|
82
|
+
#| tags: []
|
83
|
+
penguins[:bill_length_mm] < 40
|
84
|
+
```
|
85
|
+
|
86
|
+
```{ruby}
|
87
|
+
#| tags: []
|
88
|
+
penguins[:bill_length_mm].mean
|
89
|
+
```
|
90
|
+
|
data/docker/Dockerfile
CHANGED
data/docker/Gemfile
CHANGED
data/docker/docker-compose.yml
CHANGED
data/docker/readme.md
CHANGED
@@ -6,12 +6,12 @@ This is a docker image containing RedAmber created from
|
|
6
6
|
## Contents
|
7
7
|
|
8
8
|
- From jupyter/minimal-notebook:
|
9
|
-
- Based on 2023-
|
9
|
+
- Based on 2023-05-15 (513d0cb8a67c)
|
10
10
|
- x86-64
|
11
11
|
- Ubuntu-22.04
|
12
|
-
- python-3.10.
|
13
|
-
- lab-3.6.
|
14
|
-
- notebook-6.5.
|
12
|
+
- python-3.10.11
|
13
|
+
- lab-3.6.3
|
14
|
+
- notebook-6.5.4
|
15
15
|
- System ruby-dev:
|
16
16
|
- Ruby 3.0.2
|
17
17
|
- Arrow 11.0.0 for Ubuntu:
|
@@ -22,7 +22,7 @@ This is a docker image containing RedAmber created from
|
|
22
22
|
- Locally installed iruby:
|
23
23
|
- Using Ruby 3.0.2
|
24
24
|
- Locally installed bundler and Gemfile:
|
25
|
-
- RedAmber 0.
|
25
|
+
- RedAmber 0.5.0
|
26
26
|
- Others (see Gemfile)
|
27
27
|
|
28
28
|
## Install
|
@@ -44,7 +44,7 @@ module RedAmber
|
|
44
44
|
# BUFFER
|
45
45
|
#
|
46
46
|
# @example Load from a Buffer skipping comment line
|
47
|
-
# DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines:
|
47
|
+
# DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines: /\A#/)
|
48
48
|
# # comment
|
49
49
|
# name,age
|
50
50
|
# Yasuko,68
|
@@ -39,7 +39,7 @@ module RedAmber
|
|
39
39
|
# penguins[:bill_length_mm]
|
40
40
|
#
|
41
41
|
# # =>
|
42
|
-
# #<RedAmber::Vector(:double, size=344):
|
42
|
+
# #<RedAmber::Vector(:double, size=344, chunked):0x0000000000008f0c>
|
43
43
|
# [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
|
44
44
|
#
|
45
45
|
# @overload [](keys)
|
@@ -173,7 +173,7 @@ module RedAmber
|
|
173
173
|
# penguins.v(:bill_length_mm)
|
174
174
|
#
|
175
175
|
# # =>
|
176
|
-
# #<RedAmber::Vector(:double, size=344):
|
176
|
+
# #<RedAmber::Vector(:double, size=344, chunked):0x0000000000008f0c>
|
177
177
|
# [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
|
178
178
|
#
|
179
179
|
def v(key)
|
@@ -44,7 +44,7 @@ module RedAmber
|
|
44
44
|
# languages[:Language]
|
45
45
|
#
|
46
46
|
# # =>
|
47
|
-
# #<RedAmber::Vector(:string, size=4):0x000000000010359c>
|
47
|
+
# #<RedAmber::Vector(:string, size=4, chunked):0x000000000010359c>
|
48
48
|
# ["Ruby", "Python", "R", "Rust"]
|
49
49
|
#
|
50
50
|
# @overload pick(booleans)
|
@@ -512,8 +512,8 @@ module RedAmber
|
|
512
512
|
# 1 Rui 49 78 (nil)
|
513
513
|
# 2 Hinata 28 57 Momotaro
|
514
514
|
#
|
515
|
-
def assign(
|
516
|
-
assign_update(
|
515
|
+
def assign(...)
|
516
|
+
assign_update(false, ...)
|
517
517
|
end
|
518
518
|
|
519
519
|
# Assign new or updated variables (columns) and create an updated DataFrame.
|
@@ -583,13 +583,13 @@ module RedAmber
|
|
583
583
|
# @return [DataFrame]
|
584
584
|
# assigned DataFrame.
|
585
585
|
#
|
586
|
-
def assign_left(
|
587
|
-
assign_update(
|
586
|
+
def assign_left(...)
|
587
|
+
assign_update(true, ...)
|
588
588
|
end
|
589
589
|
|
590
590
|
private
|
591
591
|
|
592
|
-
def assign_update(*assigner,
|
592
|
+
def assign_update(append_to_left, *assigner, &block)
|
593
593
|
if block
|
594
594
|
assigner_from_block = instance_eval(&block)
|
595
595
|
assigner =
|
data/lib/red_amber/group.rb
CHANGED
@@ -26,12 +26,7 @@ module RedAmber
|
|
26
26
|
private
|
27
27
|
|
28
28
|
# @!macro [attach] define_group_aggregation
|
29
|
-
#
|
30
|
-
# Group aggregation function `$1`.
|
31
|
-
# @param summary_keys [Array<Symbol, String>]
|
32
|
-
# summary keys.
|
33
|
-
# @return [DataFrame]
|
34
|
-
# aggregated DataFrame
|
29
|
+
# Returns aggregated DataFrame.
|
35
30
|
#
|
36
31
|
def define_group_aggregation(function)
|
37
32
|
define_method(function) do |*summary_keys|
|
@@ -55,7 +50,7 @@ module RedAmber
|
|
55
50
|
# @param group_keys [Array<Symbol, String>]
|
56
51
|
# keys for grouping.
|
57
52
|
# @return [Group]
|
58
|
-
# Group object.
|
53
|
+
# Group object. It inspects grouped columns and their counts.
|
59
54
|
# @example
|
60
55
|
# Group.new(penguins, :species)
|
61
56
|
#
|
@@ -79,13 +74,93 @@ module RedAmber
|
|
79
74
|
@group = @dataframe.table.group(*@group_keys)
|
80
75
|
end
|
81
76
|
|
82
|
-
|
77
|
+
# @!macro group_aggregation
|
78
|
+
# @param group_keys [Array<Symbol, String>]
|
79
|
+
# keys for grouping.
|
80
|
+
# @return [DataFrame]
|
81
|
+
# aggregated DataFrame
|
82
|
+
|
83
|
+
# Whether all elements in each group evaluate to true.
|
84
|
+
#
|
85
|
+
# @!method all(*group_keys)
|
86
|
+
# @macro group_aggregation
|
87
|
+
# @example For boolean columns by default.
|
88
|
+
# dataframe
|
89
|
+
#
|
90
|
+
# # =>
|
91
|
+
# #<RedAmber::DataFrame : 6 x 3 Vectors, 0x00000000000230dc>
|
92
|
+
# x y z
|
93
|
+
# <uint8> <string> <boolean>
|
94
|
+
# 0 1 A false
|
95
|
+
# 1 2 A true
|
96
|
+
# 2 3 B false
|
97
|
+
# 3 4 B (nil)
|
98
|
+
# 4 5 B true
|
99
|
+
# 5 6 C false
|
100
|
+
#
|
101
|
+
# dataframe.group(:y).all
|
102
|
+
#
|
103
|
+
# # =>
|
104
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000fc08>
|
105
|
+
# y all(z)
|
106
|
+
# <string> <boolean>
|
107
|
+
# 0 A false
|
108
|
+
# 1 B false
|
109
|
+
# 2 C false
|
110
|
+
#
|
111
|
+
define_group_aggregation :all
|
112
|
+
|
113
|
+
# Whether any elements in each group evaluate to true.
|
114
|
+
#
|
115
|
+
# @!method any(*group_keys)
|
116
|
+
# @macro group_aggregation
|
117
|
+
# @example For boolean columns by default.
|
118
|
+
# dataframe.group(:y).any
|
119
|
+
#
|
120
|
+
# # =>
|
121
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000000117ec>
|
122
|
+
# y any(z)
|
123
|
+
# <string> <boolean>
|
124
|
+
# 0 A true
|
125
|
+
# 1 B true
|
126
|
+
# 2 C false
|
127
|
+
#
|
128
|
+
define_group_aggregation :any
|
129
|
+
|
130
|
+
# Count the number of non-nil values in each group.
|
131
|
+
# If counts are the same (and do not include NaN or nil),
|
132
|
+
# columns for counts are unified.
|
133
|
+
#
|
134
|
+
# @!method count(*group_keys)
|
135
|
+
# @macro group_aggregation
|
136
|
+
# @example Show counts for each group.
|
137
|
+
# dataframe.group(:y).count
|
138
|
+
#
|
139
|
+
# # =>
|
140
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000011ea04>
|
141
|
+
# y count(x) count(z)
|
142
|
+
# <string> <int64> <int64>
|
143
|
+
# 0 A 2 2
|
144
|
+
# 1 B 3 2
|
145
|
+
# 2 C 1 1
|
146
|
+
#
|
147
|
+
# dataframe.group(:z).count
|
148
|
+
# # same as dataframe.group(:z).count(:x, :y)
|
149
|
+
#
|
150
|
+
# # =>
|
151
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000122834>
|
152
|
+
# z count
|
153
|
+
# <boolean> <int64>
|
154
|
+
# 0 false 3
|
155
|
+
# 1 true 2
|
156
|
+
# 2 (nil) 1
|
157
|
+
#
|
158
|
+
define_group_aggregation :count
|
83
159
|
alias_method :__count, :count
|
84
160
|
private :__count
|
85
161
|
|
86
|
-
def count(*
|
87
|
-
df = __count(
|
88
|
-
# if counts are the same (and do not include NaN or nil), aggregate count columns.
|
162
|
+
def count(*group_keys)
|
163
|
+
df = __count(group_keys)
|
89
164
|
if df.pick(@group_keys.size..).to_h.values.uniq.size == 1
|
90
165
|
df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
|
91
166
|
else
|
@@ -93,19 +168,213 @@ module RedAmber
|
|
93
168
|
end
|
94
169
|
end
|
95
170
|
|
96
|
-
|
171
|
+
# Returns each record group size as a DataFrame.
|
172
|
+
#
|
173
|
+
# @return [DataFrame]
|
174
|
+
# DataFrame consists of:
|
175
|
+
# - Group key columns.
|
176
|
+
# - Result columns by group aggregation.
|
177
|
+
# @example
|
178
|
+
# penguins.group(:species).group_count
|
179
|
+
#
|
180
|
+
# # =>
|
181
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003a70>
|
182
|
+
# species group_count
|
183
|
+
# <string> <uint8>
|
184
|
+
# 0 Adelie 152
|
185
|
+
# 1 Chinstrap 68
|
186
|
+
# 2 Gentoo 124
|
187
|
+
#
|
188
|
+
def group_count
|
189
|
+
DataFrame.create(group_table)
|
190
|
+
end
|
191
|
+
alias_method :count_all, :group_count
|
192
|
+
|
193
|
+
# Count the unique values in each group.
|
194
|
+
#
|
195
|
+
# @!method count_uniq(*group_keys)
|
196
|
+
# @macro group_aggregation
|
197
|
+
# @example Show counts for each group.
|
198
|
+
# dataframe.group(:y).count_uniq
|
199
|
+
#
|
200
|
+
# # =>
|
201
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000011ea04>
|
202
|
+
# y count_uniq(x)
|
203
|
+
# <string> <int64>
|
204
|
+
# 0 A 2
|
205
|
+
# 1 B 3
|
206
|
+
# 2 C 1
|
207
|
+
#
|
208
|
+
define_group_aggregation :count_distinct
|
209
|
+
def count_uniq(*group_keys)
|
210
|
+
df = count_distinct(*group_keys)
|
211
|
+
df.rename do
|
212
|
+
keys_org = keys.select { _1.start_with?('count_distinct') }
|
213
|
+
keys_renamed = keys_org.map { _1.to_s.gsub('distinct', 'uniq') }
|
214
|
+
keys_org.zip keys_renamed
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
# Compute maximum of values in each group for numeric columns.
|
219
|
+
#
|
220
|
+
# @!method max(*group_keys)
|
221
|
+
# @macro group_aggregation
|
222
|
+
# @example
|
223
|
+
# dataframe.group(:y).max
|
224
|
+
#
|
225
|
+
# # =>
|
226
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000014ae74>
|
227
|
+
# y max(x)
|
228
|
+
# <string> <uint8>
|
229
|
+
# 0 A 2
|
230
|
+
# 1 B 5
|
231
|
+
# 2 C 6
|
232
|
+
#
|
233
|
+
define_group_aggregation :max
|
234
|
+
|
235
|
+
# Compute mean of values in each group for numeric columns.
|
236
|
+
#
|
237
|
+
# @!method mean(*group_keys)
|
238
|
+
# @macro group_aggregation
|
239
|
+
# @example
|
240
|
+
# dataframe.group(:y).mean
|
241
|
+
#
|
242
|
+
# # =>
|
243
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000138a8>
|
244
|
+
# y mean(x)
|
245
|
+
# <string> <double>
|
246
|
+
# 0 A 1.5
|
247
|
+
# 1 B 4.0
|
248
|
+
# 2 C 6.0
|
249
|
+
#
|
250
|
+
define_group_aggregation :mean
|
251
|
+
|
252
|
+
# Compute median of values in each group for numeric columns.
|
253
|
+
#
|
254
|
+
# @!method median(*group_keys)
|
255
|
+
# @macro group_aggregation
|
256
|
+
# @example
|
257
|
+
# dataframe.group(:y).median
|
258
|
+
#
|
259
|
+
# # =>
|
260
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000138a8>
|
261
|
+
# y median(x)
|
262
|
+
# <string> <double>
|
263
|
+
# 0 A 1.5
|
264
|
+
# 1 B 4.0
|
265
|
+
# 2 C 6.0
|
266
|
+
#
|
267
|
+
define_group_aggregation :approximate_median
|
268
|
+
def median(*group_keys)
|
269
|
+
df = approximate_median(*group_keys)
|
270
|
+
df.rename do
|
271
|
+
keys_org = keys.select { _1.start_with?('approximate_') }
|
272
|
+
keys_renamed = keys_org.map { _1.to_s.delete_prefix('approximate_') }
|
273
|
+
keys_org.zip keys_renamed
|
274
|
+
end
|
275
|
+
end
|
97
276
|
|
98
|
-
|
277
|
+
# Compute minimum of values in each group for numeric columns.
|
278
|
+
#
|
279
|
+
# @!method min(*group_keys)
|
280
|
+
# @macro group_aggregation
|
281
|
+
# @example
|
282
|
+
# dataframe.group(:y).min
|
283
|
+
#
|
284
|
+
# # =>
|
285
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000018f38>
|
286
|
+
# y min(x)
|
287
|
+
# <string> <uint8>
|
288
|
+
# 0 A 1
|
289
|
+
# 1 B 3
|
290
|
+
# 2 C 6
|
291
|
+
#
|
292
|
+
define_group_aggregation :min
|
99
293
|
|
100
|
-
|
294
|
+
# Get one value from each group.
|
295
|
+
#
|
296
|
+
# @!method one(*group_keys)
|
297
|
+
# @macro group_aggregation
|
298
|
+
# @example
|
299
|
+
# dataframe.group(:y).one
|
300
|
+
#
|
301
|
+
# # =>
|
302
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000002885c>
|
303
|
+
# y one(x)
|
304
|
+
# <string> <uint8>
|
305
|
+
# 0 A 1
|
306
|
+
# 1 B 3
|
307
|
+
# 2 C 6
|
308
|
+
#
|
309
|
+
define_group_aggregation :one
|
101
310
|
|
102
|
-
|
311
|
+
# Compute product of values in each group for numeric columns.
|
312
|
+
#
|
313
|
+
# @!method product(*group_keys)
|
314
|
+
# @macro group_aggregation
|
315
|
+
# @example
|
316
|
+
# dataframe.group(:y).product
|
317
|
+
#
|
318
|
+
# # =>
|
319
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000021a84>
|
320
|
+
# y product(x)
|
321
|
+
# <string> <uint64>
|
322
|
+
# 0 A 2
|
323
|
+
# 1 B 60
|
324
|
+
# 2 C 6
|
325
|
+
#
|
326
|
+
define_group_aggregation :product
|
103
327
|
|
104
|
-
|
328
|
+
# Compute standard deviation of values in each group for numeric columns.
|
329
|
+
#
|
330
|
+
# @!method stddev(*group_keys)
|
331
|
+
# @macro group_aggregation
|
332
|
+
# @example
|
333
|
+
# dataframe.group(:y).stddev
|
334
|
+
#
|
335
|
+
# # =>
|
336
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000002be6c>
|
337
|
+
# y stddev(x)
|
338
|
+
# <string> <double>
|
339
|
+
# 0 A 0.5
|
340
|
+
# 1 B 0.816
|
341
|
+
# 2 C 0.0
|
342
|
+
#
|
343
|
+
define_group_aggregation :stddev
|
105
344
|
|
106
|
-
|
345
|
+
# Compute sum of values in each group for numeric columns.
|
346
|
+
#
|
347
|
+
# @!method sum(*group_keys)
|
348
|
+
# @macro group_aggregation
|
349
|
+
# @example
|
350
|
+
# dataframe.group(:y).sum
|
351
|
+
#
|
352
|
+
# # =>
|
353
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000032a14>
|
354
|
+
# y sum(x)
|
355
|
+
# <string> <uint64>
|
356
|
+
# 0 A 3
|
357
|
+
# 1 B 12
|
358
|
+
# 2 C 6
|
359
|
+
#
|
360
|
+
define_group_aggregation :sum
|
107
361
|
|
108
|
-
|
362
|
+
# Compute variance of values in each group for numeric columns.
|
363
|
+
#
|
364
|
+
# @!method variance(*group_keys)
|
365
|
+
# @macro group_aggregation
|
366
|
+
# @example
|
367
|
+
# dataframe.group(:y).variance
|
368
|
+
#
|
369
|
+
# # =>
|
370
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000003b1dc>
|
371
|
+
# y variance(x)
|
372
|
+
# <string> <double>
|
373
|
+
# 0 A 0.25
|
374
|
+
# 1 B 0.667
|
375
|
+
# 2 C 0.0
|
376
|
+
#
|
377
|
+
define_group_aggregation :variance
|
109
378
|
|
110
379
|
# Returns Array of boolean filters to select each records in the Group.
|
111
380
|
#
|
@@ -168,27 +437,6 @@ module RedAmber
|
|
168
437
|
@filters.size
|
169
438
|
end
|
170
439
|
|
171
|
-
# Returns each record group size as a DataFrame.
|
172
|
-
#
|
173
|
-
# @return [DataFrame]
|
174
|
-
# DataFrame consists of:
|
175
|
-
# - Group key columns.
|
176
|
-
# - Result columns by group aggregation.
|
177
|
-
# @example
|
178
|
-
# penguins.group(:species).group_count
|
179
|
-
#
|
180
|
-
# # =>
|
181
|
-
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003a70>
|
182
|
-
# species group_count
|
183
|
-
# <string> <uint8>
|
184
|
-
# 0 Adelie 152
|
185
|
-
# 1 Chinstrap 68
|
186
|
-
# 2 Gentoo 124
|
187
|
-
#
|
188
|
-
def group_count
|
189
|
-
DataFrame.create(group_table)
|
190
|
-
end
|
191
|
-
|
192
440
|
# String representation of self.
|
193
441
|
#
|
194
442
|
# @return [String]
|
data/lib/red_amber/subframes.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
# class SubFrames treats
|
4
|
+
# class SubFrames treats subsets of a DataFrame
|
5
5
|
# [Experimental feature] Class SubFrames may be removed or be changed in the future.
|
6
6
|
class SubFrames
|
7
7
|
include Enumerable # may change to use Forwardable.
|
@@ -434,7 +434,7 @@ module RedAmber
|
|
434
434
|
# @return [DataFrame]
|
435
435
|
# created DataFrame.
|
436
436
|
# @example Aggregate by key labels in arguments and values from block.
|
437
|
-
# subframes.aggregate(:y, :sum_x) { [y.
|
437
|
+
# subframes.aggregate(:y, :sum_x) { [y.one, x.sum] }
|
438
438
|
#
|
439
439
|
# # =>
|
440
440
|
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
@@ -445,7 +445,7 @@ module RedAmber
|
|
445
445
|
# 2 C 6
|
446
446
|
#
|
447
447
|
# @example Aggregate by key labels in an Array and values from block.
|
448
|
-
# subframes.aggregate([:y, :sum_x]) { [y.
|
448
|
+
# subframes.aggregate([:y, :sum_x]) { [y.one, x.sum] }
|
449
449
|
#
|
450
450
|
# # =>
|
451
451
|
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
@@ -457,7 +457,7 @@ module RedAmber
|
|
457
457
|
#
|
458
458
|
# @overload aggregate
|
459
459
|
#
|
460
|
-
# Aggregate SubFrames creating DataFrame with pairs of key and aggregated
|
460
|
+
# Aggregate SubFrames creating DataFrame with pairs of key and aggregated values
|
461
461
|
# in Hash from the block.
|
462
462
|
#
|
463
463
|
# @yieldparam dataframe [DataFrame]
|
@@ -470,7 +470,7 @@ module RedAmber
|
|
470
470
|
# created DataFrame.
|
471
471
|
# @example Aggregate by key and value pairs from block.
|
472
472
|
# subframes.aggregate do
|
473
|
-
# { y: y.
|
473
|
+
# { y: y.one, sum_x: x.sum }
|
474
474
|
# end
|
475
475
|
#
|
476
476
|
# # =>
|
@@ -712,7 +712,7 @@ module RedAmber
|
|
712
712
|
# @example
|
713
713
|
# subframes.assign(:sum_x, :frac_x) do
|
714
714
|
# group_sum = x.sum
|
715
|
-
# [[group_sum] * size, x /
|
715
|
+
# [[group_sum] * size, x / group_sum.to_f]
|
716
716
|
# end
|
717
717
|
#
|
718
718
|
# # =>
|
data/lib/red_amber/vector.rb
CHANGED
@@ -180,7 +180,8 @@ module RedAmber
|
|
180
180
|
end
|
181
181
|
sio << ']'
|
182
182
|
|
183
|
-
|
183
|
+
chunked = chunked? ? ', chunked' : ''
|
184
|
+
format "#<#{self.class}(:#{type}, size=#{size}#{chunked}):0x%016x>\n%s\n",
|
184
185
|
object_id, sio.string
|
185
186
|
end
|
186
187
|
end
|