red_amber 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.devcontainer/Dockerfile +75 -0
- data/.devcontainer/devcontainer.json +38 -0
- data/.devcontainer/onCreateCommand.sh +22 -0
- data/.rubocop.yml +3 -3
- data/CHANGELOG.md +85 -18
- data/README.ja.md +45 -26
- data/README.md +40 -24
- data/Rakefile +55 -0
- data/doc/Dev_Containers.ja.md +290 -0
- data/doc/Dev_Containers.md +292 -0
- data/doc/qmd/examples_of_red_amber.qmd +4596 -0
- data/doc/qmd/red-amber.qmd +90 -0
- data/docker/Dockerfile +2 -2
- data/docker/Gemfile +1 -1
- data/docker/docker-compose.yml +1 -1
- data/docker/readme.md +5 -5
- data/lib/red_amber/data_frame_displayable.rb +1 -1
- data/lib/red_amber/data_frame_loadsave.rb +1 -1
- data/lib/red_amber/data_frame_selectable.rb +2 -2
- data/lib/red_amber/data_frame_variable_operation.rb +6 -6
- data/lib/red_amber/group.rb +287 -39
- data/lib/red_amber/subframes.rb +6 -6
- data/lib/red_amber/vector.rb +2 -1
- data/lib/red_amber/vector_selectable.rb +68 -35
- data/lib/red_amber/vector_string_function.rb +81 -13
- data/lib/red_amber/version.rb +1 -1
- data/red_amber.gemspec +2 -2
- metadata +16 -13
- data/docker/Gemfile.lock +0 -118
- data/docker/example +0 -86
- data/docker/notebook/examples_of_red_amber.ipynb +0 -8562
- data/docker/notebook/red-amber.ipynb +0 -188
@@ -0,0 +1,90 @@
|
|
1
|
+
---
|
2
|
+
title: RedAmber Examples
|
3
|
+
date: 2023-08-06
|
4
|
+
author: heronshoes
|
5
|
+
jupyter: ruby
|
6
|
+
format:
|
7
|
+
pdf:
|
8
|
+
toc: true
|
9
|
+
---
|
10
|
+
|
11
|
+
This notebook walks through the [README of RedAmber](https://github.com/heronshoes/red_amber#readme).
|
12
|
+
|
13
|
+
## `RedAmber::DataFrame`
|
14
|
+
|
15
|
+
```{ruby}
|
16
|
+
#| tags: []
|
17
|
+
require 'red_amber'
|
18
|
+
include RedAmber
|
19
|
+
require 'datasets-arrow'
|
20
|
+
|
21
|
+
{RedAmber: VERSION, Datasets: Datasets::VERSION}
|
22
|
+
```
|
23
|
+
|
24
|
+
## Example: diamonds dataset
|
25
|
+
|
26
|
+
The first time Datasets::Diamonds is loaded, it will take some time to download the data.
|
27
|
+
|
28
|
+
```{ruby}
|
29
|
+
#| tags: []
|
30
|
+
dataset = Datasets::Diamonds.new
|
31
|
+
diamonds = DataFrame.new(dataset)
|
32
|
+
```
|
33
|
+
|
34
|
+
```{ruby}
|
35
|
+
#| tags: []
|
36
|
+
df = diamonds
|
37
|
+
.slice { carat > 1 } # or use #filter instead of #slice
|
38
|
+
.group(:cut)
|
39
|
+
.mean(:price) # `pick` prior to `group` is not required if `:price` is specified here.
|
40
|
+
.sort('-mean(price)')
|
41
|
+
```
|
42
|
+
|
43
|
+
```{ruby}
|
44
|
+
#| tags: []
|
45
|
+
usdjpy = 110.0 # when the yen was stronger
|
46
|
+
|
47
|
+
df.rename('mean(price)': :mean_price_USD)
|
48
|
+
.assign(:mean_price_JPY) { mean_price_USD * usdjpy }
|
49
|
+
```
|
50
|
+
|
51
|
+
## Example: starwars dataset
|
52
|
+
|
53
|
+
```{ruby}
|
54
|
+
#| tags: []
|
55
|
+
uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')
|
56
|
+
|
57
|
+
starwars = DataFrame.load(uri)
|
58
|
+
```
|
59
|
+
|
60
|
+
```{ruby}
|
61
|
+
#| tags: []
|
62
|
+
starwars
|
63
|
+
.drop(0) # delete unnecessary index column
|
64
|
+
.remove { species == "NA" } # delete unnecessary rows
|
65
|
+
.group(:species) { [count(:species), mean(:height, :mass)] }
|
66
|
+
.slice { count > 1 } # or use #filter instead of slice
|
67
|
+
```
|
68
|
+
|
69
|
+
## `RedAmber::Vector`
|
70
|
+
|
71
|
+
```{ruby}
|
72
|
+
#| tags: []
|
73
|
+
penguins = DataFrame.new(Datasets::Penguins.new)
|
74
|
+
```
|
75
|
+
|
76
|
+
```{ruby}
|
77
|
+
#| tags: []
|
78
|
+
penguins[:bill_length_mm]
|
79
|
+
```
|
80
|
+
|
81
|
+
```{ruby}
|
82
|
+
#| tags: []
|
83
|
+
penguins[:bill_length_mm] < 40
|
84
|
+
```
|
85
|
+
|
86
|
+
```{ruby}
|
87
|
+
#| tags: []
|
88
|
+
penguins[:bill_length_mm].mean
|
89
|
+
```
|
90
|
+
|
data/docker/Dockerfile
CHANGED
data/docker/Gemfile
CHANGED
data/docker/docker-compose.yml
CHANGED
data/docker/readme.md
CHANGED
@@ -6,12 +6,12 @@ This is a docker image containing RedAmber created from
|
|
6
6
|
## Contents
|
7
7
|
|
8
8
|
- From jupyter/minimal-notebook:
|
9
|
-
- Based on 2023-
|
9
|
+
- Based on 2023-05-15 (513d0cb8a67c)
|
10
10
|
- x86-64
|
11
11
|
- Ubuntu-22.04
|
12
|
-
- python-3.10.
|
13
|
-
- lab-3.6.
|
14
|
-
- notebook-6.5.
|
12
|
+
- python-3.10.11
|
13
|
+
- lab-3.6.3
|
14
|
+
- notebook-6.5.4
|
15
15
|
- System ruby-dev:
|
16
16
|
- Ruby 3.0.2
|
17
17
|
- Arrow 11.0.0 for Ubuntu:
|
@@ -22,7 +22,7 @@ This is a docker image containing RedAmber created from
|
|
22
22
|
- Locally installed iruby:
|
23
23
|
- Using Ruby 3.0.2
|
24
24
|
- Locally installed bundler and Gemfile:
|
25
|
-
- RedAmber 0.
|
25
|
+
- RedAmber 0.5.0
|
26
26
|
- Others (see Gemfile)
|
27
27
|
|
28
28
|
## Install
|
@@ -44,7 +44,7 @@ module RedAmber
|
|
44
44
|
# BUFFER
|
45
45
|
#
|
46
46
|
# @example Load from a Buffer skipping comment line
|
47
|
-
# DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines:
|
47
|
+
# DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines: /\A#/)
|
48
48
|
# # comment
|
49
49
|
# name,age
|
50
50
|
# Yasuko,68
|
@@ -39,7 +39,7 @@ module RedAmber
|
|
39
39
|
# penguins[:bill_length_mm]
|
40
40
|
#
|
41
41
|
# # =>
|
42
|
-
# #<RedAmber::Vector(:double, size=344):
|
42
|
+
# #<RedAmber::Vector(:double, size=344, chunked):0x0000000000008f0c>
|
43
43
|
# [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
|
44
44
|
#
|
45
45
|
# @overload [](keys)
|
@@ -173,7 +173,7 @@ module RedAmber
|
|
173
173
|
# penguins.v(:bill_length_mm)
|
174
174
|
#
|
175
175
|
# # =>
|
176
|
-
# #<RedAmber::Vector(:double, size=344):
|
176
|
+
# #<RedAmber::Vector(:double, size=344, chunked):0x0000000000008f0c>
|
177
177
|
# [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
|
178
178
|
#
|
179
179
|
def v(key)
|
@@ -44,7 +44,7 @@ module RedAmber
|
|
44
44
|
# languages[:Language]
|
45
45
|
#
|
46
46
|
# # =>
|
47
|
-
# #<RedAmber::Vector(:string, size=4):0x000000000010359c>
|
47
|
+
# #<RedAmber::Vector(:string, size=4, chunked):0x000000000010359c>
|
48
48
|
# ["Ruby", "Python", "R", "Rust"]
|
49
49
|
#
|
50
50
|
# @overload pick(booleans)
|
@@ -512,8 +512,8 @@ module RedAmber
|
|
512
512
|
# 1 Rui 49 78 (nil)
|
513
513
|
# 2 Hinata 28 57 Momotaro
|
514
514
|
#
|
515
|
-
def assign(
|
516
|
-
assign_update(
|
515
|
+
def assign(...)
|
516
|
+
assign_update(false, ...)
|
517
517
|
end
|
518
518
|
|
519
519
|
# Assign new or updated variables (columns) and create an updated DataFrame.
|
@@ -583,13 +583,13 @@ module RedAmber
|
|
583
583
|
# @return [DataFrame]
|
584
584
|
# assigned DataFrame.
|
585
585
|
#
|
586
|
-
def assign_left(
|
587
|
-
assign_update(
|
586
|
+
def assign_left(...)
|
587
|
+
assign_update(true, ...)
|
588
588
|
end
|
589
589
|
|
590
590
|
private
|
591
591
|
|
592
|
-
def assign_update(*assigner,
|
592
|
+
def assign_update(append_to_left, *assigner, &block)
|
593
593
|
if block
|
594
594
|
assigner_from_block = instance_eval(&block)
|
595
595
|
assigner =
|
data/lib/red_amber/group.rb
CHANGED
@@ -26,12 +26,7 @@ module RedAmber
|
|
26
26
|
private
|
27
27
|
|
28
28
|
# @!macro [attach] define_group_aggregation
|
29
|
-
#
|
30
|
-
# Group aggregation function `$1`.
|
31
|
-
# @param summary_keys [Array<Symbol, String>]
|
32
|
-
# summary keys.
|
33
|
-
# @return [DataFrame]
|
34
|
-
# aggregated DataFrame
|
29
|
+
# Returns aggregated DataFrame.
|
35
30
|
#
|
36
31
|
def define_group_aggregation(function)
|
37
32
|
define_method(function) do |*summary_keys|
|
@@ -55,7 +50,7 @@ module RedAmber
|
|
55
50
|
# @param group_keys [Array<Symbol, String>]
|
56
51
|
# keys for grouping.
|
57
52
|
# @return [Group]
|
58
|
-
# Group object.
|
53
|
+
# Group object. It inspects grouped columns and their counts.
|
59
54
|
# @example
|
60
55
|
# Group.new(penguins, :species)
|
61
56
|
#
|
@@ -79,13 +74,93 @@ module RedAmber
|
|
79
74
|
@group = @dataframe.table.group(*@group_keys)
|
80
75
|
end
|
81
76
|
|
82
|
-
|
77
|
+
# @!macro group_aggregation
|
78
|
+
# @param group_keys [Array<Symbol, String>]
|
79
|
+
# keys for grouping.
|
80
|
+
# @return [DataFrame]
|
81
|
+
# aggregated DataFrame
|
82
|
+
|
83
|
+
# Whether all elements in each group evaluate to true.
|
84
|
+
#
|
85
|
+
# @!method all(*group_keys)
|
86
|
+
# @macro group_aggregation
|
87
|
+
# @example For boolean columns by default.
|
88
|
+
# dataframe
|
89
|
+
#
|
90
|
+
# # =>
|
91
|
+
# #<RedAmber::DataFrame : 6 x 3 Vectors, 0x00000000000230dc>
|
92
|
+
# x y z
|
93
|
+
# <uint8> <string> <boolean>
|
94
|
+
# 0 1 A false
|
95
|
+
# 1 2 A true
|
96
|
+
# 2 3 B false
|
97
|
+
# 3 4 B (nil)
|
98
|
+
# 4 5 B true
|
99
|
+
# 5 6 C false
|
100
|
+
#
|
101
|
+
# dataframe.group(:y).all
|
102
|
+
#
|
103
|
+
# # =>
|
104
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000fc08>
|
105
|
+
# y all(z)
|
106
|
+
# <string> <boolean>
|
107
|
+
# 0 A false
|
108
|
+
# 1 B false
|
109
|
+
# 2 C false
|
110
|
+
#
|
111
|
+
define_group_aggregation :all
|
112
|
+
|
113
|
+
# Whether any elements in each group evaluate to true.
|
114
|
+
#
|
115
|
+
# @!method any(*group_keys)
|
116
|
+
# @macro group_aggregation
|
117
|
+
# @example For boolean columns by default.
|
118
|
+
# dataframe.group(:y).any
|
119
|
+
#
|
120
|
+
# # =>
|
121
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000000117ec>
|
122
|
+
# y any(z)
|
123
|
+
# <string> <boolean>
|
124
|
+
# 0 A true
|
125
|
+
# 1 B true
|
126
|
+
# 2 C false
|
127
|
+
#
|
128
|
+
define_group_aggregation :any
|
129
|
+
|
130
|
+
# Count the number of non-nil values in each group.
|
131
|
+
# If counts are the same (and do not include NaN or nil),
|
132
|
+
# columns for counts are unified.
|
133
|
+
#
|
134
|
+
# @!method count(*group_keys)
|
135
|
+
# @macro group_aggregation
|
136
|
+
# @example Show counts for each group.
|
137
|
+
# dataframe.group(:y).count
|
138
|
+
#
|
139
|
+
# # =>
|
140
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000011ea04>
|
141
|
+
# y count(x) count(z)
|
142
|
+
# <string> <int64> <int64>
|
143
|
+
# 0 A 2 2
|
144
|
+
# 1 B 3 2
|
145
|
+
# 2 C 1 1
|
146
|
+
#
|
147
|
+
# dataframe.group(:z).count
|
148
|
+
# # same as dataframe.group(:z).count(:x, :y)
|
149
|
+
#
|
150
|
+
# # =>
|
151
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000122834>
|
152
|
+
# z count
|
153
|
+
# <boolean> <int64>
|
154
|
+
# 0 false 3
|
155
|
+
# 1 true 2
|
156
|
+
# 2 (nil) 1
|
157
|
+
#
|
158
|
+
define_group_aggregation :count
|
83
159
|
alias_method :__count, :count
|
84
160
|
private :__count
|
85
161
|
|
86
|
-
def count(*
|
87
|
-
df = __count(
|
88
|
-
# if counts are the same (and do not include NaN or nil), aggregate count columns.
|
162
|
+
def count(*group_keys)
|
163
|
+
df = __count(group_keys)
|
89
164
|
if df.pick(@group_keys.size..).to_h.values.uniq.size == 1
|
90
165
|
df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
|
91
166
|
else
|
@@ -93,19 +168,213 @@ module RedAmber
|
|
93
168
|
end
|
94
169
|
end
|
95
170
|
|
96
|
-
|
171
|
+
# Returns each record group size as a DataFrame.
|
172
|
+
#
|
173
|
+
# @return [DataFrame]
|
174
|
+
# DataFrame consists of:
|
175
|
+
# - Group key columns.
|
176
|
+
# - Result columns by group aggregation.
|
177
|
+
# @example
|
178
|
+
# penguins.group(:species).group_count
|
179
|
+
#
|
180
|
+
# # =>
|
181
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003a70>
|
182
|
+
# species group_count
|
183
|
+
# <string> <uint8>
|
184
|
+
# 0 Adelie 152
|
185
|
+
# 1 Chinstrap 68
|
186
|
+
# 2 Gentoo 124
|
187
|
+
#
|
188
|
+
def group_count
|
189
|
+
DataFrame.create(group_table)
|
190
|
+
end
|
191
|
+
alias_method :count_all, :group_count
|
192
|
+
|
193
|
+
# Count the unique values in each group.
|
194
|
+
#
|
195
|
+
# @!method count_uniq(*group_keys)
|
196
|
+
# @macro group_aggregation
|
197
|
+
# @example Show counts for each group.
|
198
|
+
# dataframe.group(:y).count_uniq
|
199
|
+
#
|
200
|
+
# # =>
|
201
|
+
# #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000011ea04>
|
202
|
+
# y count_uniq(x)
|
203
|
+
# <string> <int64>
|
204
|
+
# 0 A 2
|
205
|
+
# 1 B 3
|
206
|
+
# 2 C 1
|
207
|
+
#
|
208
|
+
define_group_aggregation :count_distinct
|
209
|
+
def count_uniq(*group_keys)
|
210
|
+
df = count_distinct(*group_keys)
|
211
|
+
df.rename do
|
212
|
+
keys_org = keys.select { _1.start_with?('count_distinct') }
|
213
|
+
keys_renamed = keys_org.map { _1.to_s.gsub('distinct', 'uniq') }
|
214
|
+
keys_org.zip keys_renamed
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
# Compute maximum of values in each group for numeric columns.
|
219
|
+
#
|
220
|
+
# @!method max(*group_keys)
|
221
|
+
# @macro group_aggregation
|
222
|
+
# @example
|
223
|
+
# dataframe.group(:y).max
|
224
|
+
#
|
225
|
+
# # =>
|
226
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000014ae74>
|
227
|
+
# y max(x)
|
228
|
+
# <string> <uint8>
|
229
|
+
# 0 A 2
|
230
|
+
# 1 B 5
|
231
|
+
# 2 C 6
|
232
|
+
#
|
233
|
+
define_group_aggregation :max
|
234
|
+
|
235
|
+
# Compute mean of values in each group for numeric columns.
|
236
|
+
#
|
237
|
+
# @!method mean(*group_keys)
|
238
|
+
# @macro group_aggregation
|
239
|
+
# @example
|
240
|
+
# dataframe.group(:y).mean
|
241
|
+
#
|
242
|
+
# # =>
|
243
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000138a8>
|
244
|
+
# y mean(x)
|
245
|
+
# <string> <double>
|
246
|
+
# 0 A 1.5
|
247
|
+
# 1 B 4.0
|
248
|
+
# 2 C 6.0
|
249
|
+
#
|
250
|
+
define_group_aggregation :mean
|
251
|
+
|
252
|
+
# Compute median of values in each group for numeric columns.
|
253
|
+
#
|
254
|
+
# @!method median(*group_keys)
|
255
|
+
# @macro group_aggregation
|
256
|
+
# @example
|
257
|
+
# dataframe.group(:y).median
|
258
|
+
#
|
259
|
+
# # =>
|
260
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000138a8>
|
261
|
+
# y median(x)
|
262
|
+
# <string> <double>
|
263
|
+
# 0 A 1.5
|
264
|
+
# 1 B 4.0
|
265
|
+
# 2 C 6.0
|
266
|
+
#
|
267
|
+
define_group_aggregation :approximate_median
|
268
|
+
def median(*group_keys)
|
269
|
+
df = approximate_median(*group_keys)
|
270
|
+
df.rename do
|
271
|
+
keys_org = keys.select { _1.start_with?('approximate_') }
|
272
|
+
keys_renamed = keys_org.map { _1.to_s.delete_prefix('approximate_') }
|
273
|
+
keys_org.zip keys_renamed
|
274
|
+
end
|
275
|
+
end
|
97
276
|
|
98
|
-
|
277
|
+
# Compute minimum of values in each group for numeric columns.
|
278
|
+
#
|
279
|
+
# @!method min(*group_keys)
|
280
|
+
# @macro group_aggregation
|
281
|
+
# @example
|
282
|
+
# dataframe.group(:y).min
|
283
|
+
#
|
284
|
+
# # =>
|
285
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000018f38>
|
286
|
+
# y min(x)
|
287
|
+
# <string> <uint8>
|
288
|
+
# 0 A 1
|
289
|
+
# 1 B 3
|
290
|
+
# 2 C 6
|
291
|
+
#
|
292
|
+
define_group_aggregation :min
|
99
293
|
|
100
|
-
|
294
|
+
# Get one value from each group.
|
295
|
+
#
|
296
|
+
# @!method one(*group_keys)
|
297
|
+
# @macro group_aggregation
|
298
|
+
# @example
|
299
|
+
# dataframe.group(:y).one
|
300
|
+
#
|
301
|
+
# # =>
|
302
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000002885c>
|
303
|
+
# y one(x)
|
304
|
+
# <string> <uint8>
|
305
|
+
# 0 A 1
|
306
|
+
# 1 B 3
|
307
|
+
# 2 C 6
|
308
|
+
#
|
309
|
+
define_group_aggregation :one
|
101
310
|
|
102
|
-
|
311
|
+
# Compute product of values in each group for numeric columns.
|
312
|
+
#
|
313
|
+
# @!method product(*group_keys)
|
314
|
+
# @macro group_aggregation
|
315
|
+
# @example
|
316
|
+
# dataframe.group(:y).product
|
317
|
+
#
|
318
|
+
# # =>
|
319
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000021a84>
|
320
|
+
# y product(x)
|
321
|
+
# <string> <uint64>
|
322
|
+
# 0 A 2
|
323
|
+
# 1 B 60
|
324
|
+
# 2 C 6
|
325
|
+
#
|
326
|
+
define_group_aggregation :product
|
103
327
|
|
104
|
-
|
328
|
+
# Compute standard deviation of values in each group for numeric columns.
|
329
|
+
#
|
330
|
+
# @!method stddev(*group_keys)
|
331
|
+
# @macro group_aggregation
|
332
|
+
# @example
|
333
|
+
# dataframe.group(:y).stddev
|
334
|
+
#
|
335
|
+
# # =>
|
336
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000002be6c>
|
337
|
+
# y stddev(x)
|
338
|
+
# <string> <double>
|
339
|
+
# 0 A 0.5
|
340
|
+
# 1 B 0.816
|
341
|
+
# 2 C 0.0
|
342
|
+
#
|
343
|
+
define_group_aggregation :stddev
|
105
344
|
|
106
|
-
|
345
|
+
# Compute sum of values in each group for numeric columns.
|
346
|
+
#
|
347
|
+
# @!method sum(*group_keys)
|
348
|
+
# @macro group_aggregation
|
349
|
+
# @example
|
350
|
+
# dataframe.group(:y).sum
|
351
|
+
#
|
352
|
+
# # =>
|
353
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000032a14>
|
354
|
+
# y sum(x)
|
355
|
+
# <string> <uint64>
|
356
|
+
# 0 A 3
|
357
|
+
# 1 B 12
|
358
|
+
# 2 C 6
|
359
|
+
#
|
360
|
+
define_group_aggregation :sum
|
107
361
|
|
108
|
-
|
362
|
+
# Compute variance of values in each group for numeric columns.
|
363
|
+
#
|
364
|
+
# @!method variance(*group_keys)
|
365
|
+
# @macro group_aggregation
|
366
|
+
# @example
|
367
|
+
# dataframe.group(:y).variance
|
368
|
+
#
|
369
|
+
# # =>
|
370
|
+
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000003b1dc>
|
371
|
+
# y variance(x)
|
372
|
+
# <string> <double>
|
373
|
+
# 0 A 0.25
|
374
|
+
# 1 B 0.667
|
375
|
+
# 2 C 0.0
|
376
|
+
#
|
377
|
+
define_group_aggregation :variance
|
109
378
|
|
110
379
|
# Returns Array of boolean filters to select each records in the Group.
|
111
380
|
#
|
@@ -168,27 +437,6 @@ module RedAmber
|
|
168
437
|
@filters.size
|
169
438
|
end
|
170
439
|
|
171
|
-
# Returns each record group size as a DataFrame.
|
172
|
-
#
|
173
|
-
# @return [DataFrame]
|
174
|
-
# DataFrame consists of:
|
175
|
-
# - Group key columns.
|
176
|
-
# - Result columns by group aggregation.
|
177
|
-
# @example
|
178
|
-
# penguins.group(:species).group_count
|
179
|
-
#
|
180
|
-
# # =>
|
181
|
-
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003a70>
|
182
|
-
# species group_count
|
183
|
-
# <string> <uint8>
|
184
|
-
# 0 Adelie 152
|
185
|
-
# 1 Chinstrap 68
|
186
|
-
# 2 Gentoo 124
|
187
|
-
#
|
188
|
-
def group_count
|
189
|
-
DataFrame.create(group_table)
|
190
|
-
end
|
191
|
-
|
192
440
|
# String representation of self.
|
193
441
|
#
|
194
442
|
# @return [String]
|
data/lib/red_amber/subframes.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module RedAmber
|
4
|
-
# class SubFrames treats
|
4
|
+
# class SubFrames treats subsets of a DataFrame
|
5
5
|
# [Experimental feature] Class SubFrames may be removed or be changed in the future.
|
6
6
|
class SubFrames
|
7
7
|
include Enumerable # may change to use Forwardable.
|
@@ -434,7 +434,7 @@ module RedAmber
|
|
434
434
|
# @return [DataFrame]
|
435
435
|
# created DataFrame.
|
436
436
|
# @example Aggregate by key labels in arguments and values from block.
|
437
|
-
# subframes.aggregate(:y, :sum_x) { [y.
|
437
|
+
# subframes.aggregate(:y, :sum_x) { [y.one, x.sum] }
|
438
438
|
#
|
439
439
|
# # =>
|
440
440
|
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
@@ -445,7 +445,7 @@ module RedAmber
|
|
445
445
|
# 2 C 6
|
446
446
|
#
|
447
447
|
# @example Aggregate by key labels in an Array and values from block.
|
448
|
-
# subframes.aggregate([:y, :sum_x]) { [y.
|
448
|
+
# subframes.aggregate([:y, :sum_x]) { [y.one, x.sum] }
|
449
449
|
#
|
450
450
|
# # =>
|
451
451
|
# #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
|
@@ -457,7 +457,7 @@ module RedAmber
|
|
457
457
|
#
|
458
458
|
# @overload aggregate
|
459
459
|
#
|
460
|
-
# Aggregate SubFrames creating DataFrame with pairs of key and aggregated
|
460
|
+
# Aggregate SubFrames creating DataFrame with pairs of key and aggregated values
|
461
461
|
# in Hash from the block.
|
462
462
|
#
|
463
463
|
# @yieldparam dataframe [DataFrame]
|
@@ -470,7 +470,7 @@ module RedAmber
|
|
470
470
|
# created DataFrame.
|
471
471
|
# @example Aggregate by key and value pairs from block.
|
472
472
|
# subframes.aggregate do
|
473
|
-
# { y: y.
|
473
|
+
# { y: y.one, sum_x: x.sum }
|
474
474
|
# end
|
475
475
|
#
|
476
476
|
# # =>
|
@@ -712,7 +712,7 @@ module RedAmber
|
|
712
712
|
# @example
|
713
713
|
# subframes.assign(:sum_x, :frac_x) do
|
714
714
|
# group_sum = x.sum
|
715
|
-
# [[group_sum] * size, x /
|
715
|
+
# [[group_sum] * size, x / group_sum.to_f]
|
716
716
|
# end
|
717
717
|
#
|
718
718
|
# # =>
|
data/lib/red_amber/vector.rb
CHANGED
@@ -180,7 +180,8 @@ module RedAmber
|
|
180
180
|
end
|
181
181
|
sio << ']'
|
182
182
|
|
183
|
-
|
183
|
+
chunked = chunked? ? ', chunked' : ''
|
184
|
+
format "#<#{self.class}(:#{type}, size=#{size}#{chunked}):0x%016x>\n%s\n",
|
184
185
|
object_id, sio.string
|
185
186
|
end
|
186
187
|
end
|