red_amber 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -1
- data/.rubocop_todo.yml +2 -15
- data/.yardopts +1 -0
- data/CHANGELOG.md +35 -0
- data/Gemfile +1 -0
- data/README.md +206 -16
- data/doc/DataFrame.md +63 -73
- data/doc/Vector.md +25 -0
- data/doc/{47_examples_of_red_amber.ipynb → examples_of_red_amber.ipynb} +693 -111
- data/lib/red_amber/data_frame.rb +26 -8
- data/lib/red_amber/data_frame_displayable.rb +7 -5
- data/lib/red_amber/group.rb +25 -27
- data/lib/red_amber/vector_selectable.rb +2 -0
- data/lib/red_amber/vector_updatable.rb +22 -1
- data/lib/red_amber/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3853e70f378cac65013a3bcfc51a2d55cb70cc494f3f3b70675bed944cc15b49
|
4
|
+
data.tar.gz: 3c65999cf978f1edf8c2c7fcce9a0ccb192d4da051f34fa0bf3f66ddc178eb1c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fac66ba0bf5955cfe0d21a51b90ec16407182b9053e9b586dfe9f8e2526de4e90efecdd8eba1e8b3c99b12fc44544c82fb2f6af4b666b97876a64a6ee4deedf1
|
7
|
+
data.tar.gz: 1a4cc526ce9f097438f2b7d018552a4cd6aaa2d900012297cd1777c4b9e39063cc2988af91c138e93f291a56175aefb6a6b00c211f9b9c5bd38d75d6bc40acb9
|
data/.rubocop.yml
CHANGED
@@ -43,6 +43,11 @@ Lint/BinaryOperatorWithIdenticalOperands:
|
|
43
43
|
Exclude:
|
44
44
|
- 'test/test_vector_function.rb'
|
45
45
|
|
46
|
+
# Needed for tests with an empty block
|
47
|
+
Lint/EmptyBlock:
|
48
|
+
Exclude:
|
49
|
+
- 'test/test_group.rb'
|
50
|
+
|
46
51
|
# Max: 120
|
47
52
|
Layout/LineLength:
|
48
53
|
Max: 118
|
@@ -78,9 +83,10 @@ Metrics/ClassLength:
|
|
78
83
|
Metrics/CyclomaticComplexity:
|
79
84
|
Max: 12
|
80
85
|
Exclude:
|
86
|
+
- 'lib/red_amber/data_frame_displayable.rb' # Max: 18
|
81
87
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 14
|
88
|
+
- 'lib/red_amber/vector_selectable.rb' # Max: 13
|
82
89
|
- 'lib/red_amber/vector_updatable.rb' # Max: 14
|
83
|
-
- 'lib/red_amber/data_frame_displayable.rb' # Max: 18
|
84
90
|
|
85
91
|
# Max: 10
|
86
92
|
Metrics/MethodLength:
|
data/.rubocop_todo.yml
CHANGED
@@ -1,15 +1,2 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
# on 2022-05-08 02:37:36 UTC using RuboCop version 1.27.0.
|
4
|
-
# The point is for the user to remove these configuration records
|
5
|
-
# one by one as the offenses are removed from the code base.
|
6
|
-
# Note that changes in the inspected code, or installation of new
|
7
|
-
# versions of RuboCop, may require this file to be generated again.
|
8
|
-
|
9
|
-
# Offense count: 1
|
10
|
-
# This cop supports unsafe auto-correction (--auto-correct-all).
|
11
|
-
# Configuration parameters: EnforcedStyle.
|
12
|
-
# SupportedStyles: forbid_for_all_comparison_operators, forbid_for_equality_operators_only, require_for_all_comparison_operators, require_for_equality_operators_only
|
13
|
-
Style/YodaCondition:
|
14
|
-
Exclude:
|
15
|
-
- 'lib/red_amber/data_frame.rb'
|
1
|
+
# We will use cops to detect bugs in an early stage
|
2
|
+
# Feel free to use .rubocop_todo.yml by --auto-gen-config
|
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--output-dir doc/yard
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,41 @@
|
|
2
2
|
|
3
3
|
- Supports Arrow 9.0.0
|
4
4
|
|
5
|
+
## [0.1.8] - 2022-08-04 (experimental)
|
6
|
+
|
7
|
+
- Bug fixes
|
8
|
+
|
9
|
+
- Fix unnamed column in table formatter (#52)
|
10
|
+
- Fix DataFrame#key?, DataFrame#key_index when @keys.nil? (#52)
|
11
|
+
- Align order of replacer in Vector#replace (#53, resolved #38)
|
12
|
+
|
13
|
+
- New features and improvements
|
14
|
+
|
15
|
+
- Refine DataFrame.new for empty arguments (#50)
|
16
|
+
- Delete .rubocop_todo.yml so as not to use yoda conditions (#50)
|
17
|
+
|
18
|
+
- Refine Group (#52, resolved #28)
|
19
|
+
- Refine Group methods creation
|
20
|
+
- Make group key at first(left)
|
21
|
+
- Show only one group count when same counts
|
22
|
+
- Add block acceptability for group
|
23
|
+
- Rename empty key to :unnamed in DataFrame.new
|
24
|
+
- Rename Group#aggregated_by to #summarize (#54)
|
25
|
+
|
26
|
+
- Add Vector#shift (#51)
|
27
|
+
|
28
|
+
- Vector#[] accepts Range as an argument (#51)
|
29
|
+
|
30
|
+
- Update documents
|
31
|
+
|
32
|
+
- Add support for yard (#54)
|
33
|
+
|
34
|
+
- Renew jupyter notebook '53 examples' (#54)
|
35
|
+
|
36
|
+
- Add more examples and images in README (#52)
|
37
|
+
- Add document of group manipulations in README (#52)
|
38
|
+
- Renew DF#group document in DataFrame.md (#52)
|
39
|
+
|
5
40
|
## [0.1.7] - 2022-07-15 (experimental)
|
6
41
|
|
7
42
|
- Bug fixes
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -56,7 +56,7 @@ require 'red_amber' # require 'red-amber' is also OK.
|
|
56
56
|
require 'datasets-arrow'
|
57
57
|
|
58
58
|
arrow = Datasets::Penguins.new.to_arrow
|
59
|
-
RedAmber::DataFrame.new(arrow)
|
59
|
+
penguins = RedAmber::DataFrame.new(arrow)
|
60
60
|
|
61
61
|
# =>
|
62
62
|
#<RedAmber::DataFrame : 344 x 8 Vectors, 0x0000000000013790>
|
@@ -78,28 +78,71 @@ RedAmber::DataFrame.new(arrow)
|
|
78
78
|
|
79
79
|
For example, `DataFrame#pick` accepts keys as an argument and returns a sub DataFrame.
|
80
80
|
|
81
|
+

|
82
|
+
|
81
83
|
```ruby
|
82
|
-
|
84
|
+
penguins.keys
|
85
|
+
# =>
|
86
|
+
[:species,
|
87
|
+
:island,
|
88
|
+
:bill_length_mm,
|
89
|
+
:bill_depth_mm,
|
90
|
+
:flipper_length_mm,
|
91
|
+
:body_mass_g,
|
92
|
+
:sex,
|
93
|
+
:year]
|
94
|
+
|
95
|
+
df = penguins.pick(:species, :island, :body_mass_g)
|
83
96
|
df
|
84
97
|
|
85
98
|
# =>
|
86
|
-
#<RedAmber::DataFrame : 344 x
|
87
|
-
body_mass_g
|
88
|
-
|
89
|
-
1 3750
|
90
|
-
2 3800
|
91
|
-
3 3250
|
92
|
-
4 (nil)
|
93
|
-
5 3450
|
94
|
-
:
|
95
|
-
342
|
96
|
-
343
|
99
|
+
#<RedAmber::DataFrame : 344 x 3 Vectors, 0x000000000003cc1c>
|
100
|
+
species island body_mass_g
|
101
|
+
<string> <string> <uint16>
|
102
|
+
1 Adelie Torgersen 3750
|
103
|
+
2 Adelie Torgersen 3800
|
104
|
+
3 Adelie Torgersen 3250
|
105
|
+
4 Adelie Torgersen (nil)
|
106
|
+
5 Adelie Torgersen 3450
|
107
|
+
: : : :
|
108
|
+
342 Gentoo Biscoe 5750
|
109
|
+
343 Gentoo Biscoe 5200
|
110
|
+
344 Gentoo Biscoe 5400
|
111
|
+
```
|
112
|
+
|
113
|
+
`DataFrame#drop` drops some columns to create a remainder DataFrame.
|
114
|
+
|
115
|
+

|
116
|
+
|
117
|
+
You can specify by keys or a boolean array (same size as n_keys).
|
118
|
+
|
119
|
+
```ruby
|
120
|
+
# Same as df.drop(:species, :island)
|
121
|
+
df = df.drop(true, true, false)
|
122
|
+
|
123
|
+
# =>
|
124
|
+
#<RedAmber::DataFrame : 344 x 1 Vector, 0x0000000000048760>
|
125
|
+
body_mass_g
|
126
|
+
<uint16>
|
127
|
+
1 3750
|
128
|
+
2 3800
|
129
|
+
3 3250
|
130
|
+
4 (nil)
|
131
|
+
5 3450
|
132
|
+
: :
|
133
|
+
342 5750
|
134
|
+
343 5200
|
97
135
|
344 5400
|
98
136
|
```
|
99
137
|
|
138
|
+
Arrow data is immutable, so these methods always return a new object.
|
139
|
+
|
100
140
|
`DataFrame#assign` creates new variables (column in the table).
|
101
141
|
|
142
|
+

|
143
|
+
|
102
144
|
```ruby
|
145
|
+
# New column is created because ':body_mass_kg' is a new key.
|
103
146
|
df.assign(:body_mass_kg => df[:body_mass_g] / 1000.0)
|
104
147
|
|
105
148
|
# =>
|
@@ -117,12 +160,97 @@ df.assign(:body_mass_kg => df[:body_mass_g] / 1000.0)
|
|
117
160
|
344 5400 5.4
|
118
161
|
```
|
119
162
|
|
163
|
+
`DataFrame#slice` selects rows (observations) to create a sub DataFrame.
|
164
|
+
|
165
|
+

|
166
|
+
|
167
|
+
```ruby
|
168
|
+
# returns 5 rows at the start and 5 rows from the end
|
169
|
+
penguins.slice(0...5, -5..-1)
|
170
|
+
|
171
|
+
# =>
|
172
|
+
#<RedAmber::DataFrame : 10 x 8 Vectors, 0x0000000000042be4>
|
173
|
+
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
174
|
+
<string> <string> <double> <double> <uint8> ... <uint16>
|
175
|
+
1 Adelie Torgersen 39.1 18.7 181 ... 2007
|
176
|
+
2 Adelie Torgersen 39.5 17.4 186 ... 2007
|
177
|
+
3 Adelie Torgersen 40.3 18.0 195 ... 2007
|
178
|
+
4 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
179
|
+
5 Adelie Torgersen 36.7 19.3 193 ... 2007
|
180
|
+
: : : : : : ... :
|
181
|
+
8 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
182
|
+
9 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
183
|
+
10 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
184
|
+
```
|
185
|
+
|
186
|
+
`DataFrame#remove` rejects rows (observations) to create a remainder DataFrame.
|
187
|
+
|
188
|
+

|
189
|
+
|
190
|
+
```ruby
|
191
|
+
# penguins[:bill_length_mm] < 40 returns a boolean Vector
|
192
|
+
penguins.remove(penguins[:bill_length_mm] < 40)
|
193
|
+
|
194
|
+
# =>
|
195
|
+
#<RedAmber::DataFrame : 244 x 8 Vectors, 0x000000000007d6f4>
|
196
|
+
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
197
|
+
<string> <string> <double> <double> <uint8> ... <uint16>
|
198
|
+
1 Adelie Torgersen 40.3 18.0 195 ... 2007
|
199
|
+
2 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
200
|
+
3 Adelie Torgersen 42.0 20.2 190 ... 2007
|
201
|
+
4 Adelie Torgersen 41.1 17.6 182 ... 2007
|
202
|
+
5 Adelie Torgersen 42.5 20.7 197 ... 2007
|
203
|
+
: : : : : : ... :
|
204
|
+
242 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
205
|
+
243 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
206
|
+
244 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
207
|
+
```
|
208
|
+
|
120
209
|
DataFrame manipulating methods like `pick`, `drop`, `slice`, `remove`, `rename` and `assign` accept a block.
|
121
210
|
|
122
|
-
This is
|
211
|
+
This example uses a block to update numeric columns.
|
123
212
|
|
124
213
|
```ruby
|
125
|
-
|
214
|
+
df = RedAmber::DataFrame.new(
|
215
|
+
integer: [0, 1, 2, 3, nil],
|
216
|
+
float: [0.0, 1.1, 2.2, Float::NAN, nil],
|
217
|
+
string: ['A', 'B', 'C', 'D', nil],
|
218
|
+
boolean: [true, false, true, false, nil])
|
219
|
+
df
|
220
|
+
|
221
|
+
# =>
|
222
|
+
#<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000003131c>
|
223
|
+
integer float string boolean
|
224
|
+
<uint8> <double> <string> <boolean>
|
225
|
+
1 0 0.0 A true
|
226
|
+
2 1 1.1 B false
|
227
|
+
3 2 2.2 C true
|
228
|
+
4 3 NaN D false
|
229
|
+
5 (nil) (nil) (nil) (nil)
|
230
|
+
|
231
|
+
df.assign do
|
232
|
+
vectors.each_with_object({}) do |v, h|
|
233
|
+
h[v.key] = -v if v.numeric?
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
# =>
|
238
|
+
#<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000009a1b4>
|
239
|
+
integer float string boolean
|
240
|
+
<uint8> <double> <string> <boolean>
|
241
|
+
1 0 -0.0 A true
|
242
|
+
2 255 -1.1 B false
|
243
|
+
3 254 -2.2 C true
|
244
|
+
4 253 NaN D false
|
245
|
+
5 (nil) (nil) (nil) (nil)
|
246
|
+
```
|
247
|
+
|
248
|
+
The negate (`-@`) method of an unsigned integer Vector returns the complement (values wrap around, e.g. 1 becomes 255 for uint8).
|
249
|
+
|
250
|
+
The next example eliminates observations (rows in the table) containing nil.
|
251
|
+
|
252
|
+
```ruby
|
253
|
+
# remove all observations containing nil
|
126
254
|
nil_removed = penguins.remove { vectors.map(&:is_nil).reduce(&:|) }
|
127
255
|
nil_removed.tdr
|
128
256
|
# =>
|
@@ -145,12 +273,51 @@ For this frequently needed task, we can do it much simpler.
|
|
145
273
|
penguins.remove_nil # => same result as above
|
146
274
|
```
|
147
275
|
|
276
|
+
`DataFrame#group` method can be used for the grouping tasks.
|
277
|
+
|
278
|
+
```ruby
|
279
|
+
starwars = RedAmber::DataFrame.load(URI("https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv"))
|
280
|
+
starwars
|
281
|
+
|
282
|
+
# =>
|
283
|
+
#<RedAmber::DataFrame : 87 x 12 Vectors, 0x000000000000607c>
|
284
|
+
unnamed1 name height mass hair_color skin_color eye_color ... species
|
285
|
+
<int64> <string> <int64> <double> <string> <string> <string> ... <string>
|
286
|
+
1 1 Luke Skywalker 172 77.0 blond fair blue ... Human
|
287
|
+
2 2 C-3PO 167 75.0 NA gold yellow ... Droid
|
288
|
+
3 3 R2-D2 96 32.0 NA white, blue red ... Droid
|
289
|
+
4 4 Darth Vader 202 136.0 none white yellow ... Human
|
290
|
+
5 5 Leia Organa 150 49.0 brown light brown ... Human
|
291
|
+
: : : : : : : : ... :
|
292
|
+
85 85 BB8 (nil) (nil) none none black ... Droid
|
293
|
+
86 86 Captain Phasma (nil) (nil) unknown unknown unknown ... NA
|
294
|
+
87 87 Padmé Amidala 165 45.0 brown light brown ... Human
|
295
|
+
|
296
|
+
grouped = starwars.group(:species) { [count(:species), mean(:height, :mass)] }
|
297
|
+
grouped.slice { v(:count) > 1 }
|
298
|
+
|
299
|
+
# =>
|
300
|
+
#<RedAmber::DataFrame : 9 x 4 Vectors, 0x000000000006e848>
|
301
|
+
species count mean(height) mean(mass)
|
302
|
+
<string> <int64> <double> <double>
|
303
|
+
1 Human 35 176.6 82.8
|
304
|
+
2 Droid 6 131.2 69.8
|
305
|
+
3 Wookiee 2 231.0 124.0
|
306
|
+
4 Gungan 3 208.7 74.0
|
307
|
+
5 NA 4 181.3 48.0
|
308
|
+
: : : : :
|
309
|
+
7 Twi'lek 2 179.0 55.0
|
310
|
+
8 Mirialan 2 168.0 53.1
|
311
|
+
9 Kaminoan 2 221.0 88.0
|
312
|
+
```
|
313
|
+
|
148
314
|
See [DataFrame.md](doc/DataFrame.md) for details.
|
149
315
|
|
150
316
|
|
151
317
|
## `RedAmber::Vector`
|
152
318
|
|
153
319
|
Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
320
|
+
Method `RedAmber::DataFrame#[key]` returns a Vector with the key `key`.
|
154
321
|
|
155
322
|
```ruby
|
156
323
|
penguins[:bill_length_mm]
|
@@ -161,11 +328,34 @@ penguins[:bill_length_mm]
|
|
161
328
|
|
162
329
|
Vectors accept some [functional methods from Arrow](https://arrow.apache.org/docs/cpp/compute.html).
|
163
330
|
|
331
|
+
This is an element-wise comparison and returns a boolean Vector of the same size.
|
332
|
+
|
333
|
+

|
334
|
+
|
335
|
+
```ruby
|
336
|
+
penguins[:bill_length_mm] < 40
|
337
|
+
|
338
|
+
# =>
|
339
|
+
#<RedAmber::Vector(:boolean, size=344):0x000000000007e7ac>
|
340
|
+
[true, true, false, nil, true, true, true, true, true, false, true, true, false, ... ]
|
341
|
+
```
|
342
|
+
|
343
|
+
The next example returns an aggregated result.
|
344
|
+
|
345
|
+

|
346
|
+
|
347
|
+
```ruby
|
348
|
+
penguins[:bill_length_mm].mean
|
349
|
+
# =>
|
350
|
+
43.92192982456141
|
351
|
+
|
352
|
+
```
|
353
|
+
|
164
354
|
See [Vector.md](doc/Vector.md) for details.
|
165
355
|
|
166
356
|
## Jupyter notebook
|
167
357
|
|
168
|
-
[
|
358
|
+
[53 Examples of Red Amber](doc/examples_of_red_amber.ipynb)
|
169
359
|
|
170
360
|
## Development
|
171
361
|
|
data/doc/DataFrame.md
CHANGED
@@ -860,16 +860,10 @@ penguins.to_rover
|
|
860
860
|
|
861
861
|
## Grouping
|
862
862
|
|
863
|
-
### `group(
|
864
|
-
|
865
|
-
(
|
866
|
-
This API will change in the future version. Especcially I want to change:
|
867
|
-
- Order of the column of the result (aggregation_keys should be the first)
|
868
|
-
- DataFrame#group will accept a block (heronshoes/red_amber #28)
|
869
|
-
)
|
863
|
+
### `group(group_keys)`
|
870
864
|
|
871
865
|
`group` creates an instance of class `Group`. `Group` accepts the functions below as methods.
|
872
|
-
Method accepts options as `
|
866
|
+
Method accepts options as `group_keys`.
|
873
867
|
|
874
868
|
Available functions are:
|
875
869
|
|
@@ -889,8 +883,8 @@ penguins.to_rover
|
|
889
883
|
- [ ] tdigest
|
890
884
|
- ✓ variance
|
891
885
|
|
892
|
-
For the each group of `
|
893
|
-
|
886
|
+
For each group of `group_keys`, the aggregation `function` is applied, and a new DataFrame is returned with aggregated keys according to `summary_keys`.
|
887
|
+
Summary key names are provided by `function(summary_keys)` style.
|
894
888
|
|
895
889
|
This is an example of grouping of famous STARWARS dataset.
|
896
890
|
|
@@ -900,18 +894,18 @@ penguins.to_rover
|
|
900
894
|
starwars
|
901
895
|
|
902
896
|
# =>
|
903
|
-
#<RedAmber::DataFrame : 87 x 12 Vectors,
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
897
|
+
#<RedAmber::DataFrame : 87 x 12 Vectors, 0x0000000000005a50>
|
898
|
+
unnamed1 name height mass hair_color skin_color eye_color ... species
|
899
|
+
<int64> <string> <int64> <double> <string> <string> <string> ... <string>
|
900
|
+
1 1 Luke Skywalker 172 77.0 blond fair blue ... Human
|
901
|
+
2 2 C-3PO 167 75.0 NA gold yellow ... Droid
|
902
|
+
3 3 R2-D2 96 32.0 NA white, blue red ... Droid
|
903
|
+
4 4 Darth Vader 202 136.0 none white yellow ... Human
|
904
|
+
5 5 Leia Organa 150 49.0 brown light brown ... Human
|
905
|
+
: : : : : : : : ... :
|
906
|
+
85 85 BB8 (nil) (nil) none none black ... Droid
|
907
|
+
86 86 Captain Phasma (nil) (nil) unknown unknown unknown ... NA
|
908
|
+
87 87 Padmé Amidala 165 45.0 brown light brown ... Human
|
915
909
|
|
916
910
|
starwars.tdr(12)
|
917
911
|
|
@@ -919,7 +913,7 @@ penguins.to_rover
|
|
919
913
|
RedAmber::DataFrame : 87 x 12 Vectors
|
920
914
|
Vectors : 4 numeric, 8 strings
|
921
915
|
# key type level data_preview
|
922
|
-
1 :
|
916
|
+
1 :unnamed1 int64 87 [1, 2, 3, 4, 5, ... ]
|
923
917
|
2 :name string 87 ["Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Organa", ... ]
|
924
918
|
3 :height int64 46 [172, 167, 96, 202, 150, ... ], 6 nils
|
925
919
|
4 :mass double 39 [77.0, 75.0, 32.0, 136.0, 49.0, ... ], 28 nils
|
@@ -933,74 +927,70 @@ penguins.to_rover
|
|
933
927
|
12 :species string 38 ["Human", "Droid", "Droid", "Human", "Human", ... ]
|
934
928
|
```
|
935
929
|
|
936
|
-
We can
|
930
|
+
We can group by `:species` and calculate the count.
|
937
931
|
|
938
932
|
```ruby
|
939
|
-
|
940
|
-
grouped
|
933
|
+
starwars.group(:species).count(:species)
|
941
934
|
|
942
935
|
# =>
|
943
|
-
#<RedAmber::DataFrame : 38 x
|
944
|
-
|
945
|
-
|
946
|
-
1
|
947
|
-
2
|
948
|
-
3
|
949
|
-
4
|
950
|
-
5
|
951
|
-
:
|
952
|
-
36
|
953
|
-
37
|
954
|
-
38
|
936
|
+
#<RedAmber::DataFrame : 38 x 2 Vectors, 0x000000000001d6f0>
|
937
|
+
species count
|
938
|
+
<string> <int64>
|
939
|
+
1 Human 35
|
940
|
+
2 Droid 6
|
941
|
+
3 Wookiee 2
|
942
|
+
4 Rodian 1
|
943
|
+
5 Hutt 1
|
944
|
+
: : :
|
945
|
+
36 Kaleesh 1
|
946
|
+
37 Pau'an 1
|
947
|
+
38 Kel Dor 1
|
955
948
|
```
|
956
949
|
|
957
|
-
|
958
|
-
|
950
|
+
We can also calculate the mean of `:mass` and `:height` together.
|
951
|
+
|
959
952
|
```ruby
|
960
|
-
|
961
|
-
grouped = grouped.slice(count > 1)
|
953
|
+
grouped = starwars.group(:species) { [count(:species), mean(:height, :mass)] }
|
962
954
|
|
963
955
|
# =>
|
964
|
-
#<RedAmber::DataFrame :
|
965
|
-
mean(
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
956
|
+
#<RedAmber::DataFrame : 38 x 4 Vectors, 0x00000000000407cc>
|
957
|
+
species count mean(height) mean(mass)
|
958
|
+
<string> <int64> <double> <double>
|
959
|
+
1 Human 35 176.6 82.8
|
960
|
+
2 Droid 6 131.2 69.8
|
961
|
+
3 Wookiee 2 231.0 124.0
|
962
|
+
4 Rodian 1 173.0 74.0
|
963
|
+
5 Hutt 1 175.0 1358.0
|
964
|
+
: : : : :
|
965
|
+
36 Kaleesh 1 216.0 159.0
|
966
|
+
37 Pau'an 1 206.0 80.0
|
967
|
+
38 Kel Dor 1 188.0 80.0
|
976
968
|
```
|
977
969
|
|
978
|
-
|
979
|
-
|
980
|
-
```ruby
|
981
|
-
grouped.assign(count: count[count > 1]).pick { [2,3,0,1].map{ |i| keys[i] } }
|
970
|
+
Select rows for count > 1.
|
982
971
|
|
972
|
+
```ruby
|
973
|
+
grouped.slice(grouped[:count] > 1)
|
974
|
+
|
983
975
|
# =>
|
984
|
-
#<RedAmber::DataFrame : 9 x 4 Vectors,
|
985
|
-
species count mean(
|
986
|
-
<string> <
|
987
|
-
1 Human 35 82.8
|
988
|
-
2 Droid 6 69.8
|
989
|
-
3 Wookiee 2 124.0
|
990
|
-
4 Gungan 3 74.0
|
991
|
-
5 NA 4 48.0
|
992
|
-
: : : :
|
993
|
-
7 Twi'lek 2 55.0
|
994
|
-
8 Mirialan 2 53.1
|
995
|
-
9 Kaminoan 2 88.0
|
976
|
+
#<RedAmber::DataFrame : 9 x 4 Vectors, 0x000000000004c270>
|
977
|
+
species count mean(height) mean(mass)
|
978
|
+
<string> <int64> <double> <double>
|
979
|
+
1 Human 35 176.6 82.8
|
980
|
+
2 Droid 6 131.2 69.8
|
981
|
+
3 Wookiee 2 231.0 124.0
|
982
|
+
4 Gungan 3 208.7 74.0
|
983
|
+
5 NA 4 181.3 48.0
|
984
|
+
: : : : :
|
985
|
+
7 Twi'lek 2 179.0 55.0
|
986
|
+
8 Mirialan 2 168.0 53.1
|
987
|
+
9 Kaminoan 2 221.0 88.0
|
996
988
|
```
|
997
989
|
|
998
990
|
## Combining DataFrames
|
999
991
|
|
1000
992
|
- [ ] Combining rows to a dataframe
|
1001
993
|
|
1002
|
-
- [ ] Add vars
|
1003
|
-
|
1004
994
|
- [ ] Inner join
|
1005
995
|
|
1006
996
|
- [ ] Left join
|
@@ -1009,6 +999,6 @@ penguins.to_rover
|
|
1009
999
|
|
1010
1000
|
- [ ] One-hot encoding
|
1011
1001
|
|
1012
|
-
## Iteration
|
1002
|
+
## Iteration
|
1013
1003
|
|
1014
1004
|
- [ ] each_rows
|
data/doc/Vector.md
CHANGED
@@ -500,3 +500,28 @@ vector.is_in(1, -1)
|
|
500
500
|
#<RedAmber::Vector(:boolean, size=3):0x000000000000f320>
|
501
501
|
[true, false, true]
|
502
502
|
```
|
503
|
+
|
504
|
+
### `shift(amount = 1, fill: nil)`
|
505
|
+
|
506
|
+
Shifts the vector's values by the specified `amount`. The vacated positions are filled with the value `fill`.
|
507
|
+
|
508
|
+
```ruby
|
509
|
+
vector = RedAmber::Vector.new([1, 2, 3, 4, 5])
|
510
|
+
vector.shift
|
511
|
+
|
512
|
+
# =>
|
513
|
+
#<RedAmber::Vector(:uint8, size=5):0x00000000000072d8>
|
514
|
+
[nil, 1, 2, 3, 4]
|
515
|
+
|
516
|
+
vector.shift(-2)
|
517
|
+
|
518
|
+
# =>
|
519
|
+
#<RedAmber::Vector(:uint8, size=5):0x0000000000009970>
|
520
|
+
[3, 4, 5, nil, nil]
|
521
|
+
|
522
|
+
vector.shift(fill: Float::NAN)
|
523
|
+
|
524
|
+
# =>
|
525
|
+
#<RedAmber::Vector(:double, size=5):0x0000000000011d3c>
|
526
|
+
[NaN, 1.0, 2.0, 3.0, 4.0]
|
527
|
+
```
|