red_amber 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -1
- data/.rubocop_todo.yml +2 -15
- data/.yardopts +1 -0
- data/CHANGELOG.md +35 -0
- data/Gemfile +1 -0
- data/README.md +206 -16
- data/doc/DataFrame.md +63 -73
- data/doc/Vector.md +25 -0
- data/doc/{47_examples_of_red_amber.ipynb → examples_of_red_amber.ipynb} +693 -111
- data/lib/red_amber/data_frame.rb +26 -8
- data/lib/red_amber/data_frame_displayable.rb +7 -5
- data/lib/red_amber/group.rb +25 -27
- data/lib/red_amber/vector_selectable.rb +2 -0
- data/lib/red_amber/vector_updatable.rb +22 -1
- data/lib/red_amber/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3853e70f378cac65013a3bcfc51a2d55cb70cc494f3f3b70675bed944cc15b49
|
4
|
+
data.tar.gz: 3c65999cf978f1edf8c2c7fcce9a0ccb192d4da051f34fa0bf3f66ddc178eb1c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fac66ba0bf5955cfe0d21a51b90ec16407182b9053e9b586dfe9f8e2526de4e90efecdd8eba1e8b3c99b12fc44544c82fb2f6af4b666b97876a64a6ee4deedf1
|
7
|
+
data.tar.gz: 1a4cc526ce9f097438f2b7d018552a4cd6aaa2d900012297cd1777c4b9e39063cc2988af91c138e93f291a56175aefb6a6b00c211f9b9c5bd38d75d6bc40acb9
|
data/.rubocop.yml
CHANGED
@@ -43,6 +43,11 @@ Lint/BinaryOperatorWithIdenticalOperands:
|
|
43
43
|
Exclude:
|
44
44
|
- 'test/test_vector_function.rb'
|
45
45
|
|
46
|
+
# Needed for tests with an empty block
|
47
|
+
Lint/EmptyBlock:
|
48
|
+
Exclude:
|
49
|
+
- 'test/test_group.rb'
|
50
|
+
|
46
51
|
# Max: 120
|
47
52
|
Layout/LineLength:
|
48
53
|
Max: 118
|
@@ -78,9 +83,10 @@ Metrics/ClassLength:
|
|
78
83
|
Metrics/CyclomaticComplexity:
|
79
84
|
Max: 12
|
80
85
|
Exclude:
|
86
|
+
- 'lib/red_amber/data_frame_displayable.rb' # Max: 18
|
81
87
|
- 'lib/red_amber/data_frame_selectable.rb' # Max: 14
|
88
|
+
- 'lib/red_amber/vector_selectable.rb' # Max: 13
|
82
89
|
- 'lib/red_amber/vector_updatable.rb' # Max: 14
|
83
|
-
- 'lib/red_amber/data_frame_displayable.rb' # Max: 18
|
84
90
|
|
85
91
|
# Max: 10
|
86
92
|
Metrics/MethodLength:
|
data/.rubocop_todo.yml
CHANGED
@@ -1,15 +1,2 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
# on 2022-05-08 02:37:36 UTC using RuboCop version 1.27.0.
|
4
|
-
# The point is for the user to remove these configuration records
|
5
|
-
# one by one as the offenses are removed from the code base.
|
6
|
-
# Note that changes in the inspected code, or installation of new
|
7
|
-
# versions of RuboCop, may require this file to be generated again.
|
8
|
-
|
9
|
-
# Offense count: 1
|
10
|
-
# This cop supports unsafe auto-correction (--auto-correct-all).
|
11
|
-
# Configuration parameters: EnforcedStyle.
|
12
|
-
# SupportedStyles: forbid_for_all_comparison_operators, forbid_for_equality_operators_only, require_for_all_comparison_operators, require_for_equality_operators_only
|
13
|
-
Style/YodaCondition:
|
14
|
-
Exclude:
|
15
|
-
- 'lib/red_amber/data_frame.rb'
|
1
|
+
# We will use cops to detect bugs in an early stage
|
2
|
+
# Feel free to use .rubocop_todo.yml by --auto-gen-config
|
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--output-dir doc/yard
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,41 @@
|
|
2
2
|
|
3
3
|
- Supports Arrow 9.0.0
|
4
4
|
|
5
|
+
## [0.1.8] - 2022-08-04 (experimental)
|
6
|
+
|
7
|
+
- Bug fixes
|
8
|
+
|
9
|
+
- Fix unnamed column in table formatter (#52)
|
10
|
+
- Fix DataFrame#key?, DataFrame#key_index when @keys.nil? (#52)
|
11
|
+
- Align order of replacer in Vector#replace (#53, resolved #38)
|
12
|
+
|
13
|
+
- New features and improvements
|
14
|
+
|
15
|
+
- Refine DataFrame.new for empty arguments (#50)
|
16
|
+
- Delete .rubocop_todo.yml for not to use yoda condition (#50)
|
17
|
+
|
18
|
+
- Refine Group (#52, resolved #28)
|
19
|
+
- Refine Group methods creation
|
20
|
+
- Make group key at first(left)
|
21
|
+
- Show only one group count when same counts
|
22
|
+
- Add block acceptability for group
|
23
|
+
- Rename empty key to :unnamed in DataFrame.new
|
24
|
+
- Rename Group#aggregated_by to #summarize (#54)
|
25
|
+
|
26
|
+
- Add Vector#shift (#51)
|
27
|
+
|
28
|
+
- Vector#[] accepts Range as an argument (#51)
|
29
|
+
|
30
|
+
- Update documents
|
31
|
+
|
32
|
+
- Add support for yard (#54)
|
33
|
+
|
34
|
+
- Renew jupyter notebook '53 examples' (#54)
|
35
|
+
|
36
|
+
- Add more examples and images in README (#52)
|
37
|
+
- Add document of group manipulations in README (#52)
|
38
|
+
- Renew DF#group document in DataFrame.md (#52)
|
39
|
+
|
5
40
|
## [0.1.7] - 2022-07-15 (experimental)
|
6
41
|
|
7
42
|
- Bug fixes
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -56,7 +56,7 @@ require 'red_amber' # require 'red-amber' is also OK.
|
|
56
56
|
require 'datasets-arrow'
|
57
57
|
|
58
58
|
arrow = Datasets::Penguins.new.to_arrow
|
59
|
-
RedAmber::DataFrame.new(arrow)
|
59
|
+
penguins = RedAmber::DataFrame.new(arrow)
|
60
60
|
|
61
61
|
# =>
|
62
62
|
#<RedAmber::DataFrame : 344 x 8 Vectors, 0x0000000000013790>
|
@@ -78,28 +78,71 @@ RedAmber::DataFrame.new(arrow)
|
|
78
78
|
|
79
79
|
For example, `DataFrame#pick` accepts keys as an argument and returns a sub DataFrame.
|
80
80
|
|
81
|
+
![pick method image](doc/image/dataframe/pick.png)
|
82
|
+
|
81
83
|
```ruby
|
82
|
-
|
84
|
+
penguins.keys
|
85
|
+
# =>
|
86
|
+
[:species,
|
87
|
+
:island,
|
88
|
+
:bill_length_mm,
|
89
|
+
:bill_depth_mm,
|
90
|
+
:flipper_length_mm,
|
91
|
+
:body_mass_g,
|
92
|
+
:sex,
|
93
|
+
:year]
|
94
|
+
|
95
|
+
df = penguins.pick(:species, :island, :body_mass_g)
|
83
96
|
df
|
84
97
|
|
85
98
|
# =>
|
86
|
-
#<RedAmber::DataFrame : 344 x
|
87
|
-
body_mass_g
|
88
|
-
|
89
|
-
1 3750
|
90
|
-
2 3800
|
91
|
-
3 3250
|
92
|
-
4 (nil)
|
93
|
-
5 3450
|
94
|
-
:
|
95
|
-
342
|
96
|
-
343
|
99
|
+
#<RedAmber::DataFrame : 344 x 3 Vectors, 0x000000000003cc1c>
|
100
|
+
species island body_mass_g
|
101
|
+
<string> <string> <uint16>
|
102
|
+
1 Adelie Torgersen 3750
|
103
|
+
2 Adelie Torgersen 3800
|
104
|
+
3 Adelie Torgersen 3250
|
105
|
+
4 Adelie Torgersen (nil)
|
106
|
+
5 Adelie Torgersen 3450
|
107
|
+
: : : :
|
108
|
+
342 Gentoo Biscoe 5750
|
109
|
+
343 Gentoo Biscoe 5200
|
110
|
+
344 Gentoo Biscoe 5400
|
111
|
+
```
|
112
|
+
|
113
|
+
`DataFrame#drop` drops some columns to create a remainder DataFrame.
|
114
|
+
|
115
|
+
![drop method image](doc/image/dataframe/drop.png)
|
116
|
+
|
117
|
+
You can specify by keys or a boolean array (same size as n_keys).
|
118
|
+
|
119
|
+
```ruby
|
120
|
+
# Same as df.drop(:species, :island)
|
121
|
+
df = df.drop(true, true, false)
|
122
|
+
|
123
|
+
# =>
|
124
|
+
#<RedAmber::DataFrame : 344 x 1 Vector, 0x0000000000048760>
|
125
|
+
body_mass_g
|
126
|
+
<uint16>
|
127
|
+
1 3750
|
128
|
+
2 3800
|
129
|
+
3 3250
|
130
|
+
4 (nil)
|
131
|
+
5 3450
|
132
|
+
: :
|
133
|
+
342 5750
|
134
|
+
343 5200
|
97
135
|
344 5400
|
98
136
|
```
|
99
137
|
|
138
|
+
Arrow data is immutable, so these methods always return a new object.
|
139
|
+
|
100
140
|
`DataFrame#assign` creates new variables (column in the table).
|
101
141
|
|
142
|
+
![assign method image](doc/image/dataframe/assign.png)
|
143
|
+
|
102
144
|
```ruby
|
145
|
+
# New column is created because ':body_mass_kg' is a new key.
|
103
146
|
df.assign(:body_mass_kg => df[:body_mass_g] / 1000.0)
|
104
147
|
|
105
148
|
# =>
|
@@ -117,12 +160,97 @@ df.assign(:body_mass_kg => df[:body_mass_g] / 1000.0)
|
|
117
160
|
344 5400 5.4
|
118
161
|
```
|
119
162
|
|
163
|
+
`DataFrame#slice` selects rows (observations) to create a sub DataFrame.
|
164
|
+
|
165
|
+
![slice method image](doc/image/dataframe/slice.png)
|
166
|
+
|
167
|
+
```ruby
|
168
|
+
# returns 5 rows at the start and 5 rows from the end
|
169
|
+
penguins.slice(0...5, -5..-1)
|
170
|
+
|
171
|
+
# =>
|
172
|
+
#<RedAmber::DataFrame : 10 x 8 Vectors, 0x0000000000042be4>
|
173
|
+
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
174
|
+
<string> <string> <double> <double> <uint8> ... <uint16>
|
175
|
+
1 Adelie Torgersen 39.1 18.7 181 ... 2007
|
176
|
+
2 Adelie Torgersen 39.5 17.4 186 ... 2007
|
177
|
+
3 Adelie Torgersen 40.3 18.0 195 ... 2007
|
178
|
+
4 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
179
|
+
5 Adelie Torgersen 36.7 19.3 193 ... 2007
|
180
|
+
: : : : : : ... :
|
181
|
+
8 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
182
|
+
9 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
183
|
+
10 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
184
|
+
```
|
185
|
+
|
186
|
+
`DataFrame#remove` rejects rows (observations) to create a remainder DataFrame.
|
187
|
+
|
188
|
+
![remove method image](doc/image/dataframe/remove.png)
|
189
|
+
|
190
|
+
```ruby
|
191
|
+
# penguins[:bill_length_mm] < 40 returns a boolean Vector
|
192
|
+
penguins.remove(penguins[:bill_length_mm] < 40)
|
193
|
+
|
194
|
+
# =>
|
195
|
+
#<RedAmber::DataFrame : 244 x 8 Vectors, 0x000000000007d6f4>
|
196
|
+
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
197
|
+
<string> <string> <double> <double> <uint8> ... <uint16>
|
198
|
+
1 Adelie Torgersen 40.3 18.0 195 ... 2007
|
199
|
+
2 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
200
|
+
3 Adelie Torgersen 42.0 20.2 190 ... 2007
|
201
|
+
4 Adelie Torgersen 41.1 17.6 182 ... 2007
|
202
|
+
5 Adelie Torgersen 42.5 20.7 197 ... 2007
|
203
|
+
: : : : : : ... :
|
204
|
+
242 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
205
|
+
243 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
206
|
+
244 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
207
|
+
```
|
208
|
+
|
120
209
|
DataFrame manipulating methods like `pick`, `drop`, `slice`, `remove`, `rename` and `assign` accept a block.
|
121
210
|
|
122
|
-
This is
|
211
|
+
This example uses a block to update numeric columns.
|
123
212
|
|
124
213
|
```ruby
|
125
|
-
|
214
|
+
df = RedAmber::DataFrame.new(
|
215
|
+
integer: [0, 1, 2, 3, nil],
|
216
|
+
float: [0.0, 1.1, 2.2, Float::NAN, nil],
|
217
|
+
string: ['A', 'B', 'C', 'D', nil],
|
218
|
+
boolean: [true, false, true, false, nil])
|
219
|
+
df
|
220
|
+
|
221
|
+
# =>
|
222
|
+
#<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000003131c>
|
223
|
+
integer float string boolean
|
224
|
+
<uint8> <double> <string> <boolean>
|
225
|
+
1 0 0.0 A true
|
226
|
+
2 1 1.1 B false
|
227
|
+
3 2 2.2 C true
|
228
|
+
4 3 NaN D false
|
229
|
+
5 (nil) (nil) (nil) (nil)
|
230
|
+
|
231
|
+
df.assign do
|
232
|
+
vectors.each_with_object({}) do |v, h|
|
233
|
+
h[v.key] = -v if v.numeric?
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
# =>
|
238
|
+
#<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000009a1b4>
|
239
|
+
integer float string boolean
|
240
|
+
<uint8> <double> <string> <boolean>
|
241
|
+
1 0 -0.0 A true
|
242
|
+
2 255 -1.1 B false
|
243
|
+
3 254 -2.2 C true
|
244
|
+
4 253 NaN D false
|
245
|
+
5 (nil) (nil) (nil) (nil)
|
246
|
+
```
|
247
|
+
|
248
|
+
Note that the negate (`-@`) method of an unsigned integer Vector returns the complement (e.g. 1 becomes 255 for `uint8`).
|
249
|
+
|
250
|
+
The next example eliminates observations (rows in the table) containing nil.
|
251
|
+
|
252
|
+
```ruby
|
253
|
+
# remove all observations containing nil
|
126
254
|
nil_removed = penguins.remove { vectors.map(&:is_nil).reduce(&:|) }
|
127
255
|
nil_removed.tdr
|
128
256
|
# =>
|
@@ -145,12 +273,51 @@ For this frequently needed task, we can do it much simpler.
|
|
145
273
|
penguins.remove_nil # => same result as above
|
146
274
|
```
|
147
275
|
|
276
|
+
`DataFrame#group` method can be used for the grouping tasks.
|
277
|
+
|
278
|
+
```ruby
|
279
|
+
starwars = RedAmber::DataFrame.load(URI("https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv"))
|
280
|
+
starwars
|
281
|
+
|
282
|
+
# =>
|
283
|
+
#<RedAmber::DataFrame : 87 x 12 Vectors, 0x000000000000607c>
|
284
|
+
unnamed1 name height mass hair_color skin_color eye_color ... species
|
285
|
+
<int64> <string> <int64> <double> <string> <string> <string> ... <string>
|
286
|
+
1 1 Luke Skywalker 172 77.0 blond fair blue ... Human
|
287
|
+
2 2 C-3PO 167 75.0 NA gold yellow ... Droid
|
288
|
+
3 3 R2-D2 96 32.0 NA white, blue red ... Droid
|
289
|
+
4 4 Darth Vader 202 136.0 none white yellow ... Human
|
290
|
+
5 5 Leia Organa 150 49.0 brown light brown ... Human
|
291
|
+
: : : : : : : : ... :
|
292
|
+
85 85 BB8 (nil) (nil) none none black ... Droid
|
293
|
+
86 86 Captain Phasma (nil) (nil) unknown unknown unknown ... NA
|
294
|
+
87 87 Padmé Amidala 165 45.0 brown light brown ... Human
|
295
|
+
|
296
|
+
grouped = starwars.group(:species) { [count(:species), mean(:height, :mass)] }
|
297
|
+
grouped.slice { v(:count) > 1 }
|
298
|
+
|
299
|
+
# =>
|
300
|
+
#<RedAmber::DataFrame : 9 x 4 Vectors, 0x000000000006e848>
|
301
|
+
species count mean(height) mean(mass)
|
302
|
+
<string> <int64> <double> <double>
|
303
|
+
1 Human 35 176.6 82.8
|
304
|
+
2 Droid 6 131.2 69.8
|
305
|
+
3 Wookiee 2 231.0 124.0
|
306
|
+
4 Gungan 3 208.7 74.0
|
307
|
+
5 NA 4 181.3 48.0
|
308
|
+
: : : : :
|
309
|
+
7 Twi'lek 2 179.0 55.0
|
310
|
+
8 Mirialan 2 168.0 53.1
|
311
|
+
9 Kaminoan 2 221.0 88.0
|
312
|
+
```
|
313
|
+
|
148
314
|
See [DataFrame.md](doc/DataFrame.md) for details.
|
149
315
|
|
150
316
|
|
151
317
|
## `RedAmber::Vector`
|
152
318
|
|
153
319
|
Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
320
|
+
Method `RedAmber::DataFrame#[key]` returns a Vector with the key `key`.
|
154
321
|
|
155
322
|
```ruby
|
156
323
|
penguins[:bill_length_mm]
|
@@ -161,11 +328,34 @@ penguins[:bill_length_mm]
|
|
161
328
|
|
162
329
|
Vectors accept some [functional methods from Arrow](https://arrow.apache.org/docs/cpp/compute.html).
|
163
330
|
|
331
|
+
This is an element-wise comparison and returns a boolean Vector of the same size.
|
332
|
+
|
333
|
+
![unary element-wise](doc/image/vector/unary_element_wise.png)
|
334
|
+
|
335
|
+
```ruby
|
336
|
+
penguins[:bill_length_mm] < 40
|
337
|
+
|
338
|
+
# =>
|
339
|
+
#<RedAmber::Vector(:boolean, size=344):0x000000000007e7ac>
|
340
|
+
[true, true, false, nil, true, true, true, true, true, false, true, true, false, ... ]
|
341
|
+
```
|
342
|
+
|
343
|
+
The next example returns an aggregated result.
|
344
|
+
|
345
|
+
![unary aggregation](doc/image/vector/unary_aggregation.png)
|
346
|
+
|
347
|
+
```ruby
|
348
|
+
penguins[:bill_length_mm].mean
|
349
|
+
# =>
|
350
|
+
43.92192982456141
|
351
|
+
|
352
|
+
```
|
353
|
+
|
164
354
|
See [Vector.md](doc/Vector.md) for details.
|
165
355
|
|
166
356
|
## Jupyter notebook
|
167
357
|
|
168
|
-
[
|
358
|
+
[53 Examples of Red Amber](doc/examples_of_red_amber.ipynb)
|
169
359
|
|
170
360
|
## Development
|
171
361
|
|
data/doc/DataFrame.md
CHANGED
@@ -860,16 +860,10 @@ penguins.to_rover
|
|
860
860
|
|
861
861
|
## Grouping
|
862
862
|
|
863
|
-
### `group(
|
864
|
-
|
865
|
-
(
|
866
|
-
This API will change in the future version. Especcially I want to change:
|
867
|
-
- Order of the column of the result (aggregation_keys should be the first)
|
868
|
-
- DataFrame#group will accept a block (heronshoes/red_amber #28)
|
869
|
-
)
|
863
|
+
### `group(group_keys)`
|
870
864
|
|
871
865
|
`group` creates a class `Group` object. `Group` accepts functions below as a method.
|
872
|
-
Method accepts options as `
|
866
|
+
Method accepts options as `group_keys`.
|
873
867
|
|
874
868
|
Available functions are:
|
875
869
|
|
@@ -889,8 +883,8 @@ penguins.to_rover
|
|
889
883
|
- [ ] tdigest
|
890
884
|
- ✓ variance
|
891
885
|
|
892
|
-
For the each group of `
|
893
|
-
|
886
|
+
For each group of `group_keys`, the aggregation `function` is applied, and a new DataFrame is returned with aggregated keys according to `summary_keys`.
|
887
|
+
Summary key names are provided by `function(summary_keys)` style.
|
894
888
|
|
895
889
|
This is an example of grouping of famous STARWARS dataset.
|
896
890
|
|
@@ -900,18 +894,18 @@ penguins.to_rover
|
|
900
894
|
starwars
|
901
895
|
|
902
896
|
# =>
|
903
|
-
#<RedAmber::DataFrame : 87 x 12 Vectors,
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
897
|
+
#<RedAmber::DataFrame : 87 x 12 Vectors, 0x0000000000005a50>
|
898
|
+
unnamed1 name height mass hair_color skin_color eye_color ... species
|
899
|
+
<int64> <string> <int64> <double> <string> <string> <string> ... <string>
|
900
|
+
1 1 Luke Skywalker 172 77.0 blond fair blue ... Human
|
901
|
+
2 2 C-3PO 167 75.0 NA gold yellow ... Droid
|
902
|
+
3 3 R2-D2 96 32.0 NA white, blue red ... Droid
|
903
|
+
4 4 Darth Vader 202 136.0 none white yellow ... Human
|
904
|
+
5 5 Leia Organa 150 49.0 brown light brown ... Human
|
905
|
+
: : : : : : : : ... :
|
906
|
+
85 85 BB8 (nil) (nil) none none black ... Droid
|
907
|
+
86 86 Captain Phasma (nil) (nil) unknown unknown unknown ... NA
|
908
|
+
87 87 Padmé Amidala 165 45.0 brown light brown ... Human
|
915
909
|
|
916
910
|
starwars.tdr(12)
|
917
911
|
|
@@ -919,7 +913,7 @@ penguins.to_rover
|
|
919
913
|
RedAmber::DataFrame : 87 x 12 Vectors
|
920
914
|
Vectors : 4 numeric, 8 strings
|
921
915
|
# key type level data_preview
|
922
|
-
1 :
|
916
|
+
1 :unnamed1 int64 87 [1, 2, 3, 4, 5, ... ]
|
923
917
|
2 :name string 87 ["Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Organa", ... ]
|
924
918
|
3 :height int64 46 [172, 167, 96, 202, 150, ... ], 6 nils
|
925
919
|
4 :mass double 39 [77.0, 75.0, 32.0, 136.0, 49.0, ... ], 28 nils
|
@@ -933,74 +927,70 @@ penguins.to_rover
|
|
933
927
|
12 :species string 38 ["Human", "Droid", "Droid", "Human", "Human", ... ]
|
934
928
|
```
|
935
929
|
|
936
|
-
We can
|
930
|
+
We can group by `:species` and calculate the count.
|
937
931
|
|
938
932
|
```ruby
|
939
|
-
|
940
|
-
grouped
|
933
|
+
starwars.group(:species).count(:species)
|
941
934
|
|
942
935
|
# =>
|
943
|
-
#<RedAmber::DataFrame : 38 x
|
944
|
-
|
945
|
-
|
946
|
-
1
|
947
|
-
2
|
948
|
-
3
|
949
|
-
4
|
950
|
-
5
|
951
|
-
:
|
952
|
-
36
|
953
|
-
37
|
954
|
-
38
|
936
|
+
#<RedAmber::DataFrame : 38 x 2 Vectors, 0x000000000001d6f0>
|
937
|
+
species count
|
938
|
+
<string> <int64>
|
939
|
+
1 Human 35
|
940
|
+
2 Droid 6
|
941
|
+
3 Wookiee 2
|
942
|
+
4 Rodian 1
|
943
|
+
5 Hutt 1
|
944
|
+
: : :
|
945
|
+
36 Kaleesh 1
|
946
|
+
37 Pau'an 1
|
947
|
+
38 Kel Dor 1
|
955
948
|
```
|
956
949
|
|
957
|
-
|
958
|
-
|
950
|
+
We can also calculate the mean of `:mass` and `:height` together.
|
951
|
+
|
959
952
|
```ruby
|
960
|
-
|
961
|
-
grouped = grouped.slice(count > 1)
|
953
|
+
grouped = starwars.group(:species) { [count(:species), mean(:height, :mass)] }
|
962
954
|
|
963
955
|
# =>
|
964
|
-
#<RedAmber::DataFrame :
|
965
|
-
mean(
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
956
|
+
#<RedAmber::DataFrame : 38 x 4 Vectors, 0x00000000000407cc>
|
957
|
+
species count mean(height) mean(mass)
|
958
|
+
<string> <int64> <double> <double>
|
959
|
+
1 Human 35 176.6 82.8
|
960
|
+
2 Droid 6 131.2 69.8
|
961
|
+
3 Wookiee 2 231.0 124.0
|
962
|
+
4 Rodian 1 173.0 74.0
|
963
|
+
5 Hutt 1 175.0 1358.0
|
964
|
+
: : : : :
|
965
|
+
36 Kaleesh 1 216.0 159.0
|
966
|
+
37 Pau'an 1 206.0 80.0
|
967
|
+
38 Kel Dor 1 188.0 80.0
|
976
968
|
```
|
977
969
|
|
978
|
-
|
979
|
-
|
980
|
-
```ruby
|
981
|
-
grouped.assign(count: count[count > 1]).pick { [2,3,0,1].map{ |i| keys[i] } }
|
970
|
+
Select rows for count > 1.
|
982
971
|
|
972
|
+
```ruby
|
973
|
+
grouped.slice(grouped[:count] > 1)
|
974
|
+
|
983
975
|
# =>
|
984
|
-
#<RedAmber::DataFrame : 9 x 4 Vectors,
|
985
|
-
species count mean(
|
986
|
-
<string> <
|
987
|
-
1 Human 35 82.8
|
988
|
-
2 Droid 6 69.8
|
989
|
-
3 Wookiee 2 124.0
|
990
|
-
4 Gungan 3 74.0
|
991
|
-
5 NA 4 48.0
|
992
|
-
: : : :
|
993
|
-
7 Twi'lek 2 55.0
|
994
|
-
8 Mirialan 2 53.1
|
995
|
-
9 Kaminoan 2 88.0
|
976
|
+
#<RedAmber::DataFrame : 9 x 4 Vectors, 0x000000000004c270>
|
977
|
+
species count mean(height) mean(mass)
|
978
|
+
<string> <int64> <double> <double>
|
979
|
+
1 Human 35 176.6 82.8
|
980
|
+
2 Droid 6 131.2 69.8
|
981
|
+
3 Wookiee 2 231.0 124.0
|
982
|
+
4 Gungan 3 208.7 74.0
|
983
|
+
5 NA 4 181.3 48.0
|
984
|
+
: : : : :
|
985
|
+
7 Twi'lek 2 179.0 55.0
|
986
|
+
8 Mirialan 2 168.0 53.1
|
987
|
+
9 Kaminoan 2 221.0 88.0
|
996
988
|
```
|
997
989
|
|
998
990
|
## Combining DataFrames
|
999
991
|
|
1000
992
|
- [ ] Combining rows to a dataframe
|
1001
993
|
|
1002
|
-
- [ ] Add vars
|
1003
|
-
|
1004
994
|
- [ ] Inner join
|
1005
995
|
|
1006
996
|
- [ ] Left join
|
@@ -1009,6 +999,6 @@ penguins.to_rover
|
|
1009
999
|
|
1010
1000
|
- [ ] One-hot encoding
|
1011
1001
|
|
1012
|
-
## Iteration
|
1002
|
+
## Iteration
|
1013
1003
|
|
1014
1004
|
- [ ] each_rows
|
data/doc/Vector.md
CHANGED
@@ -500,3 +500,28 @@ vector.is_in(1, -1)
|
|
500
500
|
#<RedAmber::Vector(:boolean, size=3):0x000000000000f320>
|
501
501
|
[true, false, true]
|
502
502
|
```
|
503
|
+
|
504
|
+
### `shift(amount = 1, fill: nil)`
|
505
|
+
|
506
|
+
Shifts the vector's values by the specified `amount`. The vacated positions are filled with the value `fill`.
|
507
|
+
|
508
|
+
```ruby
|
509
|
+
vector = RedAmber::Vector.new([1, 2, 3, 4, 5])
|
510
|
+
vector.shift
|
511
|
+
|
512
|
+
# =>
|
513
|
+
#<RedAmber::Vector(:uint8, size=5):0x00000000000072d8>
|
514
|
+
[nil, 1, 2, 3, 4]
|
515
|
+
|
516
|
+
vector.shift(-2)
|
517
|
+
|
518
|
+
# =>
|
519
|
+
#<RedAmber::Vector(:uint8, size=5):0x0000000000009970>
|
520
|
+
[3, 4, 5, nil, nil]
|
521
|
+
|
522
|
+
vector.shift(fill: Float::NAN)
|
523
|
+
|
524
|
+
# =>
|
525
|
+
#<RedAmber::Vector(:double, size=5):0x0000000000011d3c>
|
526
|
+
[NaN, 1.0, 2.0, 3.0, 4.0]
|
527
|
+
```
|