red_amber 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -0
- data/CHANGELOG.md +125 -0
- data/README.md +86 -269
- data/doc/DataFrame.md +427 -281
- data/doc/Vector.md +35 -54
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +33 -41
- data/lib/red_amber/data_frame_displayable.rb +59 -6
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +12 -10
- data/lib/red_amber/data_frame_selectable.rb +53 -9
- data/lib/red_amber/data_frame_variable_operation.rb +57 -20
- data/lib/red_amber/group.rb +5 -3
- data/lib/red_amber/helper.rb +20 -18
- data/lib/red_amber/vector.rb +50 -31
- data/lib/red_amber/vector_functions.rb +21 -24
- data/lib/red_amber/vector_selectable.rb +18 -9
- data/lib/red_amber/vector_updatable.rb +6 -3
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -0
- metadata +13 -3
- data/doc/examples_of_red_amber.ipynb +0 -6783
data/doc/DataFrame.md
CHANGED
@@ -14,30 +14,38 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
14
14
|
### `new` from a Hash
|
15
15
|
|
16
16
|
```ruby
|
17
|
-
RedAmber::DataFrame.new(x: [1, 2, 3])
|
17
|
+
df = RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C])
|
18
18
|
```
|
19
19
|
|
20
20
|
### `new` from a schema (by Hash) and data (by Array)
|
21
21
|
|
22
22
|
```ruby
|
23
|
-
RedAmber::DataFrame.new({:
|
23
|
+
RedAmber::DataFrame.new({x: :uint8, y: :string}, [[1, "A"], [2, "B"], [3, "C"]])
|
24
24
|
```
|
25
25
|
|
26
26
|
### `new` from an Arrow::Table
|
27
27
|
|
28
28
|
|
29
29
|
```ruby
|
30
|
-
table = Arrow::Table.new(x: [1, 2, 3])
|
30
|
+
table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
|
31
31
|
RedAmber::DataFrame.new(table)
|
32
32
|
```
|
33
33
|
|
34
|
+
### `new` from an Object which responds to `to_arrow`
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
require "datasets-arrow"
|
38
|
+
dataset = Datasets::Penguins.new
|
39
|
+
RedAmber::DataFrame.new(dataset)
|
40
|
+
```
|
41
|
+
|
34
42
|
### `new` from a Rover::DataFrame
|
35
43
|
|
36
44
|
|
37
45
|
```ruby
|
38
46
|
require 'rover'
|
39
47
|
|
40
|
-
rover = Rover::DataFrame.new(x: [1, 2, 3])
|
48
|
+
rover = Rover::DataFrame.new(x: [1, 2, 3], y: %w[A B C])
|
41
49
|
RedAmber::DataFrame.new(rover)
|
42
50
|
```
|
43
51
|
|
@@ -63,7 +71,7 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
63
71
|
```ruby
|
64
72
|
require 'parquet'
|
65
73
|
|
66
|
-
|
74
|
+
df = RedAmber::DataFrame.load("file.parquet")
|
67
75
|
```
|
68
76
|
|
69
77
|
### `save` (instance method)
|
@@ -79,7 +87,7 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
79
87
|
```ruby
|
80
88
|
require 'parquet'
|
81
89
|
|
82
|
-
|
90
|
+
df.save("file.parquet")
|
83
91
|
```
|
84
92
|
|
85
93
|
## Properties
|
@@ -155,7 +163,25 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
155
163
|
|
156
164
|
### `indices`, `indexes`
|
157
165
|
|
158
|
-
- Returns
|
166
|
+
- Returns indexes in an Array.
|
167
|
+
Accepts an option `start` as the first of indexes.
|
168
|
+
|
169
|
+
```ruby
|
170
|
+
df = RedAmber::DataFrame.new(x: [1, 2, 3, 4, 5])
|
171
|
+
df.indices
|
172
|
+
|
173
|
+
# =>
|
174
|
+
[0, 1, 2, 3, 4]
|
175
|
+
|
176
|
+
df.indices(1)
|
177
|
+
|
178
|
+
# =>
|
179
|
+
[1, 2, 3, 4, 5]
|
180
|
+
|
181
|
+
df.indices(:a)
|
182
|
+
# =>
|
183
|
+
[:a, :b, :c, :d, :e]
|
184
|
+
```
|
159
185
|
|
160
186
|
### `to_h`
|
161
187
|
|
@@ -192,15 +218,15 @@ puts penguins.to_s
|
|
192
218
|
# =>
|
193
219
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
194
220
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
221
|
+
0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
222
|
+
1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
223
|
+
2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
224
|
+
3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
225
|
+
4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
200
226
|
: : : : : : ... :
|
201
|
-
|
202
|
-
|
203
|
-
|
227
|
+
341 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
228
|
+
342 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
229
|
+
343 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
204
230
|
```
|
205
231
|
### `inspect`
|
206
232
|
|
@@ -217,11 +243,11 @@ puts penguins.summary.to_s(width: 82) # needs more width to show all stats in th
|
|
217
243
|
# =>
|
218
244
|
variables count mean std min 25% median 75% max
|
219
245
|
<dictionary> <uint16> <double> <double> <double> <double> <double> <double> <double>
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
246
|
+
0 bill_length_mm 342 43.92 5.46 32.1 39.23 44.38 48.5 59.6
|
247
|
+
1 bill_depth_mm 342 17.15 1.97 13.1 15.6 17.32 18.7 21.5
|
248
|
+
2 flipper_length_mm 342 200.92 14.06 172.0 190.0 197.0 213.0 231.0
|
249
|
+
3 body_mass_g 342 4201.75 801.95 2700.0 3550.0 4031.5 4750.0 6300.0
|
250
|
+
4 year 344 2008.03 0.82 2007.0 2007.0 2008.0 2009.0 2009.0
|
225
251
|
```
|
226
252
|
|
227
253
|
### `to_rover`
|
@@ -247,21 +273,22 @@ penguins.to_rover
|
|
247
273
|
require 'red_amber'
|
248
274
|
require 'datasets-arrow'
|
249
275
|
|
250
|
-
|
251
|
-
|
276
|
+
dataset = Datasets::Penguins.new
|
277
|
+
# (From 0.2.2) DataFrame.new accepts any object which responds to `to_arrow`.
|
278
|
+
RedAmber::DataFrame.new(dataset).tdr
|
252
279
|
|
253
280
|
# =>
|
254
281
|
RedAmber::DataFrame : 344 x 8 Vectors
|
255
282
|
Vectors : 5 numeric, 3 strings
|
256
283
|
# key type level data_preview
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
284
|
+
0 :species string 3 {"Adelie"=>152, "Chinstrap"=>68, "Gentoo"=>124}
|
285
|
+
1 :island string 3 {"Torgersen"=>52, "Biscoe"=>168, "Dream"=>124}
|
286
|
+
2 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils
|
287
|
+
3 :bill_depth_mm double 81 [18.7, 17.4, 18.0, nil, 19.3, ... ], 2 nils
|
288
|
+
4 :flipper_length_mm uint8 56 [181, 186, 195, nil, 193, ... ], 2 nils
|
289
|
+
5 :body_mass_g uint16 95 [3750, 3800, 3250, nil, 3450, ... ], 2 nils
|
290
|
+
6 :sex string 3 {"male"=>168, "female"=>165, nil=>11}
|
291
|
+
7 :year uint16 3 {2007=>110, 2008=>114, 2009=>120}
|
265
292
|
```
|
266
293
|
|
267
294
|
- limit: limit of variables to show. Default value is 10.
|
@@ -293,9 +320,9 @@ penguins.to_rover
|
|
293
320
|
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000328fc>
|
294
321
|
b c a
|
295
322
|
<string> <double> <uint8>
|
296
|
-
|
297
|
-
|
298
|
-
|
323
|
+
0 A 1.0 1
|
324
|
+
1 B 2.0 2
|
325
|
+
2 C 3.0 3
|
299
326
|
```
|
300
327
|
|
301
328
|
If `#[]` represents single variable (column), it returns a Vector object.
|
@@ -341,10 +368,10 @@ penguins.to_rover
|
|
341
368
|
#<RedAmber::DataFrame : 4 x 3 Vectors, 0x0000000000033270>
|
342
369
|
a b c
|
343
370
|
<uint8> <string> <double>
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
371
|
+
0 3 C 3.0
|
372
|
+
1 1 A 1.0
|
373
|
+
2 2 B 2.0
|
374
|
+
3 3 C 3.0
|
348
375
|
```
|
349
376
|
|
350
377
|
- Select obs. by a boolean Array or a boolean RedAmber::Vector at same size as self.
|
@@ -372,13 +399,13 @@ penguins.to_rover
|
|
372
399
|
|
373
400
|
### `pick ` - pick up variables by key label -
|
374
401
|
|
375
|
-
Pick up some
|
402
|
+
Pick up some columns (variables) to create a sub DataFrame.
|
376
403
|
|
377
404
|

|
378
405
|
|
379
406
|
- Keys as arguments
|
380
407
|
|
381
|
-
`pick(keys)` accepts keys as arguments in an Array.
|
408
|
+
`pick(keys)` accepts keys as arguments in an Array or a Range.
|
382
409
|
|
383
410
|
```ruby
|
384
411
|
penguins.pick(:species, :bill_length_mm)
|
@@ -387,42 +414,64 @@ penguins.to_rover
|
|
387
414
|
#<RedAmber::DataFrame : 344 x 2 Vectors, 0x0000000000035ebc>
|
388
415
|
species bill_length_mm
|
389
416
|
<string> <double>
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
417
|
+
0 Adelie 39.1
|
418
|
+
1 Adelie 39.5
|
419
|
+
2 Adelie 40.3
|
420
|
+
3 Adelie (nil)
|
421
|
+
4 Adelie 36.7
|
395
422
|
: : :
|
396
|
-
|
397
|
-
|
398
|
-
|
423
|
+
341 Gentoo 50.4
|
424
|
+
342 Gentoo 45.2
|
425
|
+
343 Gentoo 49.9
|
426
|
+
```
|
427
|
+
|
428
|
+
- Indices as arguments
|
429
|
+
|
430
|
+
`pick(indices)` accepts indices as arguments. Indices should be Integers, Floats or Ranges of Integers.
|
431
|
+
|
432
|
+
```ruby
|
433
|
+
penguins.pick(0..2, -1)
|
434
|
+
|
435
|
+
# =>
|
436
|
+
#<RedAmber::DataFrame : 344 x 4 Vectors, 0x0000000000055ce4>
|
437
|
+
species island bill_length_mm year
|
438
|
+
<string> <string> <double> <uint16>
|
439
|
+
0 Adelie Torgersen 39.1 2007
|
440
|
+
1 Adelie Torgersen 39.5 2007
|
441
|
+
2 Adelie Torgersen 40.3 2007
|
442
|
+
3 Adelie Torgersen (nil) 2007
|
443
|
+
4 Adelie Torgersen 36.7 2007
|
444
|
+
: : : : :
|
445
|
+
341 Gentoo Biscoe 50.4 2009
|
446
|
+
342 Gentoo Biscoe 45.2 2009
|
447
|
+
343 Gentoo Biscoe 49.9 2009
|
399
448
|
```
|
400
449
|
|
401
|
-
- Booleans as
|
450
|
+
- Booleans as arguments
|
402
451
|
|
403
|
-
`pick(booleans)` accepts booleans as
|
452
|
+
`pick(booleans)` accepts booleans as arguments in an Array. Booleans must be the same length as `n_keys`.
|
404
453
|
|
405
454
|
```ruby
|
406
|
-
penguins.pick(penguins.
|
455
|
+
penguins.pick(penguins.vectors.map(&:string?))
|
407
456
|
|
408
457
|
# =>
|
409
458
|
#<RedAmber::DataFrame : 344 x 3 Vectors, 0x00000000000387ac>
|
410
459
|
species island sex
|
411
460
|
<string> <string> <string>
|
412
|
-
|
461
|
+
0 Adelie Torgersen male
|
462
|
+
1 Adelie Torgersen female
|
413
463
|
2 Adelie Torgersen female
|
414
|
-
3 Adelie Torgersen
|
415
|
-
4 Adelie Torgersen
|
416
|
-
5 Adelie Torgersen female
|
464
|
+
3 Adelie Torgersen (nil)
|
465
|
+
4 Adelie Torgersen female
|
417
466
|
: : : :
|
418
|
-
|
419
|
-
|
420
|
-
|
467
|
+
341 Gentoo Biscoe male
|
468
|
+
342 Gentoo Biscoe female
|
469
|
+
343 Gentoo Biscoe male
|
421
470
|
```
|
422
471
|
|
423
|
-
|
472
|
+
- Keys or booleans by a block
|
424
473
|
|
425
|
-
`pick {block}` is also acceptable. We can't use both arguments and a block at a same time. The block should return keys, or a boolean Array with a same length as `n_keys`. Block is called in the context of self.
|
474
|
+
`pick {block}` is also acceptable. We can't use both arguments and a block at the same time. The block should return keys, indices or a boolean Array with the same length as `n_keys`. The block is called in the context of self.
|
426
475
|
|
427
476
|
```ruby
|
428
477
|
penguins.pick { keys.map { |key| key.end_with?('mm') } }
|
@@ -431,34 +480,38 @@ penguins.to_rover
|
|
431
480
|
#<RedAmber::DataFrame : 344 x 3 Vectors, 0x000000000003dd4c>
|
432
481
|
bill_length_mm bill_depth_mm flipper_length_mm
|
433
482
|
<double> <double> <uint8>
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
483
|
+
0 39.1 18.7 181
|
484
|
+
1 39.5 17.4 186
|
485
|
+
2 40.3 18.0 195
|
486
|
+
3 (nil) (nil) (nil)
|
487
|
+
4 36.7 19.3 193
|
439
488
|
: : : :
|
440
|
-
|
441
|
-
|
442
|
-
|
489
|
+
341 50.4 15.7 222
|
490
|
+
342 45.2 14.8 212
|
491
|
+
343 49.9 16.1 213
|
443
492
|
```
|
444
493
|
|
445
494
|
### `drop ` - pick and drop -
|
446
495
|
|
447
|
-
Drop some
|
496
|
+
Drop some columns (variables) to create a remainder DataFrame.
|
448
497
|
|
449
498
|

|
450
499
|
|
451
500
|
- Keys as arguments
|
452
501
|
|
453
|
-
`drop(keys)` accepts keys as arguments in an Array.
|
502
|
+
`drop(keys)` accepts keys as arguments in an Array or a Range.
|
503
|
+
|
504
|
+
- Indices as arguments
|
505
|
+
|
506
|
+
`drop(indices)` accepts indices as arguments. Indices should be Integers, Floats or Ranges of Integers.
|
454
507
|
|
455
|
-
- Booleans as
|
508
|
+
- Booleans as arguments
|
456
509
|
|
457
|
-
`drop(booleans)` accepts booleans as
|
510
|
+
`drop(booleans)` accepts booleans as an argument in an Array. Booleans must be the same length as `n_keys`.
|
458
511
|
|
459
512
|
- Keys or booleans by a block
|
460
513
|
|
461
|
-
`drop {block}` is also acceptable. We can't use both arguments and a block at a same time. The block should return keys, or a boolean Array with a same length as `n_keys`. Block is called in the context of self.
|
514
|
+
`drop {block}` is also acceptable. We can't use both arguments and a block at the same time. The block should return keys, indices or a boolean Array with the same length as `n_keys`. The block is called in the context of self.
|
462
515
|
|
463
516
|
- Notice for nil
|
464
517
|
|
@@ -482,9 +535,9 @@ penguins.to_rover
|
|
482
535
|
#<RedAmber::DataFrame : 3 x 1 Vector, 0x000000000003f4bc>
|
483
536
|
a
|
484
537
|
<uint8>
|
485
|
-
|
486
|
-
|
487
|
-
|
538
|
+
0 1
|
539
|
+
1 2
|
540
|
+
2 3
|
488
541
|
|
489
542
|
df[:a]
|
490
543
|
|
@@ -493,9 +546,20 @@ penguins.to_rover
|
|
493
546
|
[1, 2, 3]
|
494
547
|
```
|
495
548
|
|
549
|
+
A simple key name is usable as a method of the DataFrame if the key name is acceptable as a method name.
|
550
|
+
It returns a Vector same as `[]`.
|
551
|
+
|
552
|
+
```ruby
|
553
|
+
df.a
|
554
|
+
|
555
|
+
# =>
|
556
|
+
#<RedAmber::Vector(:uint8, size=3):0x000000000000f258>
|
557
|
+
[1, 2, 3]
|
558
|
+
```
|
559
|
+
|
496
560
|
### `slice ` - to cut vertically is slice -
|
497
561
|
|
498
|
-
Slice and select
|
562
|
+
Slice and select rows (observations) to create a sub DataFrame.
|
499
563
|
|
500
564
|

|
501
565
|
|
@@ -511,22 +575,22 @@ penguins.to_rover
|
|
511
575
|
|
512
576
|
# =>
|
513
577
|
#<RedAmber::DataFrame : 10 x 8 Vectors, 0x0000000000042be4>
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
578
|
+
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
579
|
+
<string> <string> <double> <double> <uint8> ... <uint16>
|
580
|
+
0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
581
|
+
1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
582
|
+
2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
583
|
+
3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
584
|
+
4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
585
|
+
: : : : : : ... :
|
586
|
+
7 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
587
|
+
8 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
588
|
+
9 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
525
589
|
```
|
526
590
|
|
527
591
|
- Booleans as an argument
|
528
592
|
|
529
|
-
`slice(booleans)` accepts booleans as
|
593
|
+
`slice(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray. Booleans must be the same length as `size`.
|
530
594
|
|
531
595
|
```ruby
|
532
596
|
vector = penguins[:bill_length_mm]
|
@@ -536,15 +600,15 @@ penguins.to_rover
|
|
536
600
|
#<RedAmber::DataFrame : 242 x 8 Vectors, 0x0000000000043d3c>
|
537
601
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
538
602
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
603
|
+
0 Adelie Torgersen 40.3 18.0 195 ... 2007
|
604
|
+
1 Adelie Torgersen 42.0 20.2 190 ... 2007
|
605
|
+
2 Adelie Torgersen 41.1 17.6 182 ... 2007
|
606
|
+
3 Adelie Torgersen 42.5 20.7 197 ... 2007
|
607
|
+
4 Adelie Torgersen 46.0 21.5 194 ... 2007
|
544
608
|
: : : : : : ... :
|
545
|
-
|
546
|
-
|
547
|
-
|
609
|
+
239 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
610
|
+
240 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
611
|
+
241 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
548
612
|
```
|
549
613
|
|
550
614
|
- Indices or booleans by a block
|
@@ -564,15 +628,15 @@ penguins.to_rover
|
|
564
628
|
#<RedAmber::DataFrame : 204 x 8 Vectors, 0x0000000000047a40>
|
565
629
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
566
630
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
631
|
+
0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
632
|
+
1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
633
|
+
2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
634
|
+
3 Adelie Torgersen 39.3 20.6 190 ... 2007
|
635
|
+
4 Adelie Torgersen 38.9 17.8 181 ... 2007
|
572
636
|
: : : : : : ... :
|
573
|
-
|
574
|
-
|
575
|
-
|
637
|
+
201 Gentoo Biscoe 47.2 13.7 214 ... 2009
|
638
|
+
202 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
639
|
+
203 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
576
640
|
```
|
577
641
|
|
578
642
|
- Notice: nil option
|
@@ -603,7 +667,7 @@ penguins.to_rover
|
|
603
667
|
|
604
668
|
### `remove`
|
605
669
|
|
606
|
-
Slice and reject
|
670
|
+
Slice and reject rows (observations) to create a remainder DataFrame.
|
607
671
|
|
608
672
|

|
609
673
|
|
@@ -619,20 +683,20 @@ penguins.to_rover
|
|
619
683
|
#<RedAmber::DataFrame : 334 x 8 Vectors, 0x00000000000487c4>
|
620
684
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
621
685
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
686
|
+
0 Adelie Torgersen 39.3 20.6 190 ... 2007
|
687
|
+
1 Adelie Torgersen 38.9 17.8 181 ... 2007
|
688
|
+
2 Adelie Torgersen 39.2 19.6 195 ... 2007
|
689
|
+
3 Adelie Torgersen 34.1 18.1 193 ... 2007
|
690
|
+
4 Adelie Torgersen 42.0 20.2 190 ... 2007
|
627
691
|
: : : : : : ... :
|
628
|
-
|
629
|
-
|
630
|
-
|
692
|
+
331 Gentoo Biscoe 44.5 15.7 217 ... 2009
|
693
|
+
332 Gentoo Biscoe 48.8 16.2 222 ... 2009
|
694
|
+
333 Gentoo Biscoe 47.2 13.7 214 ... 2009
|
631
695
|
```
|
632
696
|
|
633
697
|
- Booleans as an argument
|
634
698
|
|
635
|
-
`remove(booleans)` accepts booleans as
|
699
|
+
`remove(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray. Booleans must be the same length as `size`.
|
636
700
|
|
637
701
|
```ruby
|
638
702
|
# remove all observations containing nil
|
@@ -643,15 +707,15 @@ penguins.to_rover
|
|
643
707
|
#<RedAmber::DataFrame : 333 x 8 Vectors, 0x0000000000049fac>
|
644
708
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
645
709
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
710
|
+
0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
711
|
+
1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
712
|
+
2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
713
|
+
3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
714
|
+
4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
651
715
|
: : : : : : ... :
|
652
|
-
|
653
|
-
|
654
|
-
|
716
|
+
330 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
717
|
+
331 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
718
|
+
332 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
655
719
|
```
|
656
720
|
|
657
721
|
- Indices or booleans by a block
|
@@ -660,26 +724,29 @@ penguins.to_rover
|
|
660
724
|
|
661
725
|
```ruby
|
662
726
|
penguins.remove do
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
727
|
+
# We will use another style shown in slice
|
728
|
+
# self.bill_length_mm returns Vector
|
729
|
+
mean = bill_length_mm.mean
|
730
|
+
min = mean - bill_length_mm.std
|
731
|
+
max = mean + bill_length_mm.std
|
732
|
+
bill_length_mm.to_a.map { |e| (min..max).include? e }
|
667
733
|
end
|
668
734
|
|
669
735
|
# =>
|
670
736
|
#<RedAmber::DataFrame : 140 x 8 Vectors, 0x000000000004de40>
|
671
737
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
672
738
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
739
|
+
0 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
740
|
+
1 Adelie Torgersen 36.7 19.3 193 ... 2007
|
741
|
+
2 Adelie Torgersen 34.1 18.1 193 ... 2007
|
742
|
+
3 Adelie Torgersen 37.8 17.1 186 ... 2007
|
743
|
+
4 Adelie Torgersen 37.8 17.3 180 ... 2007
|
678
744
|
: : : : : : ... :
|
679
|
-
|
680
|
-
|
681
|
-
|
745
|
+
137 Gentoo Biscoe (nil) (nil) (nil) ... 2009
|
746
|
+
138 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
747
|
+
139 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
682
748
|
```
|
749
|
+
|
683
750
|
- Notice for nil
|
684
751
|
- When `remove` is used with booleans, nil in booleans is treated as false. This behavior is aligned with Ruby's `nil#!`.
|
685
752
|
|
@@ -712,8 +779,8 @@ penguins.to_rover
|
|
712
779
|
#<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000005df98>
|
713
780
|
a b c
|
714
781
|
<uint8> <string> <double>
|
715
|
-
|
716
|
-
|
782
|
+
0 1 A 1.0
|
783
|
+
1 (nil) C 3.0
|
717
784
|
```
|
718
785
|
|
719
786
|
### `rename`
|
@@ -734,9 +801,9 @@ penguins.to_rover
|
|
734
801
|
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000060838>
|
735
802
|
name age_in_1993
|
736
803
|
<string> <uint8>
|
737
|
-
|
738
|
-
|
739
|
-
|
804
|
+
0 Yasuko 68
|
805
|
+
1 Rui 49
|
806
|
+
2 Hinata 28
|
740
807
|
```
|
741
808
|
|
742
809
|
- Key pairs by a block
|
@@ -772,23 +839,27 @@ penguins.to_rover
|
|
772
839
|
|
773
840
|
# =>
|
774
841
|
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000062804>
|
775
|
-
name age
|
776
|
-
<string> <uint8>
|
777
|
-
|
778
|
-
|
779
|
-
|
842
|
+
name age
|
843
|
+
<string> <uint8>
|
844
|
+
0 Yasuko 68
|
845
|
+
1 Rui 49
|
846
|
+
2 Hinata 28
|
780
847
|
|
781
848
|
# update :age and add :brother
|
782
|
-
|
783
|
-
|
849
|
+
df.assign do
|
850
|
+
{
|
851
|
+
age: age + 29,
|
852
|
+
brother: ['Santa', nil, 'Momotaro']
|
853
|
+
}
|
854
|
+
end
|
784
855
|
|
785
856
|
# =>
|
786
857
|
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000658b0>
|
787
858
|
name age brother
|
788
859
|
<string> <uint8> <string>
|
789
|
-
|
790
|
-
|
791
|
-
|
860
|
+
0 Yasuko 97 Santa
|
861
|
+
1 Rui 78 (nil)
|
862
|
+
2 Hinata 57 Momotaro
|
792
863
|
```
|
793
864
|
|
794
865
|
- Key pairs by a block
|
@@ -799,18 +870,19 @@ penguins.to_rover
|
|
799
870
|
df = RedAmber::DataFrame.new(
|
800
871
|
index: [0, 1, 2, 3, nil],
|
801
872
|
float: [0.0, 1.1, 2.2, Float::NAN, nil],
|
802
|
-
string: ['A', 'B', 'C', 'D', nil]
|
873
|
+
string: ['A', 'B', 'C', 'D', nil]
|
874
|
+
)
|
803
875
|
df
|
804
876
|
|
805
877
|
# =>
|
806
878
|
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000069e60>
|
807
879
|
index float string
|
808
880
|
<uint8> <double> <string>
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
881
|
+
0 0 0.0 A
|
882
|
+
1 1 1.1 B
|
883
|
+
2 2 2.2 C
|
884
|
+
3 3 NaN D
|
885
|
+
4 (nil) (nil) (nil)
|
814
886
|
|
815
887
|
# update :float
|
816
888
|
# assigner by an Array
|
@@ -821,13 +893,13 @@ penguins.to_rover
|
|
821
893
|
|
822
894
|
# =>
|
823
895
|
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x00000000000dfffc>
|
824
|
-
index float string
|
825
|
-
<uint8> <double> <string>
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
896
|
+
index float string
|
897
|
+
<uint8> <double> <string>
|
898
|
+
0 0 -0.0 A
|
899
|
+
1 1 -1.1 B
|
900
|
+
2 2 -2.2 C
|
901
|
+
3 3 NaN D
|
902
|
+
4 (nil) (nil) (nil)
|
831
903
|
|
832
904
|
# Or we can use assigner by a Hash
|
833
905
|
df.assign do
|
@@ -852,17 +924,85 @@ penguins.to_rover
|
|
852
924
|
The `assign_left` method accepts the same parameters and block as `assign`, but appends new columns on the left side.
|
853
925
|
|
854
926
|
```ruby
|
855
|
-
df.assign_left(new_index:
|
927
|
+
df.assign_left(new_index: df.indices(1))
|
856
928
|
|
857
929
|
# =>
|
858
930
|
#<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000001787c>
|
859
931
|
new_index index float string
|
860
932
|
<uint8> <uint8> <double> <string>
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
933
|
+
0 1 0 0.0 A
|
934
|
+
1 2 1 1.1 B
|
935
|
+
2 3 2 2.2 C
|
936
|
+
3 4 3 NaN D
|
937
|
+
4 5 (nil) (nil) (nil)
|
938
|
+
```
|
939
|
+
|
940
|
+
### `slice_by(key, keep_key: false) { block }`
|
941
|
+
|
942
|
+
`slice_by` accepts a key and a block to select rows.
|
943
|
+
|
944
|
+
(Since 0.2.1)
|
945
|
+
|
946
|
+
```ruby
|
947
|
+
df = RedAmber::DataFrame.new(
|
948
|
+
index: [0, 1, 2, 3, nil],
|
949
|
+
float: [0.0, 1.1, 2.2, Float::NAN, nil],
|
950
|
+
string: ['A', 'B', 'C', 'D', nil]
|
951
|
+
)
|
952
|
+
df
|
953
|
+
|
954
|
+
# =>
|
955
|
+
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000069e60>
|
956
|
+
index float string
|
957
|
+
<uint8> <double> <string>
|
958
|
+
0 0 0.0 A
|
959
|
+
1 1 1.1 B
|
960
|
+
2 2 2.2 C
|
961
|
+
3 3 NaN D
|
962
|
+
4 (nil) (nil) (nil)
|
963
|
+
|
964
|
+
df.slice_by(:string) { ["A", "C"] }
|
965
|
+
|
966
|
+
# =>
|
967
|
+
#<RedAmber::DataFrame : 2 x 2 Vectors, 0x000000000001b1ac>
|
968
|
+
index float
|
969
|
+
<uint8> <double>
|
970
|
+
0 0 0.0
|
971
|
+
1 2 2.2
|
972
|
+
```
|
973
|
+
|
974
|
+
It is the same behavior as:
|
975
|
+
|
976
|
+
```ruby
|
977
|
+
df.slice { [string.index("A"), string.index("C")] }.drop(:string)
|
978
|
+
```
|
979
|
+
|
980
|
+
`slice_by` also accepts a Range.
|
981
|
+
|
982
|
+
```ruby
|
983
|
+
df.slice_by(:string) { "A".."C" }
|
984
|
+
|
985
|
+
# =>
|
986
|
+
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000069668>
|
987
|
+
index float
|
988
|
+
<uint8> <double>
|
989
|
+
0 0 0.0
|
990
|
+
1 1 1.1
|
991
|
+
2 2 2.2
|
992
|
+
```
|
993
|
+
|
994
|
+
When the option `keep_key: true` is used, the column `key` will be preserved.
|
995
|
+
|
996
|
+
```ruby
|
997
|
+
df.slice_by(:string, keep_key: true) { "A".."C" }
|
998
|
+
|
999
|
+
# =>
|
1000
|
+
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000073c44>
|
1001
|
+
index float string
|
1002
|
+
<uint8> <double> <string>
|
1003
|
+
0 0 0.0 A
|
1004
|
+
1 1 1.1 B
|
1005
|
+
2 2 2.2 C
|
866
1006
|
```
|
867
1007
|
|
868
1008
|
## Updating
|
@@ -874,22 +1014,22 @@ penguins.to_rover
|
|
874
1014
|
- "-key" denotes descending order
|
875
1015
|
|
876
1016
|
```ruby
|
877
|
-
df = RedAmber::DataFrame.new(
|
1017
|
+
df = RedAmber::DataFrame.new(
|
878
1018
|
index: [1, 1, 0, nil, 0],
|
879
1019
|
string: ['C', 'B', nil, 'A', 'B'],
|
880
1020
|
bool: [nil, true, false, true, false],
|
881
|
-
|
1021
|
+
)
|
882
1022
|
df.sort(:index, '-bool')
|
883
1023
|
|
884
1024
|
# =>
|
885
1025
|
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000009b03c>
|
886
1026
|
index string bool
|
887
1027
|
<uint8> <string> <boolean>
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
1028
|
+
0 0 (nil) false
|
1029
|
+
1 0 B false
|
1030
|
+
2 1 B true
|
1031
|
+
3 1 C (nil)
|
1032
|
+
4 (nil) A true
|
893
1033
|
```
|
894
1034
|
|
895
1035
|
- [ ] Clamp
|
@@ -906,7 +1046,7 @@ penguins.to_rover
|
|
906
1046
|
|
907
1047
|
### `group(group_keys)`
|
908
1048
|
|
909
|
-
`group` creates a class `Group
|
1049
|
+
`group` creates an instance of class `Group`. `Group` accepts functions below as a method.
|
910
1050
|
Method accepts options as `group_keys`.
|
911
1051
|
|
912
1052
|
Available functions are:
|
@@ -933,23 +1073,22 @@ penguins.to_rover
|
|
933
1073
|
This is an example of grouping of famous STARWARS dataset.
|
934
1074
|
|
935
1075
|
```ruby
|
936
|
-
|
937
|
-
|
938
|
-
starwars
|
1076
|
+
uri = URI("https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv")
|
1077
|
+
starwars = RedAmber::DataFrame.load(uri)
|
939
1078
|
|
940
1079
|
# =>
|
941
1080
|
#<RedAmber::DataFrame : 87 x 12 Vectors, 0x0000000000005a50>
|
942
1081
|
unnamed1 name height mass hair_color skin_color eye_color ... species
|
943
1082
|
<int64> <string> <int64> <double> <string> <string> <string> ... <string>
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
|
1083
|
+
0 1 Luke Skywalker 172 77.0 blond fair blue ... Human
|
1084
|
+
1 2 C-3PO 167 75.0 NA gold yellow ... Droid
|
1085
|
+
2 3 R2-D2 96 32.0 NA white, blue red ... Droid
|
1086
|
+
3 4 Darth Vader 202 136.0 none white yellow ... Human
|
1087
|
+
4 5 Leia Organa 150 49.0 brown light brown ... Human
|
949
1088
|
: : : : : : : : ... :
|
950
|
-
|
951
|
-
|
952
|
-
|
1089
|
+
84 85 BB8 (nil) (nil) none none black ... Droid
|
1090
|
+
85 86 Captain Phasma (nil) (nil) unknown unknown unknown ... NA
|
1091
|
+
86 87 Padmé Amidala 165 45.0 brown light brown ... Human
|
953
1092
|
|
954
1093
|
starwars.tdr(12)
|
955
1094
|
|
@@ -957,58 +1096,60 @@ penguins.to_rover
|
|
957
1096
|
RedAmber::DataFrame : 87 x 12 Vectors
|
958
1097
|
Vectors : 4 numeric, 8 strings
|
959
1098
|
# key type level data_preview
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
1099
|
+
0 :unnamed1 int64 87 [1, 2, 3, 4, 5, ... ]
|
1100
|
+
1 :name string 87 ["Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Organa", ... ]
|
1101
|
+
2 :height int64 46 [172, 167, 96, 202, 150, ... ], 6 nils
|
1102
|
+
3 :mass double 39 [77.0, 75.0, 32.0, 136.0, 49.0, ... ], 28 nils
|
1103
|
+
4 :hair_color string 13 ["blond", "NA", "NA", "none", "brown", ... ]
|
1104
|
+
5 :skin_color string 31 ["fair", "gold", "white, blue", "white", "light", ... ]
|
1105
|
+
6 :eye_color string 15 ["blue", "yellow", "red", "yellow", "brown", ... ]
|
1106
|
+
7 :birth_year double 37 [19.0, 112.0, 33.0, 41.9, 19.0, ... ], 44 nils
|
1107
|
+
8 :sex string 5 {"male"=>60, "none"=>6, "female"=>16, "hermaphroditic"=>1, "NA"=>4}
|
1108
|
+
9 :gender string 3 {"masculine"=>66, "feminine"=>17, "NA"=>4}
|
1109
|
+
10 :homeworld string 49 ["Tatooine", "Tatooine", "Naboo", "Tatooine", "Alderaan", ... ]
|
1110
|
+
11 :species string 38 ["Human", "Droid", "Droid", "Human", "Human", ... ]
|
972
1111
|
```
|
973
1112
|
|
974
1113
|
We can group by `:species` and calculate the count.
|
975
1114
|
|
976
1115
|
```ruby
|
977
|
-
starwars.
|
1116
|
+
starwars.remove { species == "NA" }
|
1117
|
+
.group(:species).count(:species)
|
978
1118
|
|
979
1119
|
# =>
|
980
|
-
#<RedAmber::DataFrame :
|
1120
|
+
#<RedAmber::DataFrame : 37 x 2 Vectors, 0x000000000000ffa0>
|
981
1121
|
species count
|
982
1122
|
<string> <int64>
|
983
|
-
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
|
1123
|
+
0 Human 35
|
1124
|
+
1 Droid 6
|
1125
|
+
2 Wookiee 2
|
1126
|
+
3 Rodian 1
|
1127
|
+
4 Hutt 1
|
988
1128
|
: : :
|
989
|
-
|
990
|
-
|
991
|
-
|
1129
|
+
34 Kaleesh 1
|
1130
|
+
35 Pau'an 1
|
1131
|
+
36 Kel Dor 1
|
992
1132
|
```
|
993
1133
|
|
994
1134
|
We can also calculate the mean of `:mass` and `:height` together.
|
995
1135
|
|
996
1136
|
```ruby
|
997
|
-
grouped = starwars.
|
1137
|
+
grouped = starwars.remove { species == "NA" }
|
1138
|
+
.group(:species) { [count(:species), mean(:height, :mass)] }
|
998
1139
|
|
999
1140
|
# =>
|
1000
|
-
#<RedAmber::DataFrame :
|
1001
|
-
|
1002
|
-
<
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1007
|
-
|
1008
|
-
: :
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1141
|
+
#<RedAmber::DataFrame : 37 x 4 Vectors, 0x000000000000fff0>
|
1142
|
+
species count mean(height) mean(mass)
|
1143
|
+
<string> <int64> <double> <double>
|
1144
|
+
0 Human 35 176.65 82.78
|
1145
|
+
1 Droid 6 131.2 69.75
|
1146
|
+
2 Wookiee 2 231.0 124.0
|
1147
|
+
3 Rodian 1 173.0 74.0
|
1148
|
+
4 Hutt 1 175.0 1358.0
|
1149
|
+
: : : : :
|
1150
|
+
34 Kaleesh 1 216.0 159.0
|
1151
|
+
35 Pau'an 1 206.0 80.0
|
1152
|
+
36 Kel Dor 1 188.0 80.0
|
1012
1153
|
```
|
1013
1154
|
|
1014
1155
|
Select rows for count > 1.
|
@@ -1017,25 +1158,26 @@ penguins.to_rover
|
|
1017
1158
|
grouped.slice(grouped[:count] > 1)
|
1018
1159
|
|
1019
1160
|
# =>
|
1020
|
-
#<RedAmber::DataFrame :
|
1161
|
+
#<RedAmber::DataFrame : 8 x 4 Vectors, 0x000000000001002c>
|
1021
1162
|
species count mean(height) mean(mass)
|
1022
1163
|
<string> <int64> <double> <double>
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
9 Kaminoan 2 221.0 88.0
|
1164
|
+
0 Human 35 176.65 82.78
|
1165
|
+
1 Droid 6 131.2 69.75
|
1166
|
+
2 Wookiee 2 231.0 124.0
|
1167
|
+
3 Gungan 3 208.67 74.0
|
1168
|
+
4 Zabrak 2 173.0 80.0
|
1169
|
+
5 Twi'lek 2 179.0 55.0
|
1170
|
+
6 Mirialan 2 168.0 53.1
|
1171
|
+
7 Kaminoan 2 221.0 88.0
|
1032
1172
|
```
|
1033
1173
|
|
1034
1174
|
## Reshape
|
1035
1175
|
|
1176
|
+

|
1177
|
+
|
1036
1178
|
### `transpose`
|
1037
1179
|
|
1038
|
-
Creates transposed DataFrame for wide
|
1180
|
+
Creates a transposed DataFrame from a wide (messy) DataFrame.
|
1039
1181
|
|
1040
1182
|
```ruby
|
1041
1183
|
import_cars = RedAmber::DataFrame.load('test/entity/import_cars.tsv')
|
@@ -1044,31 +1186,31 @@ penguins.to_rover
|
|
1044
1186
|
#<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
|
1045
1187
|
Year Audi BMW BMW_MINI Mercedes-Benz VW
|
1046
1188
|
<int64> <int64> <int64> <int64> <int64> <int64>
|
1047
|
-
|
1048
|
-
|
1049
|
-
|
1050
|
-
|
1051
|
-
|
1189
|
+
0 2017 28336 52527 25427 68221 49040
|
1190
|
+
1 2018 26473 50982 25984 67554 51961
|
1191
|
+
2 2019 24222 46814 23813 66553 46794
|
1192
|
+
3 2020 22304 35712 20196 57041 36576
|
1193
|
+
4 2021 22535 35905 18211 51722 35215
|
1052
1194
|
|
1053
|
-
import_cars.transpose
|
1195
|
+
import_cars.transpose(name: :Manufacturer)
|
1054
1196
|
|
1055
1197
|
# =>
|
1056
|
-
#<RedAmber::DataFrame : 5 x 6 Vectors,
|
1057
|
-
|
1058
|
-
<
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
1198
|
+
#<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
|
1199
|
+
Manufacturer 2017 2018 2019 2020 2021
|
1200
|
+
<string> <uint32> <uint32> <uint32> <uint16> <uint16>
|
1201
|
+
0 Audi 28336 26473 24222 22304 22535
|
1202
|
+
1 BMW 52527 50982 46814 35712 35905
|
1203
|
+
2 BMW_MINI 25427 25984 23813 20196 18211
|
1204
|
+
3 Mercedes-Benz 68221 67554 66553 57041 51722
|
1205
|
+
4 VW 49040 51961 46794 36576 35215
|
1064
1206
|
```
|
1065
1207
|
|
1066
1208
|
The leftmost column is created from the original keys. The key name of the column is
|
1067
|
-
named by
|
1209
|
+
set by the parameter `:name`. If `:name` is not specified, `:NAME` is used for the key.
|
1068
1210
|
|
1069
1211
|
### `to_long(*keep_keys)`
|
1070
1212
|
|
1071
|
-
Creates a 'long' DataFrame.
|
1213
|
+
Creates a 'long' (tidy) DataFrame from a 'wide' DataFrame.
|
1072
1214
|
|
1073
1215
|
- Parameter `keep_keys` specifies the key names to keep.
|
1074
1216
|
|
@@ -1076,62 +1218,66 @@ penguins.to_rover
|
|
1076
1218
|
import_cars.to_long(:Year)
|
1077
1219
|
|
1078
1220
|
# =>
|
1079
|
-
#<RedAmber::DataFrame : 25 x 3 Vectors,
|
1080
|
-
Year
|
1081
|
-
<uint16> <
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
|
1221
|
+
#<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
|
1222
|
+
Year NAME VALUE
|
1223
|
+
<uint16> <string> <uint32>
|
1224
|
+
0 2017 Audi 28336
|
1225
|
+
1 2017 BMW 52527
|
1226
|
+
2 2017 BMW_MINI 25427
|
1227
|
+
3 2017 Mercedes-Benz 68221
|
1228
|
+
4 2017 VW 49040
|
1087
1229
|
: : : :
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1230
|
+
22 2021 BMW_MINI 18211
|
1231
|
+
23 2021 Mercedes-Benz 51722
|
1232
|
+
24 2021 VW 35215
|
1091
1233
|
```
|
1092
1234
|
|
1093
|
-
- Option `:name`
|
1094
|
-
|
1235
|
+
- Option `:name` is the key of the column which came **from key names**.
|
1236
|
+
The default value is `:NAME` if it is not specified.
|
1237
|
+
- Option `:value` is the key of the column which came **from values**.
|
1238
|
+
The default value is `:VALUE` if it is not specified.
|
1095
1239
|
|
1096
1240
|
```ruby
|
1097
1241
|
import_cars.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
|
1098
1242
|
|
1099
1243
|
# =>
|
1100
|
-
#<RedAmber::DataFrame : 25 x 3 Vectors,
|
1244
|
+
#<RedAmber::DataFrame : 25 x 3 Vectors, 0x000000000001359c>
|
1101
1245
|
Year Manufacturer Num_of_imported
|
1102
|
-
<uint16> <
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1107
|
-
|
1246
|
+
<uint16> <string> <uint32>
|
1247
|
+
0 2017 Audi 28336
|
1248
|
+
1 2017 BMW 52527
|
1249
|
+
2 2017 BMW_MINI 25427
|
1250
|
+
3 2017 Mercedes-Benz 68221
|
1251
|
+
4 2017 VW 49040
|
1108
1252
|
: : : :
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1253
|
+
22 2021 BMW_MINI 18211
|
1254
|
+
23 2021 Mercedes-Benz 51722
|
1255
|
+
24 2021 VW 35215
|
1112
1256
|
```
|
1113
1257
|
|
1114
1258
|
### `to_wide`
|
1115
1259
|
|
1116
|
-
Creates a 'wide' DataFrame.
|
1260
|
+
Creates a 'wide' (messy) DataFrame from a 'long' DataFrame.
|
1117
1261
|
|
1118
|
-
- Option `:name`
|
1119
|
-
|
1262
|
+
- Option `:name` is the key of the column which will be expanded **to key names**.
|
1263
|
+
The default value is `:NAME` if it is not specified.
|
1264
|
+
- Option `:value` is the key of the column which will be expanded **to values**.
|
1265
|
+
The default value is `:VALUE` if it is not specified.
|
1120
1266
|
|
1121
1267
|
```ruby
|
1122
1268
|
import_cars.to_long(:Year).to_wide
|
1123
|
-
# import_cars.to_long(:Year).to_wide(name: :
|
1269
|
+
# import_cars.to_long(:Year).to_wide(name: :NAME, value: :VALUE)
|
1124
1270
|
# is also OK
|
1125
1271
|
|
1126
1272
|
# =>
|
1127
1273
|
#<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000f0f0>
|
1128
1274
|
Year Audi BMW BMW_MINI Mercedes-Benz VW
|
1129
1275
|
<uint16> <uint16> <uint16> <uint16> <uint32> <uint16>
|
1130
|
-
|
1131
|
-
|
1132
|
-
|
1133
|
-
|
1134
|
-
|
1276
|
+
0 2017 28336 52527 25427 68221 49040
|
1277
|
+
1 2018 26473 50982 25984 67554 51961
|
1278
|
+
2 2019 24222 46814 23813 66553 46794
|
1279
|
+
3 2020 22304 35712 20196 57041 36576
|
1280
|
+
4 2021 22535 35905 18211 51722 35215
|
1135
1281
|
```
|
1136
1282
|
|
1137
1283
|
## Combine
|