red_amber 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +69 -2
- data/README.md +83 -280
- data/doc/DataFrame.md +279 -265
- data/doc/Vector.md +28 -36
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +10 -37
- data/lib/red_amber/data_frame_displayable.rb +56 -3
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +8 -6
- data/lib/red_amber/data_frame_variable_operation.rb +25 -19
- data/lib/red_amber/group.rb +5 -3
- data/lib/red_amber/helper.rb +20 -18
- data/lib/red_amber/vector.rb +49 -30
- data/lib/red_amber/vector_selectable.rb +9 -1
- data/lib/red_amber/vector_updatable.rb +6 -3
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -0
- metadata +13 -3
- data/doc/examples_of_red_amber.ipynb +0 -8979
data/doc/DataFrame.md
CHANGED
@@ -14,30 +14,38 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
14
14
|
### `new` from a Hash
|
15
15
|
|
16
16
|
```ruby
|
17
|
-
RedAmber::DataFrame.new(x: [1, 2, 3])
|
17
|
+
df = RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C])
|
18
18
|
```
|
19
19
|
|
20
20
|
### `new` from a schema (by Hash) and data (by Array)
|
21
21
|
|
22
22
|
```ruby
|
23
|
-
RedAmber::DataFrame.new({:
|
23
|
+
RedAmber::DataFrame.new({x: :uint8, y: :string}, [[1, "A"], [2, "B"], [3, "C"]])
|
24
24
|
```
|
25
25
|
|
26
26
|
### `new` from an Arrow::Table
|
27
27
|
|
28
28
|
|
29
29
|
```ruby
|
30
|
-
table = Arrow::Table.new(x: [1, 2, 3])
|
30
|
+
table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
|
31
31
|
RedAmber::DataFrame.new(table)
|
32
32
|
```
|
33
33
|
|
34
|
+
### `new` from an Object which responds to `to_arrow`
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
require "datasets-arrow"
|
38
|
+
dataset = Datasets::Penguins.new
|
39
|
+
RedAmber::DataFrame.new(dataset)
|
40
|
+
```
|
41
|
+
|
34
42
|
### `new` from a Rover::DataFrame
|
35
43
|
|
36
44
|
|
37
45
|
```ruby
|
38
46
|
require 'rover'
|
39
47
|
|
40
|
-
rover = Rover::DataFrame.new(x: [1, 2, 3])
|
48
|
+
rover = Rover::DataFrame.new(x: [1, 2, 3], y: %w[A B C])
|
41
49
|
RedAmber::DataFrame.new(rover)
|
42
50
|
```
|
43
51
|
|
@@ -63,7 +71,7 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
63
71
|
```ruby
|
64
72
|
require 'parquet'
|
65
73
|
|
66
|
-
|
74
|
+
df = RedAmber::DataFrame.load("file.parquet")
|
67
75
|
```
|
68
76
|
|
69
77
|
### `save` (instance method)
|
@@ -79,7 +87,7 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
79
87
|
```ruby
|
80
88
|
require 'parquet'
|
81
89
|
|
82
|
-
|
90
|
+
df.save("file.parquet")
|
83
91
|
```
|
84
92
|
|
85
93
|
## Properties
|
@@ -210,15 +218,15 @@ puts penguins.to_s
|
|
210
218
|
# =>
|
211
219
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
212
220
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
221
|
+
0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
222
|
+
1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
223
|
+
2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
224
|
+
3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
225
|
+
4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
218
226
|
: : : : : : ... :
|
219
|
-
|
220
|
-
|
221
|
-
|
227
|
+
341 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
228
|
+
342 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
229
|
+
343 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
222
230
|
```
|
223
231
|
### `inspect`
|
224
232
|
|
@@ -235,11 +243,11 @@ puts penguins.summary.to_s(width: 82) # needs more width to show all stats in th
|
|
235
243
|
# =>
|
236
244
|
variables count mean std min 25% median 75% max
|
237
245
|
<dictionary> <uint16> <double> <double> <double> <double> <double> <double> <double>
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
246
|
+
0 bill_length_mm 342 43.92 5.46 32.1 39.23 44.38 48.5 59.6
|
247
|
+
1 bill_depth_mm 342 17.15 1.97 13.1 15.6 17.32 18.7 21.5
|
248
|
+
2 flipper_length_mm 342 200.92 14.06 172.0 190.0 197.0 213.0 231.0
|
249
|
+
3 body_mass_g 342 4201.75 801.95 2700.0 3550.0 4031.5 4750.0 6300.0
|
250
|
+
4 year 344 2008.03 0.82 2007.0 2007.0 2008.0 2009.0 2009.0
|
243
251
|
```
|
244
252
|
|
245
253
|
### `to_rover`
|
@@ -265,21 +273,22 @@ penguins.to_rover
|
|
265
273
|
require 'red_amber'
|
266
274
|
require 'datasets-arrow'
|
267
275
|
|
268
|
-
|
269
|
-
|
276
|
+
dataset = Datasets::Penguins.new
|
277
|
+
# (From 0.2.2) `DataFrame.new` accepts any object which responds to `to_arrow`.
|
278
|
+
RedAmber::DataFrame.new(dataset).tdr
|
270
279
|
|
271
280
|
# =>
|
272
281
|
RedAmber::DataFrame : 344 x 8 Vectors
|
273
282
|
Vectors : 5 numeric, 3 strings
|
274
283
|
# key type level data_preview
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
284
|
+
0 :species string 3 {"Adelie"=>152, "Chinstrap"=>68, "Gentoo"=>124}
|
285
|
+
1 :island string 3 {"Torgersen"=>52, "Biscoe"=>168, "Dream"=>124}
|
286
|
+
2 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils
|
287
|
+
3 :bill_depth_mm double 81 [18.7, 17.4, 18.0, nil, 19.3, ... ], 2 nils
|
288
|
+
4 :flipper_length_mm uint8 56 [181, 186, 195, nil, 193, ... ], 2 nils
|
289
|
+
5 :body_mass_g uint16 95 [3750, 3800, 3250, nil, 3450, ... ], 2 nils
|
290
|
+
6 :sex string 3 {"male"=>168, "female"=>165, nil=>11}
|
291
|
+
7 :year uint16 3 {2007=>110, 2008=>114, 2009=>120}
|
283
292
|
```
|
284
293
|
|
285
294
|
- limit: limit of variables to show. Default value is 10.
|
@@ -311,9 +320,9 @@ penguins.to_rover
|
|
311
320
|
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000328fc>
|
312
321
|
b c a
|
313
322
|
<string> <double> <uint8>
|
314
|
-
|
315
|
-
|
316
|
-
|
323
|
+
0 A 1.0 1
|
324
|
+
1 B 2.0 2
|
325
|
+
2 C 3.0 3
|
317
326
|
```
|
318
327
|
|
319
328
|
If `#[]` represents single variable (column), it returns a Vector object.
|
@@ -359,10 +368,10 @@ penguins.to_rover
|
|
359
368
|
#<RedAmber::DataFrame : 4 x 3 Vectors, 0x0000000000033270>
|
360
369
|
a b c
|
361
370
|
<uint8> <string> <double>
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
371
|
+
0 3 C 3.0
|
372
|
+
1 1 A 1.0
|
373
|
+
2 2 B 2.0
|
374
|
+
3 3 C 3.0
|
366
375
|
```
|
367
376
|
|
368
377
|
- Select obs. by a boolean Array or a boolean RedAmber::Vector of the same size as self.
|
@@ -405,15 +414,15 @@ penguins.to_rover
|
|
405
414
|
#<RedAmber::DataFrame : 344 x 2 Vectors, 0x0000000000035ebc>
|
406
415
|
species bill_length_mm
|
407
416
|
<string> <double>
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
417
|
+
0 Adelie 39.1
|
418
|
+
1 Adelie 39.5
|
419
|
+
2 Adelie 40.3
|
420
|
+
3 Adelie (nil)
|
421
|
+
4 Adelie 36.7
|
413
422
|
: : :
|
414
|
-
|
415
|
-
|
416
|
-
|
423
|
+
341 Gentoo 50.4
|
424
|
+
342 Gentoo 45.2
|
425
|
+
343 Gentoo 49.9
|
417
426
|
```
|
418
427
|
|
419
428
|
- Indices as arguments
|
@@ -427,15 +436,15 @@ penguins.to_rover
|
|
427
436
|
#<RedAmber::DataFrame : 344 x 4 Vectors, 0x0000000000055ce4>
|
428
437
|
species island bill_length_mm year
|
429
438
|
<string> <string> <double> <uint16>
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
439
|
+
0 Adelie Torgersen 39.1 2007
|
440
|
+
1 Adelie Torgersen 39.5 2007
|
441
|
+
2 Adelie Torgersen 40.3 2007
|
442
|
+
3 Adelie Torgersen (nil) 2007
|
443
|
+
4 Adelie Torgersen 36.7 2007
|
435
444
|
: : : : :
|
436
|
-
|
437
|
-
|
438
|
-
|
445
|
+
341 Gentoo Biscoe 50.4 2009
|
446
|
+
342 Gentoo Biscoe 45.2 2009
|
447
|
+
343 Gentoo Biscoe 49.9 2009
|
439
448
|
```
|
440
449
|
|
441
450
|
- Booleans as arguments
|
@@ -443,21 +452,21 @@ penguins.to_rover
|
|
443
452
|
`pick(booleans)` accepts booleans as arguments in an Array. Booleans must be the same length as `n_keys`.
|
444
453
|
|
445
454
|
```ruby
|
446
|
-
penguins.pick(penguins.
|
455
|
+
penguins.pick(penguins.vectors.map(&:string?))
|
447
456
|
|
448
457
|
# =>
|
449
458
|
#<RedAmber::DataFrame : 344 x 3 Vectors, 0x00000000000387ac>
|
450
459
|
species island sex
|
451
460
|
<string> <string> <string>
|
452
|
-
|
461
|
+
0 Adelie Torgersen male
|
462
|
+
1 Adelie Torgersen female
|
453
463
|
2 Adelie Torgersen female
|
454
|
-
3 Adelie Torgersen
|
455
|
-
4 Adelie Torgersen
|
456
|
-
5 Adelie Torgersen female
|
464
|
+
3 Adelie Torgersen (nil)
|
465
|
+
4 Adelie Torgersen female
|
457
466
|
: : : :
|
458
|
-
|
459
|
-
|
460
|
-
|
467
|
+
341 Gentoo Biscoe male
|
468
|
+
342 Gentoo Biscoe female
|
469
|
+
343 Gentoo Biscoe male
|
461
470
|
```
|
462
471
|
|
463
472
|
- Keys or booleans by a block
|
@@ -471,15 +480,15 @@ penguins.to_rover
|
|
471
480
|
#<RedAmber::DataFrame : 344 x 3 Vectors, 0x000000000003dd4c>
|
472
481
|
bill_length_mm bill_depth_mm flipper_length_mm
|
473
482
|
<double> <double> <uint8>
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
483
|
+
0 39.1 18.7 181
|
484
|
+
1 39.5 17.4 186
|
485
|
+
2 40.3 18.0 195
|
486
|
+
3 (nil) (nil) (nil)
|
487
|
+
4 36.7 19.3 193
|
479
488
|
: : : :
|
480
|
-
|
481
|
-
|
482
|
-
|
489
|
+
341 50.4 15.7 222
|
490
|
+
342 45.2 14.8 212
|
491
|
+
343 49.9 16.1 213
|
483
492
|
```
|
484
493
|
|
485
494
|
### `drop ` - pick and drop -
|
@@ -526,9 +535,9 @@ penguins.to_rover
|
|
526
535
|
#<RedAmber::DataFrame : 3 x 1 Vector, 0x000000000003f4bc>
|
527
536
|
a
|
528
537
|
<uint8>
|
529
|
-
|
530
|
-
|
531
|
-
|
538
|
+
0 1
|
539
|
+
1 2
|
540
|
+
2 3
|
532
541
|
|
533
542
|
df[:a]
|
534
543
|
|
@@ -566,17 +575,17 @@ penguins.to_rover
|
|
566
575
|
|
567
576
|
# =>
|
568
577
|
#<RedAmber::DataFrame : 10 x 8 Vectors, 0x0000000000042be4>
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
578
|
+
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
579
|
+
<string> <string> <double> <double> <uint8> ... <uint16>
|
580
|
+
0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
581
|
+
1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
582
|
+
2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
583
|
+
3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
584
|
+
4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
585
|
+
: : : : : : ... :
|
586
|
+
7 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
587
|
+
8 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
588
|
+
9 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
580
589
|
```
|
581
590
|
|
582
591
|
- Booleans as an argument
|
@@ -591,15 +600,15 @@ penguins.to_rover
|
|
591
600
|
#<RedAmber::DataFrame : 242 x 8 Vectors, 0x0000000000043d3c>
|
592
601
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
593
602
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
603
|
+
0 Adelie Torgersen 40.3 18.0 195 ... 2007
|
604
|
+
1 Adelie Torgersen 42.0 20.2 190 ... 2007
|
605
|
+
2 Adelie Torgersen 41.1 17.6 182 ... 2007
|
606
|
+
3 Adelie Torgersen 42.5 20.7 197 ... 2007
|
607
|
+
4 Adelie Torgersen 46.0 21.5 194 ... 2007
|
599
608
|
: : : : : : ... :
|
600
|
-
|
601
|
-
|
602
|
-
|
609
|
+
239 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
610
|
+
240 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
611
|
+
241 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
603
612
|
```
|
604
613
|
|
605
614
|
- Indices or booleans by a block
|
@@ -619,15 +628,15 @@ penguins.to_rover
|
|
619
628
|
#<RedAmber::DataFrame : 204 x 8 Vectors, 0x0000000000047a40>
|
620
629
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
621
630
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
631
|
+
0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
632
|
+
1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
633
|
+
2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
634
|
+
3 Adelie Torgersen 39.3 20.6 190 ... 2007
|
635
|
+
4 Adelie Torgersen 38.9 17.8 181 ... 2007
|
627
636
|
: : : : : : ... :
|
628
|
-
|
629
|
-
|
630
|
-
|
637
|
+
201 Gentoo Biscoe 47.2 13.7 214 ... 2009
|
638
|
+
202 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
639
|
+
203 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
631
640
|
```
|
632
641
|
|
633
642
|
- Notice: nil option
|
@@ -674,15 +683,15 @@ penguins.to_rover
|
|
674
683
|
#<RedAmber::DataFrame : 334 x 8 Vectors, 0x00000000000487c4>
|
675
684
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
676
685
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
686
|
+
0 Adelie Torgersen 39.3 20.6 190 ... 2007
|
687
|
+
1 Adelie Torgersen 38.9 17.8 181 ... 2007
|
688
|
+
2 Adelie Torgersen 39.2 19.6 195 ... 2007
|
689
|
+
3 Adelie Torgersen 34.1 18.1 193 ... 2007
|
690
|
+
4 Adelie Torgersen 42.0 20.2 190 ... 2007
|
682
691
|
: : : : : : ... :
|
683
|
-
|
684
|
-
|
685
|
-
|
692
|
+
331 Gentoo Biscoe 44.5 15.7 217 ... 2009
|
693
|
+
332 Gentoo Biscoe 48.8 16.2 222 ... 2009
|
694
|
+
333 Gentoo Biscoe 47.2 13.7 214 ... 2009
|
686
695
|
```
|
687
696
|
|
688
697
|
- Booleans as an argument
|
@@ -698,15 +707,15 @@ penguins.to_rover
|
|
698
707
|
#<RedAmber::DataFrame : 333 x 8 Vectors, 0x0000000000049fac>
|
699
708
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
700
709
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
710
|
+
0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
711
|
+
1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
712
|
+
2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
713
|
+
3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
714
|
+
4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
706
715
|
: : : : : : ... :
|
707
|
-
|
708
|
-
|
709
|
-
|
716
|
+
330 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
717
|
+
331 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
718
|
+
332 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
710
719
|
```
|
711
720
|
|
712
721
|
- Indices or booleans by a block
|
@@ -727,15 +736,15 @@ penguins.to_rover
|
|
727
736
|
#<RedAmber::DataFrame : 140 x 8 Vectors, 0x000000000004de40>
|
728
737
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
729
738
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
739
|
+
0 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
740
|
+
1 Adelie Torgersen 36.7 19.3 193 ... 2007
|
741
|
+
2 Adelie Torgersen 34.1 18.1 193 ... 2007
|
742
|
+
3 Adelie Torgersen 37.8 17.1 186 ... 2007
|
743
|
+
4 Adelie Torgersen 37.8 17.3 180 ... 2007
|
735
744
|
: : : : : : ... :
|
736
|
-
|
737
|
-
|
738
|
-
|
745
|
+
137 Gentoo Biscoe (nil) (nil) (nil) ... 2009
|
746
|
+
138 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
747
|
+
139 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
739
748
|
```
|
740
749
|
|
741
750
|
- Notice for nil
|
@@ -770,8 +779,8 @@ penguins.to_rover
|
|
770
779
|
#<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000005df98>
|
771
780
|
a b c
|
772
781
|
<uint8> <string> <double>
|
773
|
-
|
774
|
-
|
782
|
+
0 1 A 1.0
|
783
|
+
1 (nil) C 3.0
|
775
784
|
```
|
776
785
|
|
777
786
|
### `rename`
|
@@ -792,9 +801,9 @@ penguins.to_rover
|
|
792
801
|
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000060838>
|
793
802
|
name age_in_1993
|
794
803
|
<string> <uint8>
|
795
|
-
|
796
|
-
|
797
|
-
|
804
|
+
0 Yasuko 68
|
805
|
+
1 Rui 49
|
806
|
+
2 Hinata 28
|
798
807
|
```
|
799
808
|
|
800
809
|
- Key pairs by a block
|
@@ -832,9 +841,9 @@ penguins.to_rover
|
|
832
841
|
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000062804>
|
833
842
|
name age
|
834
843
|
<string> <uint8>
|
835
|
-
|
836
|
-
|
837
|
-
|
844
|
+
0 Yasuko 68
|
845
|
+
1 Rui 49
|
846
|
+
2 Hinata 28
|
838
847
|
|
839
848
|
# update :age and add :brother
|
840
849
|
df.assign do
|
@@ -848,9 +857,9 @@ penguins.to_rover
|
|
848
857
|
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000658b0>
|
849
858
|
name age brother
|
850
859
|
<string> <uint8> <string>
|
851
|
-
|
852
|
-
|
853
|
-
|
860
|
+
0 Yasuko 97 Santa
|
861
|
+
1 Rui 78 (nil)
|
862
|
+
2 Hinata 57 Momotaro
|
854
863
|
```
|
855
864
|
|
856
865
|
- Key pairs by a block
|
@@ -869,11 +878,11 @@ penguins.to_rover
|
|
869
878
|
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000069e60>
|
870
879
|
index float string
|
871
880
|
<uint8> <double> <string>
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
881
|
+
0 0 0.0 A
|
882
|
+
1 1 1.1 B
|
883
|
+
2 2 2.2 C
|
884
|
+
3 3 NaN D
|
885
|
+
4 (nil) (nil) (nil)
|
877
886
|
|
878
887
|
# update :float
|
879
888
|
# assigner by an Array
|
@@ -886,11 +895,11 @@ penguins.to_rover
|
|
886
895
|
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x00000000000dfffc>
|
887
896
|
index float string
|
888
897
|
<uint8> <double> <string>
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
898
|
+
0 0 -0.0 A
|
899
|
+
1 1 -1.1 B
|
900
|
+
2 2 -2.2 C
|
901
|
+
3 3 NaN D
|
902
|
+
4 (nil) (nil) (nil)
|
894
903
|
|
895
904
|
# Or we can use assigner by a Hash
|
896
905
|
df.assign do
|
@@ -921,11 +930,11 @@ penguins.to_rover
|
|
921
930
|
#<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000001787c>
|
922
931
|
new_index index float string
|
923
932
|
<uint8> <uint8> <double> <string>
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
933
|
+
0 1 0 0.0 A
|
934
|
+
1 2 1 1.1 B
|
935
|
+
2 3 2 2.2 C
|
936
|
+
3 4 3 NaN D
|
937
|
+
4 5 (nil) (nil) (nil)
|
929
938
|
```
|
930
939
|
|
931
940
|
### `slice_by(key, keep_key: false) { block }`
|
@@ -946,11 +955,11 @@ penguins.to_rover
|
|
946
955
|
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000069e60>
|
947
956
|
index float string
|
948
957
|
<uint8> <double> <string>
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
958
|
+
0 0 0.0 A
|
959
|
+
1 1 1.1 B
|
960
|
+
2 2 2.2 C
|
961
|
+
3 3 NaN D
|
962
|
+
4 (nil) (nil) (nil)
|
954
963
|
|
955
964
|
df.slice_by(:string) { ["A", "C"] }
|
956
965
|
|
@@ -958,8 +967,8 @@ penguins.to_rover
|
|
958
967
|
#<RedAmber::DataFrame : 2 x 2 Vectors, 0x000000000001b1ac>
|
959
968
|
index float
|
960
969
|
<uint8> <double>
|
961
|
-
|
962
|
-
|
970
|
+
0 0 0.0
|
971
|
+
1 2 2.2
|
963
972
|
```
|
964
973
|
|
965
974
|
It is the same behavior as;
|
@@ -977,9 +986,9 @@ It is the same behavior as;
|
|
977
986
|
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000069668>
|
978
987
|
index float
|
979
988
|
<uint8> <double>
|
980
|
-
|
981
|
-
|
982
|
-
|
989
|
+
0 0 0.0
|
990
|
+
1 1 1.1
|
991
|
+
2 2 2.2
|
983
992
|
```
|
984
993
|
|
985
994
|
When the option `keep_key: true` used, the column `key` will be preserved.
|
@@ -991,9 +1000,9 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
991
1000
|
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000073c44>
|
992
1001
|
index float string
|
993
1002
|
<uint8> <double> <string>
|
994
|
-
|
995
|
-
|
996
|
-
|
1003
|
+
0 0 0.0 A
|
1004
|
+
1 1 1.1 B
|
1005
|
+
2 2 2.2 C
|
997
1006
|
```
|
998
1007
|
|
999
1008
|
## Updating
|
@@ -1016,11 +1025,11 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1016
1025
|
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000009b03c>
|
1017
1026
|
index string bool
|
1018
1027
|
<uint8> <string> <boolean>
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1028
|
+
0 0 (nil) false
|
1029
|
+
1 0 B false
|
1030
|
+
2 1 B true
|
1031
|
+
3 1 C (nil)
|
1032
|
+
4 (nil) A true
|
1024
1033
|
```
|
1025
1034
|
|
1026
1035
|
- [ ] Clamp
|
@@ -1037,7 +1046,7 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1037
1046
|
|
1038
1047
|
### `group(group_keys)`
|
1039
1048
|
|
1040
|
-
`group` creates a class `Group
|
1049
|
+
`group` creates an instance of class `Group`. `Group` accepts the functions below as methods.
|
1041
1050
|
Method accepts options as `group_keys`.
|
1042
1051
|
|
1043
1052
|
Available functions are:
|
@@ -1064,23 +1073,22 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1064
1073
|
This is an example of grouping the famous STARWARS dataset.
|
1065
1074
|
|
1066
1075
|
```ruby
|
1067
|
-
|
1068
|
-
|
1069
|
-
starwars
|
1076
|
+
uri = URI("https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv")
|
1077
|
+
starwars = RedAmber::DataFrame.load(uri)
|
1070
1078
|
|
1071
1079
|
# =>
|
1072
1080
|
#<RedAmber::DataFrame : 87 x 12 Vectors, 0x0000000000005a50>
|
1073
1081
|
unnamed1 name height mass hair_color skin_color eye_color ... species
|
1074
1082
|
<int64> <string> <int64> <double> <string> <string> <string> ... <string>
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1083
|
+
0 1 Luke Skywalker 172 77.0 blond fair blue ... Human
|
1084
|
+
1 2 C-3PO 167 75.0 NA gold yellow ... Droid
|
1085
|
+
2 3 R2-D2 96 32.0 NA white, blue red ... Droid
|
1086
|
+
3 4 Darth Vader 202 136.0 none white yellow ... Human
|
1087
|
+
4 5 Leia Organa 150 49.0 brown light brown ... Human
|
1080
1088
|
: : : : : : : : ... :
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1089
|
+
84 85 BB8 (nil) (nil) none none black ... Droid
|
1090
|
+
85 86 Captain Phasma (nil) (nil) unknown unknown unknown ... NA
|
1091
|
+
86 87 Padmé Amidala 165 45.0 brown light brown ... Human
|
1084
1092
|
|
1085
1093
|
starwars.tdr(12)
|
1086
1094
|
|
@@ -1088,58 +1096,60 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1088
1096
|
RedAmber::DataFrame : 87 x 12 Vectors
|
1089
1097
|
Vectors : 4 numeric, 8 strings
|
1090
1098
|
# key type level data_preview
|
1091
|
-
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1099
|
+
0 :unnamed1 int64 87 [1, 2, 3, 4, 5, ... ]
|
1100
|
+
1 :name string 87 ["Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Organa", ... ]
|
1101
|
+
2 :height int64 46 [172, 167, 96, 202, 150, ... ], 6 nils
|
1102
|
+
3 :mass double 39 [77.0, 75.0, 32.0, 136.0, 49.0, ... ], 28 nils
|
1103
|
+
4 :hair_color string 13 ["blond", "NA", "NA", "none", "brown", ... ]
|
1104
|
+
5 :skin_color string 31 ["fair", "gold", "white, blue", "white", "light", ... ]
|
1105
|
+
6 :eye_color string 15 ["blue", "yellow", "red", "yellow", "brown", ... ]
|
1106
|
+
7 :birth_year double 37 [19.0, 112.0, 33.0, 41.9, 19.0, ... ], 44 nils
|
1107
|
+
8 :sex string 5 {"male"=>60, "none"=>6, "female"=>16, "hermaphroditic"=>1, "NA"=>4}
|
1108
|
+
9 :gender string 3 {"masculine"=>66, "feminine"=>17, "NA"=>4}
|
1109
|
+
10 :homeworld string 49 ["Tatooine", "Tatooine", "Naboo", "Tatooine", "Alderaan", ... ]
|
1110
|
+
11 :species string 38 ["Human", "Droid", "Droid", "Human", "Human", ... ]
|
1103
1111
|
```
|
1104
1112
|
|
1105
1113
|
We can group by `:species` and calculate the count.
|
1106
1114
|
|
1107
1115
|
```ruby
|
1108
|
-
starwars.
|
1116
|
+
starwars.remove { species == "NA" }
|
1117
|
+
.group(:species).count(:species)
|
1109
1118
|
|
1110
1119
|
# =>
|
1111
|
-
#<RedAmber::DataFrame :
|
1120
|
+
#<RedAmber::DataFrame : 37 x 2 Vectors, 0x000000000000ffa0>
|
1112
1121
|
species count
|
1113
1122
|
<string> <int64>
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1123
|
+
0 Human 35
|
1124
|
+
1 Droid 6
|
1125
|
+
2 Wookiee 2
|
1126
|
+
3 Rodian 1
|
1127
|
+
4 Hutt 1
|
1119
1128
|
: : :
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1129
|
+
34 Kaleesh 1
|
1130
|
+
35 Pau'an 1
|
1131
|
+
36 Kel Dor 1
|
1123
1132
|
```
|
1124
1133
|
|
1125
1134
|
We can also calculate the mean of `:mass` and `:height` together.
|
1126
1135
|
|
1127
1136
|
```ruby
|
1128
|
-
grouped = starwars.
|
1137
|
+
grouped = starwars.remove { species == "NA" }
|
1138
|
+
.group(:species) { [count(:species), mean(:height, :mass)] }
|
1129
1139
|
|
1130
1140
|
# =>
|
1131
|
-
#<RedAmber::DataFrame :
|
1132
|
-
|
1133
|
-
<
|
1134
|
-
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1138
|
-
|
1139
|
-
: :
|
1140
|
-
|
1141
|
-
|
1142
|
-
|
1141
|
+
#<RedAmber::DataFrame : 37 x 4 Vectors, 0x000000000000fff0>
|
1142
|
+
species count mean(height) mean(mass)
|
1143
|
+
<string> <int64> <double> <double>
|
1144
|
+
0 Human 35 176.65 82.78
|
1145
|
+
1 Droid 6 131.2 69.75
|
1146
|
+
2 Wookiee 2 231.0 124.0
|
1147
|
+
3 Rodian 1 173.0 74.0
|
1148
|
+
4 Hutt 1 175.0 1358.0
|
1149
|
+
: : : : :
|
1150
|
+
34 Kaleesh 1 216.0 159.0
|
1151
|
+
35 Pau'an 1 206.0 80.0
|
1152
|
+
36 Kel Dor 1 188.0 80.0
|
1143
1153
|
```
|
1144
1154
|
|
1145
1155
|
Select rows for count > 1.
|
@@ -1148,22 +1158,23 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1148
1158
|
grouped.slice(grouped[:count] > 1)
|
1149
1159
|
|
1150
1160
|
# =>
|
1151
|
-
#<RedAmber::DataFrame :
|
1161
|
+
#<RedAmber::DataFrame : 8 x 4 Vectors, 0x000000000001002c>
|
1152
1162
|
species count mean(height) mean(mass)
|
1153
1163
|
<string> <int64> <double> <double>
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1160
|
-
|
1161
|
-
|
1162
|
-
9 Kaminoan 2 221.0 88.0
|
1164
|
+
0 Human 35 176.65 82.78
|
1165
|
+
1 Droid 6 131.2 69.75
|
1166
|
+
2 Wookiee 2 231.0 124.0
|
1167
|
+
3 Gungan 3 208.67 74.0
|
1168
|
+
4 Zabrak 2 173.0 80.0
|
1169
|
+
5 Twi'lek 2 179.0 55.0
|
1170
|
+
6 Mirialan 2 168.0 53.1
|
1171
|
+
7 Kaminoan 2 221.0 88.0
|
1163
1172
|
```
|
1164
1173
|
|
1165
1174
|
## Reshape
|
1166
1175
|
|
1176
|
+
![dataframe reshaping image](doc/../image/reshaping_dataframe.png)
|
1177
|
+
|
1167
1178
|
### `transpose`
|
1168
1179
|
|
1169
1180
|
Creates transposed DataFrame for the wide (messy) dataframe.
|
@@ -1175,26 +1186,27 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1175
1186
|
#<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
|
1176
1187
|
Year Audi BMW BMW_MINI Mercedes-Benz VW
|
1177
1188
|
<int64> <int64> <int64> <int64> <int64> <int64>
|
1178
|
-
|
1179
|
-
|
1180
|
-
|
1181
|
-
|
1182
|
-
|
1183
|
-
|
1189
|
+
0 2017 28336 52527 25427 68221 49040
|
1190
|
+
1 2018 26473 50982 25984 67554 51961
|
1191
|
+
2 2019 24222 46814 23813 66553 46794
|
1192
|
+
3 2020 22304 35712 20196 57041 36576
|
1193
|
+
4 2021 22535 35905 18211 51722 35215
|
1194
|
+
|
1195
|
+
import_cars.transpose(name: :Manufacturer)
|
1184
1196
|
|
1185
1197
|
# =>
|
1186
|
-
#<RedAmber::DataFrame : 5 x 6 Vectors,
|
1198
|
+
#<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
|
1187
1199
|
Manufacturer 2017 2018 2019 2020 2021
|
1188
|
-
<
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1200
|
+
<string> <uint32> <uint32> <uint32> <uint16> <uint16>
|
1201
|
+
0 Audi 28336 26473 24222 22304 22535
|
1202
|
+
1 BMW 52527 50982 46814 35712 35905
|
1203
|
+
2 BMW_MINI 25427 25984 23813 20196 18211
|
1204
|
+
3 Mercedes-Benz 68221 67554 66553 57041 51722
|
1205
|
+
4 VW 49040 51961 46794 36576 35215
|
1194
1206
|
```
|
1195
1207
|
|
1196
1208
|
The leftmost column is created by original keys. Key name of the column is
|
1197
|
-
named by parameter `:name`. If `:name` is not specified, `:
|
1209
|
+
named by parameter `:name`. If `:name` is not specified, `:NAME` is used for the key.
|
1198
1210
|
|
1199
1211
|
### `to_long(*keep_keys)`
|
1200
1212
|
|
@@ -1206,39 +1218,41 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1206
1218
|
import_cars.to_long(:Year)
|
1207
1219
|
|
1208
1220
|
# =>
|
1209
|
-
#<RedAmber::DataFrame : 25 x 3 Vectors,
|
1210
|
-
Year
|
1211
|
-
<uint16> <
|
1212
|
-
|
1213
|
-
|
1214
|
-
|
1215
|
-
|
1216
|
-
|
1221
|
+
#<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
|
1222
|
+
Year NAME VALUE
|
1223
|
+
<uint16> <string> <uint32>
|
1224
|
+
0 2017 Audi 28336
|
1225
|
+
1 2017 BMW 52527
|
1226
|
+
2 2017 BMW_MINI 25427
|
1227
|
+
3 2017 Mercedes-Benz 68221
|
1228
|
+
4 2017 VW 49040
|
1217
1229
|
: : : :
|
1218
|
-
|
1219
|
-
|
1220
|
-
|
1230
|
+
22 2021 BMW_MINI 18211
|
1231
|
+
23 2021 Mercedes-Benz 51722
|
1232
|
+
24 2021 VW 35215
|
1221
1233
|
```
|
1222
1234
|
|
1223
1235
|
- Option `:name` is the key of the column which came **from key names**.
|
1236
|
+
The default value is `:NAME` if it is not specified.
|
1224
1237
|
- Option `:value` is the key of the column which came **from values**.
|
1238
|
+
The default value is `:VALUE` if it is not specified.
|
1225
1239
|
|
1226
1240
|
```ruby
|
1227
1241
|
import_cars.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
|
1228
1242
|
|
1229
1243
|
# =>
|
1230
|
-
#<RedAmber::DataFrame : 25 x 3 Vectors,
|
1244
|
+
#<RedAmber::DataFrame : 25 x 3 Vectors, 0x000000000001359c>
|
1231
1245
|
Year Manufacturer Num_of_imported
|
1232
|
-
<uint16> <
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1246
|
+
<uint16> <string> <uint32>
|
1247
|
+
0 2017 Audi 28336
|
1248
|
+
1 2017 BMW 52527
|
1249
|
+
2 2017 BMW_MINI 25427
|
1250
|
+
3 2017 Mercedes-Benz 68221
|
1251
|
+
4 2017 VW 49040
|
1238
1252
|
: : : :
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1253
|
+
22 2021 BMW_MINI 18211
|
1254
|
+
23 2021 Mercedes-Benz 51722
|
1255
|
+
24 2021 VW 35215
|
1242
1256
|
```
|
1243
1257
|
|
1244
1258
|
### `to_wide`
|
@@ -1246,7 +1260,9 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1246
1260
|
Creates a 'wide' (messy) DataFrame from a 'long' DataFrame.
|
1247
1261
|
|
1248
1262
|
- Option `:name` is the key of the column which will be expanded **to key names**.
|
1263
|
+
The default value is `:NAME` if it is not specified.
|
1249
1264
|
- Option `:value` is the key of the column which will be expanded **to values**.
|
1265
|
+
The default value is `:VALUE` if it is not specified.
|
1250
1266
|
|
1251
1267
|
```ruby
|
1252
1268
|
import_cars.to_long(:Year).to_wide
|
@@ -1257,13 +1273,11 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1257
1273
|
#<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000f0f0>
|
1258
1274
|
Year Audi BMW BMW_MINI Mercedes-Benz VW
|
1259
1275
|
<uint16> <uint16> <uint16> <uint16> <uint32> <uint16>
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1265
|
-
|
1266
|
-
# == import_cars
|
1276
|
+
0 2017 28336 52527 25427 68221 49040
|
1277
|
+
1 2018 26473 50982 25984 67554 51961
|
1278
|
+
2 2019 24222 46814 23813 66553 46794
|
1279
|
+
3 2020 22304 35712 20196 57041 36576
|
1280
|
+
4 2021 22535 35905 18211 51722 35215
|
1267
1281
|
```
|
1268
1282
|
|
1269
1283
|
## Combine
|