red_amber 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +69 -2
- data/README.md +83 -280
- data/doc/DataFrame.md +279 -265
- data/doc/Vector.md +28 -36
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +10 -37
- data/lib/red_amber/data_frame_displayable.rb +56 -3
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +8 -6
- data/lib/red_amber/data_frame_variable_operation.rb +25 -19
- data/lib/red_amber/group.rb +5 -3
- data/lib/red_amber/helper.rb +20 -18
- data/lib/red_amber/vector.rb +49 -30
- data/lib/red_amber/vector_selectable.rb +9 -1
- data/lib/red_amber/vector_updatable.rb +6 -3
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -0
- metadata +13 -3
- data/doc/examples_of_red_amber.ipynb +0 -8979
data/doc/DataFrame.md
CHANGED
@@ -14,30 +14,38 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
14
14
|
### `new` from a Hash
|
15
15
|
|
16
16
|
```ruby
|
17
|
-
RedAmber::DataFrame.new(x: [1, 2, 3])
|
17
|
+
df = RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C])
|
18
18
|
```
|
19
19
|
|
20
20
|
### `new` from a schema (by Hash) and data (by Array)
|
21
21
|
|
22
22
|
```ruby
|
23
|
-
RedAmber::DataFrame.new({:
|
23
|
+
RedAmber::DataFrame.new({x: :uint8, y: :string}, [[1, "A"], [2, "B"], [3, "C"]])
|
24
24
|
```
|
25
25
|
|
26
26
|
### `new` from an Arrow::Table
|
27
27
|
|
28
28
|
|
29
29
|
```ruby
|
30
|
-
table = Arrow::Table.new(x: [1, 2, 3])
|
30
|
+
table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
|
31
31
|
RedAmber::DataFrame.new(table)
|
32
32
|
```
|
33
33
|
|
34
|
+
### `new` from an Object which responds to `to_arrow`
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
require "datasets-arrow"
|
38
|
+
dataset = Datasets::Penguins.new
|
39
|
+
RedAmber::DataFrame.new(dataset)
|
40
|
+
```
|
41
|
+
|
34
42
|
### `new` from a Rover::DataFrame
|
35
43
|
|
36
44
|
|
37
45
|
```ruby
|
38
46
|
require 'rover'
|
39
47
|
|
40
|
-
rover = Rover::DataFrame.new(x: [1, 2, 3])
|
48
|
+
rover = Rover::DataFrame.new(x: [1, 2, 3], y: %w[A B C])
|
41
49
|
RedAmber::DataFrame.new(rover)
|
42
50
|
```
|
43
51
|
|
@@ -63,7 +71,7 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
63
71
|
```ruby
|
64
72
|
require 'parquet'
|
65
73
|
|
66
|
-
|
74
|
+
df = RedAmber::DataFrame.load("file.parquet")
|
67
75
|
```
|
68
76
|
|
69
77
|
### `save` (instance method)
|
@@ -79,7 +87,7 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
79
87
|
```ruby
|
80
88
|
require 'parquet'
|
81
89
|
|
82
|
-
|
90
|
+
df.save("file.parquet")
|
83
91
|
```
|
84
92
|
|
85
93
|
## Properties
|
@@ -210,15 +218,15 @@ puts penguins.to_s
|
|
210
218
|
# =>
|
211
219
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
212
220
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
221
|
+
0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
222
|
+
1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
223
|
+
2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
224
|
+
3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
225
|
+
4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
218
226
|
: : : : : : ... :
|
219
|
-
|
220
|
-
|
221
|
-
|
227
|
+
341 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
228
|
+
342 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
229
|
+
343 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
222
230
|
```
|
223
231
|
### `inspect`
|
224
232
|
|
@@ -235,11 +243,11 @@ puts penguins.summary.to_s(width: 82) # needs more width to show all stats in th
|
|
235
243
|
# =>
|
236
244
|
variables count mean std min 25% median 75% max
|
237
245
|
<dictionary> <uint16> <double> <double> <double> <double> <double> <double> <double>
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
246
|
+
0 bill_length_mm 342 43.92 5.46 32.1 39.23 44.38 48.5 59.6
|
247
|
+
1 bill_depth_mm 342 17.15 1.97 13.1 15.6 17.32 18.7 21.5
|
248
|
+
2 flipper_length_mm 342 200.92 14.06 172.0 190.0 197.0 213.0 231.0
|
249
|
+
3 body_mass_g 342 4201.75 801.95 2700.0 3550.0 4031.5 4750.0 6300.0
|
250
|
+
4 year 344 2008.03 0.82 2007.0 2007.0 2008.0 2009.0 2009.0
|
243
251
|
```
|
244
252
|
|
245
253
|
### `to_rover`
|
@@ -265,21 +273,22 @@ penguins.to_rover
|
|
265
273
|
require 'red_amber'
|
266
274
|
require 'datasets-arrow'
|
267
275
|
|
268
|
-
|
269
|
-
|
276
|
+
dataset = Datasets::Penguins.new
|
277
|
+
# (From 0.2.2) responsible to the object which has `to_arrow` method.
|
278
|
+
RedAmber::DataFrame.new(dataset).tdr
|
270
279
|
|
271
280
|
# =>
|
272
281
|
RedAmber::DataFrame : 344 x 8 Vectors
|
273
282
|
Vectors : 5 numeric, 3 strings
|
274
283
|
# key type level data_preview
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
284
|
+
0 :species string 3 {"Adelie"=>152, "Chinstrap"=>68, "Gentoo"=>124}
|
285
|
+
1 :island string 3 {"Torgersen"=>52, "Biscoe"=>168, "Dream"=>124}
|
286
|
+
2 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils
|
287
|
+
3 :bill_depth_mm double 81 [18.7, 17.4, 18.0, nil, 19.3, ... ], 2 nils
|
288
|
+
4 :flipper_length_mm uint8 56 [181, 186, 195, nil, 193, ... ], 2 nils
|
289
|
+
5 :body_mass_g uint16 95 [3750, 3800, 3250, nil, 3450, ... ], 2 nils
|
290
|
+
6 :sex string 3 {"male"=>168, "female"=>165, nil=>11}
|
291
|
+
7 :year uint16 3 {2007=>110, 2008=>114, 2009=>120}
|
283
292
|
```
|
284
293
|
|
285
294
|
- limit: limit of variables to show. Default value is 10.
|
@@ -311,9 +320,9 @@ penguins.to_rover
|
|
311
320
|
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000328fc>
|
312
321
|
b c a
|
313
322
|
<string> <double> <uint8>
|
314
|
-
|
315
|
-
|
316
|
-
|
323
|
+
0 A 1.0 1
|
324
|
+
1 B 2.0 2
|
325
|
+
2 C 3.0 3
|
317
326
|
```
|
318
327
|
|
319
328
|
If `#[]` represents single variable (column), it returns a Vector object.
|
@@ -359,10 +368,10 @@ penguins.to_rover
|
|
359
368
|
#<RedAmber::DataFrame : 4 x 3 Vectors, 0x0000000000033270>
|
360
369
|
a b c
|
361
370
|
<uint8> <string> <double>
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
371
|
+
0 3 C 3.0
|
372
|
+
1 1 A 1.0
|
373
|
+
2 2 B 2.0
|
374
|
+
3 3 C 3.0
|
366
375
|
```
|
367
376
|
|
368
377
|
- Select obs. by a boolean Array or a boolean RedAmber::Vector at same size as self.
|
@@ -405,15 +414,15 @@ penguins.to_rover
|
|
405
414
|
#<RedAmber::DataFrame : 344 x 2 Vectors, 0x0000000000035ebc>
|
406
415
|
species bill_length_mm
|
407
416
|
<string> <double>
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
417
|
+
0 Adelie 39.1
|
418
|
+
1 Adelie 39.5
|
419
|
+
2 Adelie 40.3
|
420
|
+
3 Adelie (nil)
|
421
|
+
4 Adelie 36.7
|
413
422
|
: : :
|
414
|
-
|
415
|
-
|
416
|
-
|
423
|
+
341 Gentoo 50.4
|
424
|
+
342 Gentoo 45.2
|
425
|
+
343 Gentoo 49.9
|
417
426
|
```
|
418
427
|
|
419
428
|
- Indices as arguments
|
@@ -427,15 +436,15 @@ penguins.to_rover
|
|
427
436
|
#<RedAmber::DataFrame : 344 x 4 Vectors, 0x0000000000055ce4>
|
428
437
|
species island bill_length_mm year
|
429
438
|
<string> <string> <double> <uint16>
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
439
|
+
0 Adelie Torgersen 39.1 2007
|
440
|
+
1 Adelie Torgersen 39.5 2007
|
441
|
+
2 Adelie Torgersen 40.3 2007
|
442
|
+
3 Adelie Torgersen (nil) 2007
|
443
|
+
4 Adelie Torgersen 36.7 2007
|
435
444
|
: : : : :
|
436
|
-
|
437
|
-
|
438
|
-
|
445
|
+
341 Gentoo Biscoe 50.4 2009
|
446
|
+
342 Gentoo Biscoe 45.2 2009
|
447
|
+
343 Gentoo Biscoe 49.9 2009
|
439
448
|
```
|
440
449
|
|
441
450
|
- Booleans as arguments
|
@@ -443,21 +452,21 @@ penguins.to_rover
|
|
443
452
|
`pick(booleans)` accepts booleans as arguments in an Array. Booleans must be same length as `n_keys`.
|
444
453
|
|
445
454
|
```ruby
|
446
|
-
penguins.pick(penguins.
|
455
|
+
penguins.pick(penguins.vectors.map(&:string?))
|
447
456
|
|
448
457
|
# =>
|
449
458
|
#<RedAmber::DataFrame : 344 x 3 Vectors, 0x00000000000387ac>
|
450
459
|
species island sex
|
451
460
|
<string> <string> <string>
|
452
|
-
|
461
|
+
0 Adelie Torgersen male
|
462
|
+
1 Adelie Torgersen female
|
453
463
|
2 Adelie Torgersen female
|
454
|
-
3 Adelie Torgersen
|
455
|
-
4 Adelie Torgersen
|
456
|
-
5 Adelie Torgersen female
|
464
|
+
3 Adelie Torgersen (nil)
|
465
|
+
4 Adelie Torgersen female
|
457
466
|
: : : :
|
458
|
-
|
459
|
-
|
460
|
-
|
467
|
+
341 Gentoo Biscoe male
|
468
|
+
342 Gentoo Biscoe female
|
469
|
+
343 Gentoo Biscoe male
|
461
470
|
```
|
462
471
|
|
463
472
|
- Keys or booleans by a block
|
@@ -471,15 +480,15 @@ penguins.to_rover
|
|
471
480
|
#<RedAmber::DataFrame : 344 x 3 Vectors, 0x000000000003dd4c>
|
472
481
|
bill_length_mm bill_depth_mm flipper_length_mm
|
473
482
|
<double> <double> <uint8>
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
483
|
+
0 39.1 18.7 181
|
484
|
+
1 39.5 17.4 186
|
485
|
+
2 40.3 18.0 195
|
486
|
+
3 (nil) (nil) (nil)
|
487
|
+
4 36.7 19.3 193
|
479
488
|
: : : :
|
480
|
-
|
481
|
-
|
482
|
-
|
489
|
+
341 50.4 15.7 222
|
490
|
+
342 45.2 14.8 212
|
491
|
+
343 49.9 16.1 213
|
483
492
|
```
|
484
493
|
|
485
494
|
### `drop ` - pick and drop -
|
@@ -526,9 +535,9 @@ penguins.to_rover
|
|
526
535
|
#<RedAmber::DataFrame : 3 x 1 Vector, 0x000000000003f4bc>
|
527
536
|
a
|
528
537
|
<uint8>
|
529
|
-
|
530
|
-
|
531
|
-
|
538
|
+
0 1
|
539
|
+
1 2
|
540
|
+
2 3
|
532
541
|
|
533
542
|
df[:a]
|
534
543
|
|
@@ -566,17 +575,17 @@ penguins.to_rover
|
|
566
575
|
|
567
576
|
# =>
|
568
577
|
#<RedAmber::DataFrame : 10 x 8 Vectors, 0x0000000000042be4>
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
578
|
+
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
579
|
+
<string> <string> <double> <double> <uint8> ... <uint16>
|
580
|
+
0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
581
|
+
1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
582
|
+
2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
583
|
+
3 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
584
|
+
4 Adelie Torgersen 36.7 19.3 193 ... 2007
|
585
|
+
: : : : : : ... :
|
586
|
+
7 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
587
|
+
8 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
588
|
+
9 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
580
589
|
```
|
581
590
|
|
582
591
|
- Booleans as an argument
|
@@ -591,15 +600,15 @@ penguins.to_rover
|
|
591
600
|
#<RedAmber::DataFrame : 242 x 8 Vectors, 0x0000000000043d3c>
|
592
601
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
593
602
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
603
|
+
0 Adelie Torgersen 40.3 18.0 195 ... 2007
|
604
|
+
1 Adelie Torgersen 42.0 20.2 190 ... 2007
|
605
|
+
2 Adelie Torgersen 41.1 17.6 182 ... 2007
|
606
|
+
3 Adelie Torgersen 42.5 20.7 197 ... 2007
|
607
|
+
4 Adelie Torgersen 46.0 21.5 194 ... 2007
|
599
608
|
: : : : : : ... :
|
600
|
-
|
601
|
-
|
602
|
-
|
609
|
+
239 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
610
|
+
240 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
611
|
+
241 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
603
612
|
```
|
604
613
|
|
605
614
|
- Indices or booleans by a block
|
@@ -619,15 +628,15 @@ penguins.to_rover
|
|
619
628
|
#<RedAmber::DataFrame : 204 x 8 Vectors, 0x0000000000047a40>
|
620
629
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
621
630
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
631
|
+
0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
632
|
+
1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
633
|
+
2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
634
|
+
3 Adelie Torgersen 39.3 20.6 190 ... 2007
|
635
|
+
4 Adelie Torgersen 38.9 17.8 181 ... 2007
|
627
636
|
: : : : : : ... :
|
628
|
-
|
629
|
-
|
630
|
-
|
637
|
+
201 Gentoo Biscoe 47.2 13.7 214 ... 2009
|
638
|
+
202 Gentoo Biscoe 46.8 14.3 215 ... 2009
|
639
|
+
203 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
631
640
|
```
|
632
641
|
|
633
642
|
- Notice: nil option
|
@@ -674,15 +683,15 @@ penguins.to_rover
|
|
674
683
|
#<RedAmber::DataFrame : 334 x 8 Vectors, 0x00000000000487c4>
|
675
684
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
676
685
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
686
|
+
0 Adelie Torgersen 39.3 20.6 190 ... 2007
|
687
|
+
1 Adelie Torgersen 38.9 17.8 181 ... 2007
|
688
|
+
2 Adelie Torgersen 39.2 19.6 195 ... 2007
|
689
|
+
3 Adelie Torgersen 34.1 18.1 193 ... 2007
|
690
|
+
4 Adelie Torgersen 42.0 20.2 190 ... 2007
|
682
691
|
: : : : : : ... :
|
683
|
-
|
684
|
-
|
685
|
-
|
692
|
+
331 Gentoo Biscoe 44.5 15.7 217 ... 2009
|
693
|
+
332 Gentoo Biscoe 48.8 16.2 222 ... 2009
|
694
|
+
333 Gentoo Biscoe 47.2 13.7 214 ... 2009
|
686
695
|
```
|
687
696
|
|
688
697
|
- Booleans as an argument
|
@@ -698,15 +707,15 @@ penguins.to_rover
|
|
698
707
|
#<RedAmber::DataFrame : 333 x 8 Vectors, 0x0000000000049fac>
|
699
708
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
700
709
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
710
|
+
0 Adelie Torgersen 39.1 18.7 181 ... 2007
|
711
|
+
1 Adelie Torgersen 39.5 17.4 186 ... 2007
|
712
|
+
2 Adelie Torgersen 40.3 18.0 195 ... 2007
|
713
|
+
3 Adelie Torgersen 36.7 19.3 193 ... 2007
|
714
|
+
4 Adelie Torgersen 39.3 20.6 190 ... 2007
|
706
715
|
: : : : : : ... :
|
707
|
-
|
708
|
-
|
709
|
-
|
716
|
+
330 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
717
|
+
331 Gentoo Biscoe 45.2 14.8 212 ... 2009
|
718
|
+
332 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
710
719
|
```
|
711
720
|
|
712
721
|
- Indices or booleans by a block
|
@@ -727,15 +736,15 @@ penguins.to_rover
|
|
727
736
|
#<RedAmber::DataFrame : 140 x 8 Vectors, 0x000000000004de40>
|
728
737
|
species island bill_length_mm bill_depth_mm flipper_length_mm ... year
|
729
738
|
<string> <string> <double> <double> <uint8> ... <uint16>
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
739
|
+
0 Adelie Torgersen (nil) (nil) (nil) ... 2007
|
740
|
+
1 Adelie Torgersen 36.7 19.3 193 ... 2007
|
741
|
+
2 Adelie Torgersen 34.1 18.1 193 ... 2007
|
742
|
+
3 Adelie Torgersen 37.8 17.1 186 ... 2007
|
743
|
+
4 Adelie Torgersen 37.8 17.3 180 ... 2007
|
735
744
|
: : : : : : ... :
|
736
|
-
|
737
|
-
|
738
|
-
|
745
|
+
137 Gentoo Biscoe (nil) (nil) (nil) ... 2009
|
746
|
+
138 Gentoo Biscoe 50.4 15.7 222 ... 2009
|
747
|
+
139 Gentoo Biscoe 49.9 16.1 213 ... 2009
|
739
748
|
```
|
740
749
|
|
741
750
|
- Notice for nil
|
@@ -770,8 +779,8 @@ penguins.to_rover
|
|
770
779
|
#<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000005df98>
|
771
780
|
a b c
|
772
781
|
<uint8> <string> <double>
|
773
|
-
|
774
|
-
|
782
|
+
0 1 A 1.0
|
783
|
+
1 (nil) C 3.0
|
775
784
|
```
|
776
785
|
|
777
786
|
### `rename`
|
@@ -792,9 +801,9 @@ penguins.to_rover
|
|
792
801
|
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000060838>
|
793
802
|
name age_in_1993
|
794
803
|
<string> <uint8>
|
795
|
-
|
796
|
-
|
797
|
-
|
804
|
+
0 Yasuko 68
|
805
|
+
1 Rui 49
|
806
|
+
2 Hinata 28
|
798
807
|
```
|
799
808
|
|
800
809
|
- Key pairs by a block
|
@@ -832,9 +841,9 @@ penguins.to_rover
|
|
832
841
|
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000062804>
|
833
842
|
name age
|
834
843
|
<string> <uint8>
|
835
|
-
|
836
|
-
|
837
|
-
|
844
|
+
0 Yasuko 68
|
845
|
+
1 Rui 49
|
846
|
+
2 Hinata 28
|
838
847
|
|
839
848
|
# update :age and add :brother
|
840
849
|
df.assign do
|
@@ -848,9 +857,9 @@ penguins.to_rover
|
|
848
857
|
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000658b0>
|
849
858
|
name age brother
|
850
859
|
<string> <uint8> <string>
|
851
|
-
|
852
|
-
|
853
|
-
|
860
|
+
0 Yasuko 97 Santa
|
861
|
+
1 Rui 78 (nil)
|
862
|
+
2 Hinata 57 Momotaro
|
854
863
|
```
|
855
864
|
|
856
865
|
- Key pairs by a block
|
@@ -869,11 +878,11 @@ penguins.to_rover
|
|
869
878
|
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000069e60>
|
870
879
|
index float string
|
871
880
|
<uint8> <double> <string>
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
881
|
+
0 0 0.0 A
|
882
|
+
1 1 1.1 B
|
883
|
+
2 2 2.2 C
|
884
|
+
3 3 NaN D
|
885
|
+
4 (nil) (nil) (nil)
|
877
886
|
|
878
887
|
# update :float
|
879
888
|
# assigner by an Array
|
@@ -886,11 +895,11 @@ penguins.to_rover
|
|
886
895
|
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x00000000000dfffc>
|
887
896
|
index float string
|
888
897
|
<uint8> <double> <string>
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
898
|
+
0 0 -0.0 A
|
899
|
+
1 1 -1.1 B
|
900
|
+
2 2 -2.2 C
|
901
|
+
3 3 NaN D
|
902
|
+
4 (nil) (nil) (nil)
|
894
903
|
|
895
904
|
# Or we can use assigner by a Hash
|
896
905
|
df.assign do
|
@@ -921,11 +930,11 @@ penguins.to_rover
|
|
921
930
|
#<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000001787c>
|
922
931
|
new_index index float string
|
923
932
|
<uint8> <uint8> <double> <string>
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
933
|
+
0 1 0 0.0 A
|
934
|
+
1 2 1 1.1 B
|
935
|
+
2 3 2 2.2 C
|
936
|
+
3 4 3 NaN D
|
937
|
+
4 5 (nil) (nil) (nil)
|
929
938
|
```
|
930
939
|
|
931
940
|
### `slice_by(key, keep_key: false) { block }`
|
@@ -946,11 +955,11 @@ penguins.to_rover
|
|
946
955
|
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000069e60>
|
947
956
|
index float string
|
948
957
|
<uint8> <double> <string>
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
958
|
+
0 0 0.0 A
|
959
|
+
1 1 1.1 B
|
960
|
+
2 2 2.2 C
|
961
|
+
3 3 NaN D
|
962
|
+
4 (nil) (nil) (nil)
|
954
963
|
|
955
964
|
df.slice_by(:string) { ["A", "C"] }
|
956
965
|
|
@@ -958,8 +967,8 @@ penguins.to_rover
|
|
958
967
|
#<RedAmber::DataFrame : 2 x 2 Vectors, 0x000000000001b1ac>
|
959
968
|
index float
|
960
969
|
<uint8> <double>
|
961
|
-
|
962
|
-
|
970
|
+
0 0 0.0
|
971
|
+
1 2 2.2
|
963
972
|
```
|
964
973
|
|
965
974
|
It is the same behavior as;
|
@@ -977,9 +986,9 @@ It is the same behavior as;
|
|
977
986
|
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000069668>
|
978
987
|
index float
|
979
988
|
<uint8> <double>
|
980
|
-
|
981
|
-
|
982
|
-
|
989
|
+
0 0 0.0
|
990
|
+
1 1 1.1
|
991
|
+
2 2 2.2
|
983
992
|
```
|
984
993
|
|
985
994
|
When the option `keep_key: true` used, the column `key` will be preserved.
|
@@ -991,9 +1000,9 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
991
1000
|
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000073c44>
|
992
1001
|
index float string
|
993
1002
|
<uint8> <double> <string>
|
994
|
-
|
995
|
-
|
996
|
-
|
1003
|
+
0 0 0.0 A
|
1004
|
+
1 1 1.1 B
|
1005
|
+
2 2 2.2 C
|
997
1006
|
```
|
998
1007
|
|
999
1008
|
## Updating
|
@@ -1016,11 +1025,11 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1016
1025
|
#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000009b03c>
|
1017
1026
|
index string bool
|
1018
1027
|
<uint8> <string> <boolean>
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1028
|
+
0 0 (nil) false
|
1029
|
+
1 0 B false
|
1030
|
+
2 1 B true
|
1031
|
+
3 1 C (nil)
|
1032
|
+
4 (nil) A true
|
1024
1033
|
```
|
1025
1034
|
|
1026
1035
|
- [ ] Clamp
|
@@ -1037,7 +1046,7 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1037
1046
|
|
1038
1047
|
### `group(group_keys)`
|
1039
1048
|
|
1040
|
-
`group` creates a class `Group
|
1049
|
+
`group` creates an instance of class `Group`. `Group` accepts the functions below as methods.
|
1041
1050
|
Method accepts options as `group_keys`.
|
1042
1051
|
|
1043
1052
|
Available functions are:
|
@@ -1064,23 +1073,22 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1064
1073
|
This is an example of grouping the famous STARWARS dataset.
|
1065
1074
|
|
1066
1075
|
```ruby
|
1067
|
-
|
1068
|
-
|
1069
|
-
starwars
|
1076
|
+
uri = URI("https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv")
|
1077
|
+
starwars = RedAmber::DataFrame.load(uri)
|
1070
1078
|
|
1071
1079
|
# =>
|
1072
1080
|
#<RedAmber::DataFrame : 87 x 12 Vectors, 0x0000000000005a50>
|
1073
1081
|
unnamed1 name height mass hair_color skin_color eye_color ... species
|
1074
1082
|
<int64> <string> <int64> <double> <string> <string> <string> ... <string>
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1083
|
+
0 1 Luke Skywalker 172 77.0 blond fair blue ... Human
|
1084
|
+
1 2 C-3PO 167 75.0 NA gold yellow ... Droid
|
1085
|
+
2 3 R2-D2 96 32.0 NA white, blue red ... Droid
|
1086
|
+
3 4 Darth Vader 202 136.0 none white yellow ... Human
|
1087
|
+
4 5 Leia Organa 150 49.0 brown light brown ... Human
|
1080
1088
|
: : : : : : : : ... :
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1089
|
+
84 85 BB8 (nil) (nil) none none black ... Droid
|
1090
|
+
85 86 Captain Phasma (nil) (nil) unknown unknown unknown ... NA
|
1091
|
+
86 87 Padmé Amidala 165 45.0 brown light brown ... Human
|
1084
1092
|
|
1085
1093
|
starwars.tdr(12)
|
1086
1094
|
|
@@ -1088,58 +1096,60 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1088
1096
|
RedAmber::DataFrame : 87 x 12 Vectors
|
1089
1097
|
Vectors : 4 numeric, 8 strings
|
1090
1098
|
# key type level data_preview
|
1091
|
-
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1099
|
+
0 :unnamed1 int64 87 [1, 2, 3, 4, 5, ... ]
|
1100
|
+
1 :name string 87 ["Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Organa", ... ]
|
1101
|
+
2 :height int64 46 [172, 167, 96, 202, 150, ... ], 6 nils
|
1102
|
+
3 :mass double 39 [77.0, 75.0, 32.0, 136.0, 49.0, ... ], 28 nils
|
1103
|
+
4 :hair_color string 13 ["blond", "NA", "NA", "none", "brown", ... ]
|
1104
|
+
5 :skin_color string 31 ["fair", "gold", "white, blue", "white", "light", ... ]
|
1105
|
+
6 :eye_color string 15 ["blue", "yellow", "red", "yellow", "brown", ... ]
|
1106
|
+
7 :birth_year double 37 [19.0, 112.0, 33.0, 41.9, 19.0, ... ], 44 nils
|
1107
|
+
8 :sex string 5 {"male"=>60, "none"=>6, "female"=>16, "hermaphroditic"=>1, "NA"=>4}
|
1108
|
+
9 :gender string 3 {"masculine"=>66, "feminine"=>17, "NA"=>4}
|
1109
|
+
10 :homeworld string 49 ["Tatooine", "Tatooine", "Naboo", "Tatooine", "Alderaan", ... ]
|
1110
|
+
11 :species string 38 ["Human", "Droid", "Droid", "Human", "Human", ... ]
|
1103
1111
|
```
|
1104
1112
|
|
1105
1113
|
We can group by `:species` and calculate the count.
|
1106
1114
|
|
1107
1115
|
```ruby
|
1108
|
-
starwars.
|
1116
|
+
starwars.remove { species == "NA" }
|
1117
|
+
.group(:species).count(:species)
|
1109
1118
|
|
1110
1119
|
# =>
|
1111
|
-
#<RedAmber::DataFrame :
|
1120
|
+
#<RedAmber::DataFrame : 37 x 2 Vectors, 0x000000000000ffa0>
|
1112
1121
|
species count
|
1113
1122
|
<string> <int64>
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1123
|
+
0 Human 35
|
1124
|
+
1 Droid 6
|
1125
|
+
2 Wookiee 2
|
1126
|
+
3 Rodian 1
|
1127
|
+
4 Hutt 1
|
1119
1128
|
: : :
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1129
|
+
34 Kaleesh 1
|
1130
|
+
35 Pau'an 1
|
1131
|
+
36 Kel Dor 1
|
1123
1132
|
```
|
1124
1133
|
|
1125
1134
|
We can also calculate the mean of `:mass` and `:height` together.
|
1126
1135
|
|
1127
1136
|
```ruby
|
1128
|
-
grouped = starwars.
|
1137
|
+
grouped = starwars.remove { species == "NA" }
|
1138
|
+
.group(:species) { [count(:species), mean(:height, :mass)] }
|
1129
1139
|
|
1130
1140
|
# =>
|
1131
|
-
#<RedAmber::DataFrame :
|
1132
|
-
|
1133
|
-
<
|
1134
|
-
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1138
|
-
|
1139
|
-
: :
|
1140
|
-
|
1141
|
-
|
1142
|
-
|
1141
|
+
#<RedAmber::DataFrame : 37 x 4 Vectors, 0x000000000000fff0>
|
1142
|
+
species count mean(height) mean(mass)
|
1143
|
+
<string> <int64> <double> <double>
|
1144
|
+
0 Human 35 176.65 82.78
|
1145
|
+
1 Droid 6 131.2 69.75
|
1146
|
+
2 Wookiee 2 231.0 124.0
|
1147
|
+
3 Rodian 1 173.0 74.0
|
1148
|
+
4 Hutt 1 175.0 1358.0
|
1149
|
+
: : : : :
|
1150
|
+
34 Kaleesh 1 216.0 159.0
|
1151
|
+
35 Pau'an 1 206.0 80.0
|
1152
|
+
36 Kel Dor 1 188.0 80.0
|
1143
1153
|
```
|
1144
1154
|
|
1145
1155
|
Select rows for count > 1.
|
@@ -1148,22 +1158,23 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1148
1158
|
grouped.slice(grouped[:count] > 1)
|
1149
1159
|
|
1150
1160
|
# =>
|
1151
|
-
#<RedAmber::DataFrame :
|
1161
|
+
#<RedAmber::DataFrame : 8 x 4 Vectors, 0x000000000001002c>
|
1152
1162
|
species count mean(height) mean(mass)
|
1153
1163
|
<string> <int64> <double> <double>
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1160
|
-
|
1161
|
-
|
1162
|
-
9 Kaminoan 2 221.0 88.0
|
1164
|
+
0 Human 35 176.65 82.78
|
1165
|
+
1 Droid 6 131.2 69.75
|
1166
|
+
2 Wookiee 2 231.0 124.0
|
1167
|
+
3 Gungan 3 208.67 74.0
|
1168
|
+
4 Zabrak 2 173.0 80.0
|
1169
|
+
5 Twi'lek 2 179.0 55.0
|
1170
|
+
6 Mirialan 2 168.0 53.1
|
1171
|
+
7 Kaminoan 2 221.0 88.0
|
1163
1172
|
```
|
1164
1173
|
|
1165
1174
|
## Reshape
|
1166
1175
|
|
1176
|
+

|
1177
|
+
|
1167
1178
|
### `transpose`
|
1168
1179
|
|
1169
1180
|
Creates a transposed DataFrame from a wide (messy) DataFrame.
|
@@ -1175,26 +1186,27 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1175
1186
|
#<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
|
1176
1187
|
Year Audi BMW BMW_MINI Mercedes-Benz VW
|
1177
1188
|
<int64> <int64> <int64> <int64> <int64> <int64>
|
1178
|
-
|
1179
|
-
|
1180
|
-
|
1181
|
-
|
1182
|
-
|
1183
|
-
|
1189
|
+
0 2017 28336 52527 25427 68221 49040
|
1190
|
+
1 2018 26473 50982 25984 67554 51961
|
1191
|
+
2 2019 24222 46814 23813 66553 46794
|
1192
|
+
3 2020 22304 35712 20196 57041 36576
|
1193
|
+
4 2021 22535 35905 18211 51722 35215
|
1194
|
+
|
1195
|
+
import_cars.transpose(name: :Manufacturer)
|
1184
1196
|
|
1185
1197
|
# =>
|
1186
|
-
#<RedAmber::DataFrame : 5 x 6 Vectors,
|
1198
|
+
#<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
|
1187
1199
|
Manufacturer 2017 2018 2019 2020 2021
|
1188
|
-
<
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1200
|
+
<string> <uint32> <uint32> <uint32> <uint16> <uint16>
|
1201
|
+
0 Audi 28336 26473 24222 22304 22535
|
1202
|
+
1 BMW 52527 50982 46814 35712 35905
|
1203
|
+
2 BMW_MINI 25427 25984 23813 20196 18211
|
1204
|
+
3 Mercedes-Benz 68221 67554 66553 57041 51722
|
1205
|
+
4 VW 49040 51961 46794 36576 35215
|
1194
1206
|
```
|
1195
1207
|
|
1196
1208
|
The leftmost column is created by original keys. Key name of the column is
|
1197
|
-
named by parameter `:name`. If `:name` is not specified, `:
|
1209
|
+
named by parameter `:name`. If `:name` is not specified, `:NAME` is used for the key.
|
1198
1210
|
|
1199
1211
|
### `to_long(*keep_keys)`
|
1200
1212
|
|
@@ -1206,39 +1218,41 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1206
1218
|
import_cars.to_long(:Year)
|
1207
1219
|
|
1208
1220
|
# =>
|
1209
|
-
#<RedAmber::DataFrame : 25 x 3 Vectors,
|
1210
|
-
Year
|
1211
|
-
<uint16> <
|
1212
|
-
|
1213
|
-
|
1214
|
-
|
1215
|
-
|
1216
|
-
|
1221
|
+
#<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
|
1222
|
+
Year NAME VALUE
|
1223
|
+
<uint16> <string> <uint32>
|
1224
|
+
0 2017 Audi 28336
|
1225
|
+
1 2017 BMW 52527
|
1226
|
+
2 2017 BMW_MINI 25427
|
1227
|
+
3 2017 Mercedes-Benz 68221
|
1228
|
+
4 2017 VW 49040
|
1217
1229
|
: : : :
|
1218
|
-
|
1219
|
-
|
1220
|
-
|
1230
|
+
22 2021 BMW_MINI 18211
|
1231
|
+
23 2021 Mercedes-Benz 51722
|
1232
|
+
24 2021 VW 35215
|
1221
1233
|
```
|
1222
1234
|
|
1223
1235
|
- Option `:name` is the key of the column which came **from key names**.
|
1236
|
+
The default value is `:NAME` if it is not specified.
|
1224
1237
|
- Option `:value` is the key of the column which came **from values**.
|
1238
|
+
The default value is `:VALUE` if it is not specified.
|
1225
1239
|
|
1226
1240
|
```ruby
|
1227
1241
|
import_cars.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
|
1228
1242
|
|
1229
1243
|
# =>
|
1230
|
-
#<RedAmber::DataFrame : 25 x 3 Vectors,
|
1244
|
+
#<RedAmber::DataFrame : 25 x 3 Vectors, 0x000000000001359c>
|
1231
1245
|
Year Manufacturer Num_of_imported
|
1232
|
-
<uint16> <
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1246
|
+
<uint16> <string> <uint32>
|
1247
|
+
0 2017 Audi 28336
|
1248
|
+
1 2017 BMW 52527
|
1249
|
+
2 2017 BMW_MINI 25427
|
1250
|
+
3 2017 Mercedes-Benz 68221
|
1251
|
+
4 2017 VW 49040
|
1238
1252
|
: : : :
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1253
|
+
22 2021 BMW_MINI 18211
|
1254
|
+
23 2021 Mercedes-Benz 51722
|
1255
|
+
24 2021 VW 35215
|
1242
1256
|
```
|
1243
1257
|
|
1244
1258
|
### `to_wide`
|
@@ -1246,7 +1260,9 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1246
1260
|
Creates a 'wide' (messy) DataFrame from a 'long' DataFrame.
|
1247
1261
|
|
1248
1262
|
- Option `:name` is the key of the column which will be expanded **to key names**.
|
1263
|
+
The default value is `:NAME` if it is not specified.
|
1249
1264
|
- Option `:value` is the key of the column which will be expanded **to values**.
|
1265
|
+
The default value is `:VALUE` if it is not specified.
|
1250
1266
|
|
1251
1267
|
```ruby
|
1252
1268
|
import_cars.to_long(:Year).to_wide
|
@@ -1257,13 +1273,11 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1257
1273
|
#<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000f0f0>
|
1258
1274
|
Year Audi BMW BMW_MINI Mercedes-Benz VW
|
1259
1275
|
<uint16> <uint16> <uint16> <uint16> <uint32> <uint16>
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1265
|
-
|
1266
|
-
# == import_cars
|
1276
|
+
0 2017 28336 52527 25427 68221 49040
|
1277
|
+
1 2018 26473 50982 25984 67554 51961
|
1278
|
+
2 2019 24222 46814 23813 66553 46794
|
1279
|
+
3 2020 22304 35712 20196 57041 36576
|
1280
|
+
4 2021 22535 35905 18211 51722 35215
|
1267
1281
|
```
|
1268
1282
|
|
1269
1283
|
## Combine
|