red_amber 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +5 -0
  3. data/CHANGELOG.md +125 -0
  4. data/README.md +86 -269
  5. data/doc/DataFrame.md +427 -281
  6. data/doc/Vector.md +35 -54
  7. data/doc/image/basic_verbs.png +0 -0
  8. data/doc/image/dataframe/assign.png +0 -0
  9. data/doc/image/dataframe/assign_operation.png +0 -0
  10. data/doc/image/dataframe/drop.png +0 -0
  11. data/doc/image/dataframe/pick.png +0 -0
  12. data/doc/image/dataframe/pick_operation.png +0 -0
  13. data/doc/image/dataframe/remove.png +0 -0
  14. data/doc/image/dataframe/rename.png +0 -0
  15. data/doc/image/dataframe/rename_operation.png +0 -0
  16. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  17. data/doc/image/dataframe/slice.png +0 -0
  18. data/doc/image/dataframe/slice_operation.png +0 -0
  19. data/doc/image/dataframe_model.png +0 -0
  20. data/doc/image/group_operation.png +0 -0
  21. data/doc/image/replace-if_then.png +0 -0
  22. data/doc/image/reshaping_dataframe.png +0 -0
  23. data/doc/image/screenshot.png +0 -0
  24. data/doc/image/vector/binary_element_wise.png +0 -0
  25. data/doc/image/vector/unary_aggregation.png +0 -0
  26. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  27. data/doc/image/vector/unary_element_wise.png +0 -0
  28. data/lib/red_amber/data_frame.rb +33 -41
  29. data/lib/red_amber/data_frame_displayable.rb +59 -6
  30. data/lib/red_amber/data_frame_loadsave.rb +36 -0
  31. data/lib/red_amber/data_frame_reshaping.rb +12 -10
  32. data/lib/red_amber/data_frame_selectable.rb +53 -9
  33. data/lib/red_amber/data_frame_variable_operation.rb +57 -20
  34. data/lib/red_amber/group.rb +5 -3
  35. data/lib/red_amber/helper.rb +20 -18
  36. data/lib/red_amber/vector.rb +50 -31
  37. data/lib/red_amber/vector_functions.rb +21 -24
  38. data/lib/red_amber/vector_selectable.rb +18 -9
  39. data/lib/red_amber/vector_updatable.rb +6 -3
  40. data/lib/red_amber/version.rb +1 -1
  41. data/lib/red_amber.rb +1 -0
  42. metadata +13 -3
  43. data/doc/examples_of_red_amber.ipynb +0 -6783
data/doc/DataFrame.md CHANGED
@@ -14,30 +14,38 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
14
14
  ### `new` from a Hash
15
15
 
16
16
  ```ruby
17
- RedAmber::DataFrame.new(x: [1, 2, 3])
17
+ df = RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C])
18
18
  ```
19
19
 
20
20
  ### `new` from a schema (by Hash) and data (by Array)
21
21
 
22
22
  ```ruby
23
- RedAmber::DataFrame.new({:x=>:uint8}, [[1], [2], [3]])
23
+ RedAmber::DataFrame.new({x: :uint8, y: :string}, [[1, "A"], [2, "B"], [3, "C"]])
24
24
  ```
25
25
 
26
26
  ### `new` from an Arrow::Table
27
27
 
28
28
 
29
29
  ```ruby
30
- table = Arrow::Table.new(x: [1, 2, 3])
30
+ table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])
31
31
  RedAmber::DataFrame.new(table)
32
32
  ```
33
33
 
34
+ ### `new` from an Object which responds to `to_arrow`
35
+
36
+ ```ruby
37
+ require "datasets-arrow"
38
+ dataset = Datasets::Penguins.new
39
+ RedAmber::DataFrame.new(dataset)
40
+ ```
41
+
34
42
  ### `new` from a Rover::DataFrame
35
43
 
36
44
 
37
45
  ```ruby
38
46
  require 'rover'
39
47
 
40
- rover = Rover::DataFrame.new(x: [1, 2, 3])
48
+ rover = Rover::DataFrame.new(x: [1, 2, 3], y: %w[A B C])
41
49
  RedAmber::DataFrame.new(rover)
42
50
  ```
43
51
 
@@ -63,7 +71,7 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
63
71
  ```ruby
64
72
  require 'parquet'
65
73
 
66
- dataframe = RedAmber::DataFrame.load("file.parquet")
74
+ df = RedAmber::DataFrame.load("file.parquet")
67
75
  ```
68
76
 
69
77
  ### `save` (instance method)
@@ -79,7 +87,7 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
79
87
  ```ruby
80
88
  require 'parquet'
81
89
 
82
- dataframe.save("file.parquet")
90
+ df.save("file.parquet")
83
91
  ```
84
92
 
85
93
  ## Properties
@@ -155,7 +163,25 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
155
163
 
156
164
  ### `indices`, `indexes`
157
165
 
158
- - Returns all indexes in an Array.
166
+ - Returns indexes in an Array.
167
+ Accepts an optional `start` argument that sets the first index.
168
+
169
+ ```ruby
170
+ df = RedAmber::DataFrame.new(x: [1, 2, 3, 4, 5])
171
+ df.indices
172
+
173
+ # =>
174
+ [0, 1, 2, 3, 4]
175
+
176
+ df.indices(1)
177
+
178
+ # =>
179
+ [1, 2, 3, 4, 5]
180
+
181
+ df.indices(:a)
182
+ # =>
183
+ [:a, :b, :c, :d, :e]
184
+ ```
159
185
 
160
186
  ### `to_h`
161
187
 
@@ -192,15 +218,15 @@ puts penguins.to_s
192
218
  # =>
193
219
  species island bill_length_mm bill_depth_mm flipper_length_mm ... year
194
220
  <string> <string> <double> <double> <uint8> ... <uint16>
195
- 1 Adelie Torgersen 39.1 18.7 181 ... 2007
196
- 2 Adelie Torgersen 39.5 17.4 186 ... 2007
197
- 3 Adelie Torgersen 40.3 18.0 195 ... 2007
198
- 4 Adelie Torgersen (nil) (nil) (nil) ... 2007
199
- 5 Adelie Torgersen 36.7 19.3 193 ... 2007
221
+ 0 Adelie Torgersen 39.1 18.7 181 ... 2007
222
+ 1 Adelie Torgersen 39.5 17.4 186 ... 2007
223
+ 2 Adelie Torgersen 40.3 18.0 195 ... 2007
224
+ 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
225
+ 4 Adelie Torgersen 36.7 19.3 193 ... 2007
200
226
  : : : : : : ... :
201
- 342 Gentoo Biscoe 50.4 15.7 222 ... 2009
202
- 343 Gentoo Biscoe 45.2 14.8 212 ... 2009
203
- 344 Gentoo Biscoe 49.9 16.1 213 ... 2009
227
+ 341 Gentoo Biscoe 50.4 15.7 222 ... 2009
228
+ 342 Gentoo Biscoe 45.2 14.8 212 ... 2009
229
+ 343 Gentoo Biscoe 49.9 16.1 213 ... 2009
204
230
  ```
205
231
  ### `inspect`
206
232
 
@@ -217,11 +243,11 @@ puts penguins.summary.to_s(width: 82) # needs more width to show all stats in th
217
243
  # =>
218
244
  variables count mean std min 25% median 75% max
219
245
  <dictionary> <uint16> <double> <double> <double> <double> <double> <double> <double>
220
- 1 bill_length_mm 342 43.92 5.46 32.1 39.23 44.38 48.5 59.6
221
- 2 bill_depth_mm 342 17.15 1.97 13.1 15.6 17.32 18.7 21.5
222
- 3 flipper_length_mm 342 200.92 14.06 172.0 190.0 197.0 213.0 231.0
223
- 4 body_mass_g 342 4201.75 801.95 2700.0 3550.0 4031.5 4750.0 6300.0
224
- 5 year 344 2008.03 0.82 2007.0 2007.0 2008.0 2009.0 2009.0
246
+ 0 bill_length_mm 342 43.92 5.46 32.1 39.23 44.38 48.5 59.6
247
+ 1 bill_depth_mm 342 17.15 1.97 13.1 15.6 17.32 18.7 21.5
248
+ 2 flipper_length_mm 342 200.92 14.06 172.0 190.0 197.0 213.0 231.0
249
+ 3 body_mass_g 342 4201.75 801.95 2700.0 3550.0 4031.5 4750.0 6300.0
250
+ 4 year 344 2008.03 0.82 2007.0 2007.0 2008.0 2009.0 2009.0
225
251
  ```
226
252
 
227
253
  ### `to_rover`
@@ -247,21 +273,22 @@ penguins.to_rover
247
273
  require 'red_amber'
248
274
  require 'datasets-arrow'
249
275
 
250
- penguins = Datasets::Penguins.new.to_arrow
251
- RedAmber::DataFrame.new(penguins).tdr
276
+ dataset = Datasets::Penguins.new
277
+ # (From 0.2.2) `new` accepts any object that responds to `to_arrow`.
278
+ RedAmber::DataFrame.new(dataset).tdr
252
279
 
253
280
  # =>
254
281
  RedAmber::DataFrame : 344 x 8 Vectors
255
282
  Vectors : 5 numeric, 3 strings
256
283
  # key type level data_preview
257
- 1 :species string 3 {"Adelie"=>152, "Chinstrap"=>68, "Gentoo"=>124}
258
- 2 :island string 3 {"Torgersen"=>52, "Biscoe"=>168, "Dream"=>124}
259
- 3 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils
260
- 4 :bill_depth_mm double 81 [18.7, 17.4, 18.0, nil, 19.3, ... ], 2 nils
261
- 5 :flipper_length_mm uint8 56 [181, 186, 195, nil, 193, ... ], 2 nils
262
- 6 :body_mass_g uint16 95 [3750, 3800, 3250, nil, 3450, ... ], 2 nils
263
- 7 :sex string 3 {"male"=>168, "female"=>165, nil=>11}
264
- 8 :year uint16 3 {2007=>110, 2008=>114, 2009=>120}
284
+ 0 :species string 3 {"Adelie"=>152, "Chinstrap"=>68, "Gentoo"=>124}
285
+ 1 :island string 3 {"Torgersen"=>52, "Biscoe"=>168, "Dream"=>124}
286
+ 2 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils
287
+ 3 :bill_depth_mm double 81 [18.7, 17.4, 18.0, nil, 19.3, ... ], 2 nils
288
+ 4 :flipper_length_mm uint8 56 [181, 186, 195, nil, 193, ... ], 2 nils
289
+ 5 :body_mass_g uint16 95 [3750, 3800, 3250, nil, 3450, ... ], 2 nils
290
+ 6 :sex string 3 {"male"=>168, "female"=>165, nil=>11}
291
+ 7 :year uint16 3 {2007=>110, 2008=>114, 2009=>120}
265
292
  ```
266
293
 
267
294
  - limit: limit of variables to show. Default value is 10.
@@ -293,9 +320,9 @@ penguins.to_rover
293
320
  #<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000328fc>
294
321
  b c a
295
322
  <string> <double> <uint8>
296
- 1 A 1.0 1
297
- 2 B 2.0 2
298
- 3 C 3.0 3
323
+ 0 A 1.0 1
324
+ 1 B 2.0 2
325
+ 2 C 3.0 3
299
326
  ```
300
327
 
301
328
  If `#[]` represents single variable (column), it returns a Vector object.
@@ -341,10 +368,10 @@ penguins.to_rover
341
368
  #<RedAmber::DataFrame : 4 x 3 Vectors, 0x0000000000033270>
342
369
  a b c
343
370
  <uint8> <string> <double>
344
- 1 3 C 3.0
345
- 2 1 A 1.0
346
- 3 2 B 2.0
347
- 4 3 C 3.0
371
+ 0 3 C 3.0
372
+ 1 1 A 1.0
373
+ 2 2 B 2.0
374
+ 3 3 C 3.0
348
375
  ```
349
376
 
350
377
  - Select obs. by a boolean Array or a boolean RedAmber::Vector at same size as self.
@@ -372,13 +399,13 @@ penguins.to_rover
372
399
 
373
400
  ### `pick ` - pick up variables by key label -
374
401
 
375
- Pick up some variables (columns) to create a sub DataFrame.
402
+ Pick up some columns (variables) to create a sub DataFrame.
376
403
 
377
404
  ![pick method image](doc/../image/dataframe/pick.png)
378
405
 
379
406
  - Keys as arguments
380
407
 
381
- `pick(keys)` accepts keys as arguments in an Array.
408
+ `pick(keys)` accepts keys as arguments in an Array or a Range.
382
409
 
383
410
  ```ruby
384
411
  penguins.pick(:species, :bill_length_mm)
@@ -387,42 +414,64 @@ penguins.to_rover
387
414
  #<RedAmber::DataFrame : 344 x 2 Vectors, 0x0000000000035ebc>
388
415
  species bill_length_mm
389
416
  <string> <double>
390
- 1 Adelie 39.1
391
- 2 Adelie 39.5
392
- 3 Adelie 40.3
393
- 4 Adelie (nil)
394
- 5 Adelie 36.7
417
+ 0 Adelie 39.1
418
+ 1 Adelie 39.5
419
+ 2 Adelie 40.3
420
+ 3 Adelie (nil)
421
+ 4 Adelie 36.7
395
422
  : : :
396
- 342 Gentoo 50.4
397
- 343 Gentoo 45.2
398
- 344 Gentoo 49.9
423
+ 341 Gentoo 50.4
424
+ 342 Gentoo 45.2
425
+ 343 Gentoo 49.9
426
+ ```
427
+
428
+ - Indices as arguments
429
+
430
+ `pick(indices)` accepts indices as arguments. Indices should be Integers, Floats or Ranges of Integers.
431
+
432
+ ```ruby
433
+ penguins.pick(0..2, -1)
434
+
435
+ # =>
436
+ #<RedAmber::DataFrame : 344 x 4 Vectors, 0x0000000000055ce4>
437
+ species island bill_length_mm year
438
+ <string> <string> <double> <uint16>
439
+ 0 Adelie Torgersen 39.1 2007
440
+ 1 Adelie Torgersen 39.5 2007
441
+ 2 Adelie Torgersen 40.3 2007
442
+ 3 Adelie Torgersen (nil) 2007
443
+ 4 Adelie Torgersen 36.7 2007
444
+ : : : : :
445
+ 341 Gentoo Biscoe 50.4 2009
446
+ 342 Gentoo Biscoe 45.2 2009
447
+ 343 Gentoo Biscoe 49.9 2009
399
448
  ```
400
449
 
401
- - Booleans as a argument
450
+ - Booleans as arguments
402
451
 
403
- `pick(booleans)` accepts booleans as a argument in an Array. Booleans must be same length as `n_keys`.
452
+ `pick(booleans)` accepts booleans as arguments in an Array. Booleans must be same length as `n_keys`.
404
453
 
405
454
  ```ruby
406
- penguins.pick(penguins.types.map { |type| type == :string })
455
+ penguins.pick(penguins.vectors.map(&:string?))
407
456
 
408
457
  # =>
409
458
  #<RedAmber::DataFrame : 344 x 3 Vectors, 0x00000000000387ac>
410
459
  species island sex
411
460
  <string> <string> <string>
412
- 1 Adelie Torgersen male
461
+ 0 Adelie Torgersen male
462
+ 1 Adelie Torgersen female
413
463
  2 Adelie Torgersen female
414
- 3 Adelie Torgersen female
415
- 4 Adelie Torgersen (nil)
416
- 5 Adelie Torgersen female
464
+ 3 Adelie Torgersen (nil)
465
+ 4 Adelie Torgersen female
417
466
  : : : :
418
- 342 Gentoo Biscoe male
419
- 343 Gentoo Biscoe female
420
- 344 Gentoo Biscoe male
467
+ 341 Gentoo Biscoe male
468
+ 342 Gentoo Biscoe female
469
+ 343 Gentoo Biscoe male
421
470
  ```
422
471
 
423
- - Keys or booleans by a block
472
+ - Keys or booleans by a block
424
473
 
425
- `pick {block}` is also acceptable. We can't use both arguments and a block at a same time. The block should return keys, or a boolean Array with a same length as `n_keys`. Block is called in the context of self.
474
+ `pick {block}` is also acceptable. We can't use both arguments and a block at the same time. The block should return keys, indices or a boolean Array with the same length as `n_keys`. The block is called in the context of self.
426
475
 
427
476
  ```ruby
428
477
  penguins.pick { keys.map { |key| key.end_with?('mm') } }
@@ -431,34 +480,38 @@ penguins.to_rover
431
480
  #<RedAmber::DataFrame : 344 x 3 Vectors, 0x000000000003dd4c>
432
481
  bill_length_mm bill_depth_mm flipper_length_mm
433
482
  <double> <double> <uint8>
434
- 1 39.1 18.7 181
435
- 2 39.5 17.4 186
436
- 3 40.3 18.0 195
437
- 4 (nil) (nil) (nil)
438
- 5 36.7 19.3 193
483
+ 0 39.1 18.7 181
484
+ 1 39.5 17.4 186
485
+ 2 40.3 18.0 195
486
+ 3 (nil) (nil) (nil)
487
+ 4 36.7 19.3 193
439
488
  : : : :
440
- 342 50.4 15.7 222
441
- 343 45.2 14.8 212
442
- 344 49.9 16.1 213
489
+ 341 50.4 15.7 222
490
+ 342 45.2 14.8 212
491
+ 343 49.9 16.1 213
443
492
  ```
444
493
 
445
494
  ### `drop ` - pick and drop -
446
495
 
447
- Drop some variables (columns) to create a remainer DataFrame.
496
+ Drop some columns (variables) to create a remainder DataFrame.
448
497
 
449
498
  ![drop method image](doc/../image/dataframe/drop.png)
450
499
 
451
500
  - Keys as arguments
452
501
 
453
- `drop(keys)` accepts keys as arguments in an Array.
502
+ `drop(keys)` accepts keys as arguments in an Array or a Range.
503
+
504
+ - Indices as arguments
505
+
506
+ `drop(indices)` accepts indices as arguments. Indices should be Integers, Floats or Ranges of Integers.
454
507
 
455
- - Booleans as a argument
508
+ - Booleans as arguments
456
509
 
457
- `drop(booleans)` accepts booleans as a argument in an Array. Booleans must be same length as `n_keys`.
510
+ `drop(booleans)` accepts booleans as arguments in an Array. Booleans must be the same length as `n_keys`.
458
511
 
459
512
  - Keys or booleans by a block
460
513
 
461
- `drop {block}` is also acceptable. We can't use both arguments and a block at a same time. The block should return keys, or a boolean Array with a same length as `n_keys`. Block is called in the context of self.
514
+ `drop {block}` is also acceptable. We can't use both arguments and a block at the same time. The block should return keys, indices or a boolean Array with the same length as `n_keys`. The block is called in the context of self.
462
515
 
463
516
  - Notice for nil
464
517
 
@@ -482,9 +535,9 @@ penguins.to_rover
482
535
  #<RedAmber::DataFrame : 3 x 1 Vector, 0x000000000003f4bc>
483
536
  a
484
537
  <uint8>
485
- 1 1
486
- 2 2
487
- 3 3
538
+ 0 1
539
+ 1 2
540
+ 2 3
488
541
 
489
542
  df[:a]
490
543
 
@@ -493,9 +546,20 @@ penguins.to_rover
493
546
  [1, 2, 3]
494
547
  ```
495
548
 
549
+ A simple key name is usable as a method of the DataFrame if the key name is acceptable as a method name.
550
+ It returns the same Vector as `[]`.
551
+
552
+ ```ruby
553
+ df.a
554
+
555
+ # =>
556
+ #<RedAmber::Vector(:uint8, size=3):0x000000000000f258>
557
+ [1, 2, 3]
558
+ ```
559
+
496
560
  ### `slice ` - to cut vertically is slice -
497
561
 
498
- Slice and select observations (rows) to create a sub DataFrame.
562
+ Slice and select rows (observations) to create a sub DataFrame.
499
563
 
500
564
  ![slice method image](doc/../image/dataframe/slice.png)
501
565
 
@@ -511,22 +575,22 @@ penguins.to_rover
511
575
 
512
576
  # =>
513
577
  #<RedAmber::DataFrame : 10 x 8 Vectors, 0x0000000000042be4>
514
- species island bill_length_mm bill_depth_mm flipper_length_mm ... year
515
- <string> <string> <double> <double> <uint8> ... <uint16>
516
- 1 Adelie Torgersen 39.1 18.7 181 ... 2007
517
- 2 Adelie Torgersen 39.5 17.4 186 ... 2007
518
- 3 Adelie Torgersen 40.3 18.0 195 ... 2007
519
- 4 Adelie Torgersen (nil) (nil) (nil) ... 2007
520
- 5 Adelie Torgersen 36.7 19.3 193 ... 2007
521
- : : : : : : ... :
522
- 8 Gentoo Biscoe 50.4 15.7 222 ... 2009
523
- 9 Gentoo Biscoe 45.2 14.8 212 ... 2009
524
- 10 Gentoo Biscoe 49.9 16.1 213 ... 2009
578
+ species island bill_length_mm bill_depth_mm flipper_length_mm ... year
579
+ <string> <string> <double> <double> <uint8> ... <uint16>
580
+ 0 Adelie Torgersen 39.1 18.7 181 ... 2007
581
+ 1 Adelie Torgersen 39.5 17.4 186 ... 2007
582
+ 2 Adelie Torgersen 40.3 18.0 195 ... 2007
583
+ 3 Adelie Torgersen (nil) (nil) (nil) ... 2007
584
+ 4 Adelie Torgersen 36.7 19.3 193 ... 2007
585
+ : : : : : : ... :
586
+ 7 Gentoo Biscoe 50.4 15.7 222 ... 2009
587
+ 8 Gentoo Biscoe 45.2 14.8 212 ... 2009
588
+ 9 Gentoo Biscoe 49.9 16.1 213 ... 2009
525
589
  ```
526
590
 
527
591
  - Booleans as an argument
528
592
 
529
- `slice(booleans)` accepts booleans as a argument in an Array, a Vector or an Arrow::BooleanArray . Booleans must be same length as `size`.
593
+ `slice(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray . Booleans must be same length as `size`.
530
594
 
531
595
  ```ruby
532
596
  vector = penguins[:bill_length_mm]
@@ -536,15 +600,15 @@ penguins.to_rover
536
600
  #<RedAmber::DataFrame : 242 x 8 Vectors, 0x0000000000043d3c>
537
601
  species island bill_length_mm bill_depth_mm flipper_length_mm ... year
538
602
  <string> <string> <double> <double> <uint8> ... <uint16>
539
- 1 Adelie Torgersen 40.3 18.0 195 ... 2007
540
- 2 Adelie Torgersen 42.0 20.2 190 ... 2007
541
- 3 Adelie Torgersen 41.1 17.6 182 ... 2007
542
- 4 Adelie Torgersen 42.5 20.7 197 ... 2007
543
- 5 Adelie Torgersen 46.0 21.5 194 ... 2007
603
+ 0 Adelie Torgersen 40.3 18.0 195 ... 2007
604
+ 1 Adelie Torgersen 42.0 20.2 190 ... 2007
605
+ 2 Adelie Torgersen 41.1 17.6 182 ... 2007
606
+ 3 Adelie Torgersen 42.5 20.7 197 ... 2007
607
+ 4 Adelie Torgersen 46.0 21.5 194 ... 2007
544
608
  : : : : : : ... :
545
- 240 Gentoo Biscoe 50.4 15.7 222 ... 2009
546
- 241 Gentoo Biscoe 45.2 14.8 212 ... 2009
547
- 242 Gentoo Biscoe 49.9 16.1 213 ... 2009
609
+ 239 Gentoo Biscoe 50.4 15.7 222 ... 2009
610
+ 240 Gentoo Biscoe 45.2 14.8 212 ... 2009
611
+ 241 Gentoo Biscoe 49.9 16.1 213 ... 2009
548
612
  ```
549
613
 
550
614
  - Indices or booleans by a block
@@ -564,15 +628,15 @@ penguins.to_rover
564
628
  #<RedAmber::DataFrame : 204 x 8 Vectors, 0x0000000000047a40>
565
629
  species island bill_length_mm bill_depth_mm flipper_length_mm ... year
566
630
  <string> <string> <double> <double> <uint8> ... <uint16>
567
- 1 Adelie Torgersen 39.1 18.7 181 ... 2007
568
- 2 Adelie Torgersen 39.5 17.4 186 ... 2007
569
- 3 Adelie Torgersen 40.3 18.0 195 ... 2007
570
- 4 Adelie Torgersen 39.3 20.6 190 ... 2007
571
- 5 Adelie Torgersen 38.9 17.8 181 ... 2007
631
+ 0 Adelie Torgersen 39.1 18.7 181 ... 2007
632
+ 1 Adelie Torgersen 39.5 17.4 186 ... 2007
633
+ 2 Adelie Torgersen 40.3 18.0 195 ... 2007
634
+ 3 Adelie Torgersen 39.3 20.6 190 ... 2007
635
+ 4 Adelie Torgersen 38.9 17.8 181 ... 2007
572
636
  : : : : : : ... :
573
- 202 Gentoo Biscoe 47.2 13.7 214 ... 2009
574
- 203 Gentoo Biscoe 46.8 14.3 215 ... 2009
575
- 204 Gentoo Biscoe 45.2 14.8 212 ... 2009
637
+ 201 Gentoo Biscoe 47.2 13.7 214 ... 2009
638
+ 202 Gentoo Biscoe 46.8 14.3 215 ... 2009
639
+ 203 Gentoo Biscoe 45.2 14.8 212 ... 2009
576
640
  ```
577
641
 
578
642
  - Notice: nil option
@@ -603,7 +667,7 @@ penguins.to_rover
603
667
 
604
668
  ### `remove`
605
669
 
606
- Slice and reject observations (rows) to create a remainer DataFrame.
670
+ Slice and reject rows (observations) to create a remainder DataFrame.
607
671
 
608
672
  ![remove method image](doc/../image/dataframe/remove.png)
609
673
 
@@ -619,20 +683,20 @@ penguins.to_rover
619
683
  #<RedAmber::DataFrame : 334 x 8 Vectors, 0x00000000000487c4>
620
684
  species island bill_length_mm bill_depth_mm flipper_length_mm ... year
621
685
  <string> <string> <double> <double> <uint8> ... <uint16>
622
- 1 Adelie Torgersen 39.3 20.6 190 ... 2007
623
- 2 Adelie Torgersen 38.9 17.8 181 ... 2007
624
- 3 Adelie Torgersen 39.2 19.6 195 ... 2007
625
- 4 Adelie Torgersen 34.1 18.1 193 ... 2007
626
- 5 Adelie Torgersen 42.0 20.2 190 ... 2007
686
+ 0 Adelie Torgersen 39.3 20.6 190 ... 2007
687
+ 1 Adelie Torgersen 38.9 17.8 181 ... 2007
688
+ 2 Adelie Torgersen 39.2 19.6 195 ... 2007
689
+ 3 Adelie Torgersen 34.1 18.1 193 ... 2007
690
+ 4 Adelie Torgersen 42.0 20.2 190 ... 2007
627
691
  : : : : : : ... :
628
- 332 Gentoo Biscoe 44.5 15.7 217 ... 2009
629
- 333 Gentoo Biscoe 48.8 16.2 222 ... 2009
630
- 334 Gentoo Biscoe 47.2 13.7 214 ... 2009
692
+ 331 Gentoo Biscoe 44.5 15.7 217 ... 2009
693
+ 332 Gentoo Biscoe 48.8 16.2 222 ... 2009
694
+ 333 Gentoo Biscoe 47.2 13.7 214 ... 2009
631
695
  ```
632
696
 
633
697
  - Booleans as an argument
634
698
 
635
- `remove(booleans)` accepts booleans as a argument in an Array, a Vector or an Arrow::BooleanArray . Booleans must be same length as `size`.
699
+ `remove(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray . Booleans must be same length as `size`.
636
700
 
637
701
  ```ruby
638
702
  # remove all observation contains nil
@@ -643,15 +707,15 @@ penguins.to_rover
643
707
  #<RedAmber::DataFrame : 333 x 8 Vectors, 0x0000000000049fac>
644
708
  species island bill_length_mm bill_depth_mm flipper_length_mm ... year
645
709
  <string> <string> <double> <double> <uint8> ... <uint16>
646
- 1 Adelie Torgersen 39.1 18.7 181 ... 2007
647
- 2 Adelie Torgersen 39.5 17.4 186 ... 2007
648
- 3 Adelie Torgersen 40.3 18.0 195 ... 2007
649
- 4 Adelie Torgersen 36.7 19.3 193 ... 2007
650
- 5 Adelie Torgersen 39.3 20.6 190 ... 2007
710
+ 0 Adelie Torgersen 39.1 18.7 181 ... 2007
711
+ 1 Adelie Torgersen 39.5 17.4 186 ... 2007
712
+ 2 Adelie Torgersen 40.3 18.0 195 ... 2007
713
+ 3 Adelie Torgersen 36.7 19.3 193 ... 2007
714
+ 4 Adelie Torgersen 39.3 20.6 190 ... 2007
651
715
  : : : : : : ... :
652
- 331 Gentoo Biscoe 50.4 15.7 222 ... 2009
653
- 332 Gentoo Biscoe 45.2 14.8 212 ... 2009
654
- 333 Gentoo Biscoe 49.9 16.1 213 ... 2009
716
+ 330 Gentoo Biscoe 50.4 15.7 222 ... 2009
717
+ 331 Gentoo Biscoe 45.2 14.8 212 ... 2009
718
+ 332 Gentoo Biscoe 49.9 16.1 213 ... 2009
655
719
  ```
656
720
 
657
721
  - Indices or booleans by a block
@@ -660,26 +724,29 @@ penguins.to_rover
660
724
 
661
725
  ```ruby
662
726
  penguins.remove do
663
- vector = self[:bill_length_mm]
664
- min = vector.mean - vector.std
665
- max = vector.mean + vector.std
666
- vector.to_a.map { |e| (min..max).include? e }
727
+ # We will use another style shown in slice
728
+ # self.bill_length_mm returns Vector
729
+ mean = bill_length_mm.mean
730
+ min = mean - bill_length_mm.std
731
+ max = mean + bill_length_mm.std
732
+ bill_length_mm.to_a.map { |e| (min..max).include? e }
667
733
  end
668
734
 
669
735
  # =>
670
736
  #<RedAmber::DataFrame : 140 x 8 Vectors, 0x000000000004de40>
671
737
  species island bill_length_mm bill_depth_mm flipper_length_mm ... year
672
738
  <string> <string> <double> <double> <uint8> ... <uint16>
673
- 1 Adelie Torgersen (nil) (nil) (nil) ... 2007
674
- 2 Adelie Torgersen 36.7 19.3 193 ... 2007
675
- 3 Adelie Torgersen 34.1 18.1 193 ... 2007
676
- 4 Adelie Torgersen 37.8 17.1 186 ... 2007
677
- 5 Adelie Torgersen 37.8 17.3 180 ... 2007
739
+ 0 Adelie Torgersen (nil) (nil) (nil) ... 2007
740
+ 1 Adelie Torgersen 36.7 19.3 193 ... 2007
741
+ 2 Adelie Torgersen 34.1 18.1 193 ... 2007
742
+ 3 Adelie Torgersen 37.8 17.1 186 ... 2007
743
+ 4 Adelie Torgersen 37.8 17.3 180 ... 2007
678
744
  : : : : : : ... :
679
- 138 Gentoo Biscoe (nil) (nil) (nil) ... 2009
680
- 139 Gentoo Biscoe 50.4 15.7 222 ... 2009
681
- 140 Gentoo Biscoe 49.9 16.1 213 ... 2009
745
+ 137 Gentoo Biscoe (nil) (nil) (nil) ... 2009
746
+ 138 Gentoo Biscoe 50.4 15.7 222 ... 2009
747
+ 139 Gentoo Biscoe 49.9 16.1 213 ... 2009
682
748
  ```
749
+
683
750
  - Notice for nil
684
751
  - When `remove` used with booleans, nil in booleans is treated as false. This behavior is aligned with Ruby's `nil#!`.
685
752
 
@@ -712,8 +779,8 @@ penguins.to_rover
712
779
  #<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000005df98>
713
780
  a b c
714
781
  <uint8> <string> <double>
715
- 1 1 A 1.0
716
- 2 (nil) C 3.0
782
+ 0 1 A 1.0
783
+ 1 (nil) C 3.0
717
784
  ```
718
785
 
719
786
  ### `rename`
@@ -734,9 +801,9 @@ penguins.to_rover
734
801
  #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000060838>
735
802
  name age_in_1993
736
803
  <string> <uint8>
737
- 1 Yasuko 68
738
- 2 Rui 49
739
- 3 Hinata 28
804
+ 0 Yasuko 68
805
+ 1 Rui 49
806
+ 2 Hinata 28
740
807
  ```
741
808
 
742
809
  - Key pairs by a block
@@ -772,23 +839,27 @@ penguins.to_rover
772
839
 
773
840
  # =>
774
841
  #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000062804>
775
- name age
776
- <string> <uint8>
777
- 1 Yasuko 68
778
- 2 Rui 49
779
- 3 Hinata 28
842
+ name age
843
+ <string> <uint8>
844
+ 0 Yasuko 68
845
+ 1 Rui 49
846
+ 2 Hinata 28
780
847
 
781
848
  # update :age and add :brother
782
- assigner = { age: [97, 78, 57], brother: ['Santa', nil, 'Momotaro'] }
783
- df.assign(assigner)
849
+ df.assign do
850
+ {
851
+ age: age + 29,
852
+ brother: ['Santa', nil, 'Momotaro']
853
+ }
854
+ end
784
855
 
785
856
  # =>
786
857
  #<RedAmber::DataFrame : 3 x 3 Vectors, 0x00000000000658b0>
787
858
  name age brother
788
859
  <string> <uint8> <string>
789
- 1 Yasuko 97 Santa
790
- 2 Rui 78 (nil)
791
- 3 Hinata 57 Momotaro
860
+ 0 Yasuko 97 Santa
861
+ 1 Rui 78 (nil)
862
+ 2 Hinata 57 Momotaro
792
863
  ```
793
864
 
794
865
  - Key pairs by a block
@@ -799,18 +870,19 @@ penguins.to_rover
799
870
  df = RedAmber::DataFrame.new(
800
871
  index: [0, 1, 2, 3, nil],
801
872
  float: [0.0, 1.1, 2.2, Float::NAN, nil],
802
- string: ['A', 'B', 'C', 'D', nil])
873
+ string: ['A', 'B', 'C', 'D', nil]
874
+ )
803
875
  df
804
876
 
805
877
  # =>
806
878
  #<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000069e60>
807
879
  index float string
808
880
  <uint8> <double> <string>
809
- 1 0 0.0 A
810
- 2 1 1.1 B
811
- 3 2 2.2 C
812
- 4 3 NaN D
813
- 5 (nil) (nil) (nil)
881
+ 0 0 0.0 A
882
+ 1 1 1.1 B
883
+ 2 2 2.2 C
884
+ 3 3 NaN D
885
+ 4 (nil) (nil) (nil)
814
886
 
815
887
  # update :float
816
888
  # assigner by an Array
@@ -821,13 +893,13 @@ penguins.to_rover
821
893
 
822
894
  # =>
823
895
  #<RedAmber::DataFrame : 5 x 3 Vectors, 0x00000000000dfffc>
824
- index float string
825
- <uint8> <double> <string>
826
- 1 0 -0.0 A
827
- 2 1 -1.1 B
828
- 3 2 -2.2 C
829
- 4 3 NaN D
830
- 5 (nil) (nil) (nil)
896
+ index float string
897
+ <uint8> <double> <string>
898
+ 0 0 -0.0 A
899
+ 1 1 -1.1 B
900
+ 2 2 -2.2 C
901
+ 3 3 NaN D
902
+ 4 (nil) (nil) (nil)
831
903
 
832
904
  # Or we can use assigner by a Hash
833
905
  df.assign do
@@ -852,17 +924,85 @@ penguins.to_rover
852
924
  `assign_left` method accepts the same parameters and block as `assign`, but append new columns from leftside.
853
925
 
854
926
  ```ruby
855
- df.assign_left(new_index: [1, 2, 3, 4, 5])
927
+ df.assign_left(new_index: df.indices(1))
856
928
 
857
929
  # =>
858
930
  #<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000001787c>
859
931
  new_index index float string
860
932
  <uint8> <uint8> <double> <string>
861
- 1 1 0 0.0 A
862
- 2 2 1 1.1 B
863
- 3 3 2 2.2 C
864
- 4 4 3 NaN D
865
- 5 5 (nil) (nil) (nil)
933
+ 0 1 0 0.0 A
934
+ 1 2 1 1.1 B
935
+ 2 3 2 2.2 C
936
+ 3 4 3 NaN D
937
+ 4 5 (nil) (nil) (nil)
938
+ ```
939
+
940
+ ### `slice_by(key, keep_key: false) { block }`
941
+
942
+ `slice_by` accepts a key and a block to select rows.
943
+
944
+ (Since 0.2.1)
945
+
946
+ ```ruby
947
+ df = RedAmber::DataFrame.new(
948
+ index: [0, 1, 2, 3, nil],
949
+ float: [0.0, 1.1, 2.2, Float::NAN, nil],
950
+ string: ['A', 'B', 'C', 'D', nil]
951
+ )
952
+ df
953
+
954
+ # =>
955
+ #<RedAmber::DataFrame : 5 x 3 Vectors, 0x0000000000069e60>
956
+ index float string
957
+ <uint8> <double> <string>
958
+ 0 0 0.0 A
959
+ 1 1 1.1 B
960
+ 2 2 2.2 C
961
+ 3 3 NaN D
962
+ 4 (nil) (nil) (nil)
963
+
964
+ df.slice_by(:string) { ["A", "C"] }
965
+
966
+ # =>
967
+ #<RedAmber::DataFrame : 2 x 2 Vectors, 0x000000000001b1ac>
968
+ index float
969
+ <uint8> <double>
970
+ 0 0 0.0
971
+ 1 2 2.2
972
+ ```
973
+
974
+ It behaves the same as:
975
+
976
+ ```ruby
977
+ df.slice { [string.index("A"), string.index("C")] }.drop(:string)
978
+ ```
979
+
980
+ `slice_by` also accepts a Range.
981
+
982
+ ```ruby
983
+ df.slice_by(:string) { "A".."C" }
984
+
985
+ # =>
986
+ #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000069668>
987
+ index float
988
+ <uint8> <double>
989
+ 0 0 0.0
990
+ 1 1 1.1
991
+ 2 2 2.2
992
+ ```
993
+
994
+ When the option `keep_key: true` is used, the column `key` will be preserved.
995
+
996
+ ```ruby
997
+ df.slice_by(:string, keep_key: true) { "A".."C" }
998
+
999
+ # =>
1000
+ #<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000073c44>
1001
+ index float string
1002
+ <uint8> <double> <string>
1003
+ 0 0 0.0 A
1004
+ 1 1 1.1 B
1005
+ 2 2 2.2 C
866
1006
  ```
867
1007
 
868
1008
  ## Updating
@@ -874,22 +1014,22 @@ penguins.to_rover
874
1014
  - "-key" denotes descending order
875
1015
 
876
1016
  ```ruby
877
- df = RedAmber::DataFrame.new({
1017
+ df = RedAmber::DataFrame.new(
878
1018
  index: [1, 1, 0, nil, 0],
879
1019
  string: ['C', 'B', nil, 'A', 'B'],
880
1020
  bool: [nil, true, false, true, false],
881
- })
1021
+ )
882
1022
  df.sort(:index, '-bool')
883
1023
 
884
1024
  # =>
885
1025
  #<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000009b03c>
886
1026
  index string bool
887
1027
  <uint8> <string> <boolean>
888
- 1 0 (nil) false
889
- 2 0 B false
890
- 3 1 B true
891
- 4 1 C (nil)
892
- 5 (nil) A true
1028
+ 0 0 (nil) false
1029
+ 1 0 B false
1030
+ 2 1 B true
1031
+ 3 1 C (nil)
1032
+ 4 (nil) A true
893
1033
  ```
894
1034
 
895
1035
  - [ ] Clamp
@@ -906,7 +1046,7 @@ penguins.to_rover
906
1046
 
907
1047
  ### `group(group_keys)`
908
1048
 
909
- `group` creates a class `Group` object. `Group` accepts functions below as a method.
1049
+ `group` creates an instance of class `Group`. `Group` accepts the functions below as methods.
910
1050
  Method accepts options as `group_keys`.
911
1051
 
912
1052
  Available functions are:
@@ -933,23 +1073,22 @@ penguins.to_rover
933
1073
  This is an example of grouping of famous STARWARS dataset.
934
1074
 
935
1075
  ```ruby
936
- starwars =
937
- RedAmber::DataFrame.load(URI("https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv"))
938
- starwars
1076
+ uri = URI("https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv")
1077
+ starwars = RedAmber::DataFrame.load(uri)
939
1078
 
940
1079
  # =>
941
1080
  #<RedAmber::DataFrame : 87 x 12 Vectors, 0x0000000000005a50>
942
1081
  unnamed1 name height mass hair_color skin_color eye_color ... species
943
1082
  <int64> <string> <int64> <double> <string> <string> <string> ... <string>
944
- 1 1 Luke Skywalker 172 77.0 blond fair blue ... Human
945
- 2 2 C-3PO 167 75.0 NA gold yellow ... Droid
946
- 3 3 R2-D2 96 32.0 NA white, blue red ... Droid
947
- 4 4 Darth Vader 202 136.0 none white yellow ... Human
948
- 5 5 Leia Organa 150 49.0 brown light brown ... Human
1083
+ 0 1 Luke Skywalker 172 77.0 blond fair blue ... Human
1084
+ 1 2 C-3PO 167 75.0 NA gold yellow ... Droid
1085
+ 2 3 R2-D2 96 32.0 NA white, blue red ... Droid
1086
+ 3 4 Darth Vader 202 136.0 none white yellow ... Human
1087
+ 4 5 Leia Organa 150 49.0 brown light brown ... Human
949
1088
  : : : : : : : : ... :
950
- 85 85 BB8 (nil) (nil) none none black ... Droid
951
- 86 86 Captain Phasma (nil) (nil) unknown unknown unknown ... NA
952
- 87 87 Padmé Amidala 165 45.0 brown light brown ... Human
1089
+ 84 85 BB8 (nil) (nil) none none black ... Droid
1090
+ 85 86 Captain Phasma (nil) (nil) unknown unknown unknown ... NA
1091
+ 86 87 Padmé Amidala 165 45.0 brown light brown ... Human
953
1092
 
954
1093
  starwars.tdr(12)
955
1094
 
@@ -957,58 +1096,60 @@ penguins.to_rover
957
1096
  RedAmber::DataFrame : 87 x 12 Vectors
958
1097
  Vectors : 4 numeric, 8 strings
959
1098
  # key type level data_preview
960
- 1 :unnamed1 int64 87 [1, 2, 3, 4, 5, ... ]
961
- 2 :name string 87 ["Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Organa", ... ]
962
- 3 :height int64 46 [172, 167, 96, 202, 150, ... ], 6 nils
963
- 4 :mass double 39 [77.0, 75.0, 32.0, 136.0, 49.0, ... ], 28 nils
964
- 5 :hair_color string 13 ["blond", "NA", "NA", "none", "brown", ... ]
965
- 6 :skin_color string 31 ["fair", "gold", "white, blue", "white", "light", ... ]
966
- 7 :eye_color string 15 ["blue", "yellow", "red", "yellow", "brown", ... ]
967
- 8 :birth_year double 37 [19.0, 112.0, 33.0, 41.9, 19.0, ... ], 44 nils
968
- 9 :sex string 5 {"male"=>60, "none"=>6, "female"=>16, "hermaphroditic"=>1, "NA"=>4}
969
- 10 :gender string 3 {"masculine"=>66, "feminine"=>17, "NA"=>4}
970
- 11 :homeworld string 49 ["Tatooine", "Tatooine", "Naboo", "Tatooine", "Alderaan", ... ]
971
- 12 :species string 38 ["Human", "Droid", "Droid", "Human", "Human", ... ]
1099
+ 0 :unnamed1 int64 87 [1, 2, 3, 4, 5, ... ]
1100
+ 1 :name string 87 ["Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Organa", ... ]
1101
+ 2 :height int64 46 [172, 167, 96, 202, 150, ... ], 6 nils
1102
+ 3 :mass double 39 [77.0, 75.0, 32.0, 136.0, 49.0, ... ], 28 nils
1103
+ 4 :hair_color string 13 ["blond", "NA", "NA", "none", "brown", ... ]
1104
+ 5 :skin_color string 31 ["fair", "gold", "white, blue", "white", "light", ... ]
1105
+ 6 :eye_color string 15 ["blue", "yellow", "red", "yellow", "brown", ... ]
1106
+ 7 :birth_year double 37 [19.0, 112.0, 33.0, 41.9, 19.0, ... ], 44 nils
1107
+ 8 :sex string 5 {"male"=>60, "none"=>6, "female"=>16, "hermaphroditic"=>1, "NA"=>4}
1108
+ 9 :gender string 3 {"masculine"=>66, "feminine"=>17, "NA"=>4}
1109
+ 10 :homeworld string 49 ["Tatooine", "Tatooine", "Naboo", "Tatooine", "Alderaan", ... ]
1110
+ 11 :species string 38 ["Human", "Droid", "Droid", "Human", "Human", ... ]
972
1111
  ```
973
1112
 
974
1113
  We can group by `:species` and calculate the count.
975
1114
 
976
1115
  ```ruby
977
- starwars.group(:species).count(:species)
1116
+ starwars.remove { species == "NA" }
1117
+ .group(:species).count(:species)
978
1118
 
979
1119
  # =>
980
- #<RedAmber::DataFrame : 38 x 2 Vectors, 0x000000000001d6f0>
1120
+ #<RedAmber::DataFrame : 37 x 2 Vectors, 0x000000000000ffa0>
981
1121
  species count
982
1122
  <string> <int64>
983
- 1 Human 35
984
- 2 Droid 6
985
- 3 Wookiee 2
986
- 4 Rodian 1
987
- 5 Hutt 1
1123
+ 0 Human 35
1124
+ 1 Droid 6
1125
+ 2 Wookiee 2
1126
+ 3 Rodian 1
1127
+ 4 Hutt 1
988
1128
  : : :
989
- 36 Kaleesh 1
990
- 37 Pau'an 1
991
- 38 Kel Dor 1
1129
+ 34 Kaleesh 1
1130
+ 35 Pau'an 1
1131
+ 36 Kel Dor 1
992
1132
  ```
993
1133
 
994
1134
  We can also calculate the mean of `:mass` and `:height` together.
995
1135
 
996
1136
  ```ruby
997
- grouped = starwars.group(:species) { [count(:species), mean(:height, :mass)] }
1137
+ grouped = starwars.remove { species == "NA" }
1138
+ .group(:species) { [count(:species), mean(:height, :mass)] }
998
1139
 
999
1140
  # =>
1000
- #<RedAmber::DataFrame : 38 x 4 Vectors, 0x00000000000407cc>
1001
- specie s count mean(height) mean(mass)
1002
- <strin g> <int64> <double> <double>
1003
- 1 Human 35 176.6 82.8
1004
- 2 Droid 6 131.2 69.8
1005
- 3 Wookie e 2 231.0 124.0
1006
- 4 Rodian 1 173.0 74.0
1007
- 5 Hutt 1 175.0 1358.0
1008
- : : : : :
1009
- 36 Kalees h 1 216.0 159.0
1010
- 37 Pau'an 1 206.0 80.0
1011
- 38 Kel Dor 1 188.0 80.0
1141
+ #<RedAmber::DataFrame : 37 x 4 Vectors, 0x000000000000fff0>
1142
+ species count mean(height) mean(mass)
1143
+ <string> <int64> <double> <double>
1144
+ 0 Human 35 176.65 82.78
1145
+ 1 Droid 6 131.2 69.75
1146
+ 2 Wookiee 2 231.0 124.0
1147
+ 3 Rodian 1 173.0 74.0
1148
+ 4 Hutt 1 175.0 1358.0
1149
+ : : : : :
1150
+ 34 Kaleesh 1 216.0 159.0
1151
+ 35 Pau'an 1 206.0 80.0
1152
+ 36 Kel Dor 1 188.0 80.0
1012
1153
  ```
1013
1154
 
1014
1155
  Select rows for count > 1.
@@ -1017,25 +1158,26 @@ penguins.to_rover
1017
1158
  grouped.slice(grouped[:count] > 1)
1018
1159
 
1019
1160
  # =>
1020
- #<RedAmber::DataFrame : 9 x 4 Vectors, 0x000000000004c270>
1161
+ #<RedAmber::DataFrame : 8 x 4 Vectors, 0x000000000001002c>
1021
1162
  species count mean(height) mean(mass)
1022
1163
  <string> <int64> <double> <double>
1023
- 1 Human 35 176.6 82.8
1024
- 2 Droid 6 131.2 69.8
1025
- 3 Wookiee 2 231.0 124.0
1026
- 4 Gungan 3 208.7 74.0
1027
- 5 NA 4 181.3 48.0
1028
- : : : : :
1029
- 7 Twi'lek 2 179.0 55.0
1030
- 8 Mirialan 2 168.0 53.1
1031
- 9 Kaminoan 2 221.0 88.0
1164
+ 0 Human 35 176.65 82.78
1165
+ 1 Droid 6 131.2 69.75
1166
+ 2 Wookiee 2 231.0 124.0
1167
+ 3 Gungan 3 208.67 74.0
1168
+ 4 Zabrak 2 173.0 80.0
1169
+ 5 Twi'lek 2 179.0 55.0
1170
+ 6 Mirialan 2 168.0 53.1
1171
+ 7 Kaminoan 2 221.0 88.0
1032
1172
  ```
1033
1173
 
1034
1174
  ## Reshape
1035
1175
 
1176
+ ![dataframe reshaping image](doc/../image/reshaping_dataframe.png)
1177
+
1036
1178
  ### `transpose`
1037
1179
 
1038
- Creates transposed DataFrame for wide type dataframe.
1180
+ Creates a transposed DataFrame from a wide (messy) DataFrame.
1039
1181
 
1040
1182
  ```ruby
1041
1183
  import_cars = RedAmber::DataFrame.load('test/entity/import_cars.tsv')
@@ -1044,31 +1186,31 @@ penguins.to_rover
1044
1186
  #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000d520>
1045
1187
  Year Audi BMW BMW_MINI Mercedes-Benz VW
1046
1188
  <int64> <int64> <int64> <int64> <int64> <int64>
1047
- 1 2021 22535 35905 18211 51722 35215
1048
- 2 2020 22304 35712 20196 57041 36576
1049
- 3 2019 24222 46814 23813 66553 46794
1050
- 4 2018 26473 50982 25984 67554 51961
1051
- 5 2017 28336 52527 25427 68221 49040
1189
+ 0 2017 28336 52527 25427 68221 49040
1190
+ 1 2018 26473 50982 25984 67554 51961
1191
+ 2 2019 24222 46814 23813 66553 46794
1192
+ 3 2020 22304 35712 20196 57041 36576
1193
+ 4 2021 22535 35905 18211 51722 35215
1052
1194
 
1053
- import_cars.transpose
1195
+ import_cars.transpose(name: :Manufacturer)
1054
1196
 
1055
1197
  # =>
1056
- #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000ef74>
1057
- name 2021 2020 2019 2018 2017
1058
- <dictionary> <uint16> <uint16> <uint32> <uint32> <uint32>
1059
- 1 Audi 22535 22304 24222 26473 28336
1060
- 2 BMW 35905 35712 46814 50982 52527
1061
- 3 BMW_MINI 18211 20196 23813 25984 25427
1062
- 4 Mercedes-Benz 51722 57041 66553 67554 68221
1063
- 5 VW 35215 36576 46794 51961 49040
1198
+ #<RedAmber::DataFrame : 5 x 6 Vectors, 0x0000000000010a2c>
1199
+ Manufacturer 2017 2018 2019 2020 2021
1200
+ <string> <uint32> <uint32> <uint32> <uint16> <uint16>
1201
+ 0 Audi 28336 26473 24222 22304 22535
1202
+ 1 BMW 52527 50982 46814 35712 35905
1203
+ 2 BMW_MINI 25427 25984 23813 20196 18211
1204
+ 3 Mercedes-Benz 68221 67554 66553 57041 51722
1205
+ 4 VW 49040 51961 46794 36576 35215
1064
1206
  ```
1065
1207
 
1066
1208
  The leftmost column is created by original keys. Key name of the column is
1067
- named by 'name'.
1209
+ set by the `:name` parameter. If `:name` is not specified, `:NAME` is used for the key.
1068
1210
 
1069
1211
  ### `to_long(*keep_keys)`
1070
1212
 
1071
- Creates a 'long' DataFrame.
1213
+ Creates a 'long' (tidy) DataFrame from a 'wide' DataFrame.
1072
1214
 
1073
1215
  - Parameter `keep_keys` specifies the key names to keep.
1074
1216
 
@@ -1076,62 +1218,66 @@ penguins.to_rover
1076
1218
  import_cars.to_long(:Year)
1077
1219
 
1078
1220
  # =>
1079
- #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000012750>
1080
- Year name value
1081
- <uint16> <dictionary> <uint32>
1082
- 1 2021 Audi 22535
1083
- 2 2021 BMW 35905
1084
- 3 2021 BMW_MINI 18211
1085
- 4 2021 Mercedes-Benz 51722
1086
- 5 2021 VW 35215
1221
+ #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000011864>
1222
+ Year NAME VALUE
1223
+ <uint16> <string> <uint32>
1224
+ 0 2017 Audi 28336
1225
+ 1 2017 BMW 52527
1226
+ 2 2017 BMW_MINI 25427
1227
+ 3 2017 Mercedes-Benz 68221
1228
+ 4 2017 VW 49040
1087
1229
  : : : :
1088
- 23 2017 BMW_MINI 25427
1089
- 24 2017 Mercedes-Benz 68221
1090
- 25 2017 VW 49040
1230
+ 22 2021 BMW_MINI 18211
1231
+ 23 2021 Mercedes-Benz 51722
1232
+ 24 2021 VW 35215
1091
1233
  ```
1092
1234
 
1093
- - Option `:name` : key of the column which is come **from key names**.
1094
- - Option `:value` : key of the column which is come **from values**.
1235
+ - Option `:name` is the key of the column which came **from key names**.
1236
+ The default value is `:NAME` if it is not specified.
1237
+ - Option `:value` is the key of the column which came **from values**.
1238
+ The default value is `:VALUE` if it is not specified.
1095
1239
 
1096
1240
  ```ruby
1097
1241
  import_cars.to_long(:Year, name: :Manufacturer, value: :Num_of_imported)
1098
1242
 
1099
1243
  # =>
1100
- #<RedAmber::DataFrame : 25 x 3 Vectors, 0x0000000000017700>
1244
+ #<RedAmber::DataFrame : 25 x 3 Vectors, 0x000000000001359c>
1101
1245
  Year Manufacturer Num_of_imported
1102
- <uint16> <dictionary> <uint32>
1103
- 1 2021 Audi 22535
1104
- 2 2021 BMW 35905
1105
- 3 2021 BMW_MINI 18211
1106
- 4 2021 Mercedes-Benz 51722
1107
- 5 2021 VW 35215
1246
+ <uint16> <string> <uint32>
1247
+ 0 2017 Audi 28336
1248
+ 1 2017 BMW 52527
1249
+ 2 2017 BMW_MINI 25427
1250
+ 3 2017 Mercedes-Benz 68221
1251
+ 4 2017 VW 49040
1108
1252
  : : : :
1109
- 23 2017 BMW_MINI 25427
1110
- 24 2017 Mercedes-Benz 68221
1111
- 25 2017 VW 49040
1253
+ 22 2021 BMW_MINI 18211
1254
+ 23 2021 Mercedes-Benz 51722
1255
+ 24 2021 VW 35215
1112
1256
  ```
1113
1257
 
1114
1258
  ### `to_wide`
1115
1259
 
1116
- Creates a 'wide' DataFrame.
1260
+ Creates a 'wide' (messy) DataFrame from a 'long' DataFrame.
1117
1261
 
1118
- - Option `:name` : key of the column which will be expanded **to key name**.
1119
- - Option `:value` : key of the column which will be expanded **to values**.
1262
+ - Option `:name` is the key of the column which will be expanded **to key names**.
1263
+ The default value is `:NAME` if it is not specified.
1264
+ - Option `:value` is the key of the column which will be expanded **to values**.
1265
+ The default value is `:VALUE` if it is not specified.
1120
1266
 
1121
1267
  ```ruby
1122
1268
  import_cars.to_long(:Year).to_wide
1123
- # import_cars.to_long(:Year).to_wide(name: :name, value: :value)
1269
+ # import_cars.to_long(:Year).to_wide(name: :NAME, value: :VALUE)
1124
1270
  # is also OK
1125
1271
 
1126
1272
  # =>
1127
1273
  #<RedAmber::DataFrame : 5 x 6 Vectors, 0x000000000000f0f0>
1128
1274
  Year Audi BMW BMW_MINI Mercedes-Benz VW
1129
1275
  <uint16> <uint16> <uint16> <uint16> <uint32> <uint16>
1130
- 1 2021 22535 35905 18211 51722 35215
1131
- 2 2020 22304 35712 20196 57041 36576
1132
- 3 2019 24222 46814 23813 66553 46794
1133
- 4 2018 26473 50982 25984 67554 51961
1134
- 5 2017 28336 52527 25427 68221 49040
1276
+ 0 2017 28336 52527 25427 68221 49040
1277
+ 1 2018 26473 50982 25984 67554 51961
1278
+ 2 2019 24222 46814 23813 66553 46794
1279
+ 3 2020 22304 35712 20196 57041 36576
1280
+ 4 2021 22535 35905 18211 51722 35215
1135
1281
  ```
1136
1282
 
1137
1283
  ## Combine