red_amber 0.2.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +133 -51
- data/.yardopts +2 -0
- data/CHANGELOG.md +203 -1
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +61 -45
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +63 -0
- data/doc/DataFrame.md +35 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +295 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +537 -68
- data/lib/red_amber/data_frame_combinable.rb +776 -123
- data/lib/red_amber/data_frame_displayable.rb +248 -18
- data/lib/red_amber/data_frame_indexable.rb +122 -19
- data/lib/red_amber/data_frame_loadsave.rb +81 -10
- data/lib/red_amber/data_frame_reshaping.rb +216 -21
- data/lib/red_amber/data_frame_selectable.rb +781 -120
- data/lib/red_amber/data_frame_variable_operation.rb +561 -85
- data/lib/red_amber/group.rb +195 -21
- data/lib/red_amber/helper.rb +114 -32
- data/lib/red_amber/refinements.rb +206 -0
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +435 -58
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +321 -69
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +397 -24
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +15 -1
- data/red_amber.gemspec +4 -3
- metadata +19 -11
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -294
data/doc/Vector.md
CHANGED
@@ -182,6 +182,31 @@ boolean.all(skip_nulls: true) #=> true
|
|
182
182
|
boolean.all(skip_nulls: false) #=> false
|
183
183
|
```
|
184
184
|
|
185
|
+
### Check if `function` is an aggregation function: `Vector.aggregate?(function)`
|
186
|
+
|
187
|
+
Return true if `function` is a unary aggregation function. Otherwise return false.
|
188
|
+
|
189
|
+
### Treat aggregation function as an element-wise function: `propagate(function)`
|
190
|
+
|
191
|
+
Spread the return value of an aggregate function as if it is an element-wise function.
|
192
|
+
|
193
|
+
```ruby
|
194
|
+
vec = Vector.new(1, 2, 3, 4)
|
195
|
+
vec.propagate(:mean)
|
196
|
+
# =>
|
197
|
+
#<RedAmber::Vector(:double, size=4):0x000000000001985c>
|
198
|
+
[2.5, 2.5, 2.5, 2.5]
|
199
|
+
```
|
200
|
+
|
201
|
+
`#propagate` also accepts a block to compute with a customized aggregation function yielding a scalar.
|
202
|
+
|
203
|
+
```ruby
|
204
|
+
vec.propagate { |v| v.mean.round }
|
205
|
+
# =>
|
206
|
+
#<RedAmber::Vector(:uint8, size=4):0x000000000000cb98>
|
207
|
+
[3, 3, 3, 3]
|
208
|
+
```
|
209
|
+
|
185
210
|
### Unary element-wise: `vector.func => vector`
|
186
211
|
|
187
212
|

|
@@ -305,7 +330,7 @@ double.round(n_digits: -1)
|
|
305
330
|
|
306
331
|
Returns index of specified element.
|
307
332
|
|
308
|
-
### `quantiles(probs = [
|
333
|
+
### `quantiles(probs = [0.0, 0.25, 0.5, 0.75, 1.0], interpolation: :linear, skip_nils: true, min_count: 0)`
|
309
334
|
|
310
335
|
Returns quantiles for specified probabilities in a DataFrame.
|
311
336
|
|
@@ -513,3 +538,272 @@ vector.shift(fill: Float::NAN)
|
|
513
538
|
#<RedAmber::Vector(:double, size=5):0x0000000000011d3c>
|
514
539
|
[NaN, 1.0, 2.0, 3.0, 4.0]
|
515
540
|
```
|
541
|
+
|
542
|
+
### `split_to_columns(sep = ' ', limit = 0)`
|
543
|
+
|
544
|
+
Split string type Vector with any ASCII whitespace as separator.
|
545
|
+
Returns an Array of Vectors.
|
546
|
+
|
547
|
+
```ruby
|
548
|
+
vector = Vector.new(['a b', 'c d', 'e f'])
|
549
|
+
vector.split_to_columns
|
550
|
+
|
551
|
+
#=>
|
552
|
+
[#<RedAmber::Vector(:string, size=3):0x00000000000363a8>
|
553
|
+
["a", "c", "e"]
|
554
|
+
,
|
555
|
+
#<RedAmber::Vector(:string, size=3):0x00000000000363bc>
|
556
|
+
["b", "d", "f"]
|
557
|
+
]
|
558
|
+
```
|
559
|
+
It will be used for column splitting in DataFrame.
|
560
|
+
|
561
|
+
```ruby
|
562
|
+
df = DataFrame.new(year_month: %w[2022-01 2022-02 2022-03])
|
563
|
+
.assign(:year, :month) { year_month.split_to_columns('-') }
|
564
|
+
.drop(:year_month)
|
565
|
+
|
566
|
+
#=>
|
567
|
+
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f974>
|
568
|
+
year month
|
569
|
+
<string> <string>
|
570
|
+
0 2022 01
|
571
|
+
1 2022 02
|
572
|
+
2 2022 03
|
573
|
+
```
|
574
|
+
|
575
|
+
### `split_to_rows(sep = ' ', limit = 0)`
|
576
|
+
|
577
|
+
Split string type Vector with any ASCII whitespace as separator.
|
578
|
+
Returns a Vector flattened into rows.
|
579
|
+
|
580
|
+
```ruby
|
581
|
+
vector = Vector.new(['a b', 'c d', 'e f'])
|
582
|
+
vector.split_to_rows
|
583
|
+
|
584
|
+
#=>
|
585
|
+
#<RedAmber::Vector(:string, size=6):0x000000000002ccf4>
|
586
|
+
["a", "b", "c", "d", "e", "f"]
|
587
|
+
```
|
588
|
+
|
589
|
+
### `merge(other, sep: ' ')`
|
590
|
+
|
591
|
+
Merge a String or another string Vector to self using a separator.
|
592
|
+
Self must be a string Vector.
|
593
|
+
Returns merged string Vector.
|
594
|
+
|
595
|
+
```ruby
|
596
|
+
# with vector
|
597
|
+
vector = Vector.new(%w[a c e])
|
598
|
+
other = Vector.new(%w[b d f])
|
599
|
+
vector.merge(other)
|
600
|
+
|
601
|
+
#=>
|
602
|
+
#<RedAmber::Vector(:string, size=3):0x0000000000038b80>
|
603
|
+
["a b", "c d", "e f"]
|
604
|
+
```
|
605
|
+
|
606
|
+
If other is a String, it will be broadcast.
|
607
|
+
|
608
|
+
```ruby
|
609
|
+
# with string
|
610
|
+
vector = Vector.new(%w[a c e]); vector.merge('x')
|
611
|
+
|
612
|
+
#=>
|
613
|
+
#<RedAmber::Vector(:string, size=3):0x00000000000446b0>
|
614
|
+
["a x", "c x", "e x"]
|
615
|
+
```
|
616
|
+
|
617
|
+
You can specify separator string by :sep.
|
618
|
+
|
619
|
+
```ruby
|
620
|
+
# with vector
|
621
|
+
vector = Vector.new(%w[a c e])
|
622
|
+
other = Vector.new(%w[b d f])
|
623
|
+
vector.merge(other, sep: '')
|
624
|
+
|
625
|
+
#=>
|
626
|
+
#<RedAmber::Vector(:string, size=3):0x0000000000038b80>
|
627
|
+
["ab", "cd", "ef"]
|
628
|
+
```
|
629
|
+
|
630
|
+
### `concatenate(other)` or `concat(other)`
|
631
|
+
|
632
|
+
Concatenate other array-like to self and return a concatenated Vector.
|
633
|
+
- `other` is one of `Vector`, `Array`, `Arrow::Array` or `Arrow::ChunkedArray`
|
634
|
+
- Different type will be 'resolved'.
|
635
|
+
|
636
|
+
Concatenate to string
|
637
|
+
```ruby
|
638
|
+
string_vector
|
639
|
+
|
640
|
+
# =>
|
641
|
+
#<RedAmber::Vector(:string, size=2):0x00000000000037b4>
|
642
|
+
["A", "B"]
|
643
|
+
|
644
|
+
string_vector.concatenate([1, 2])
|
645
|
+
|
646
|
+
# =>
|
647
|
+
#<RedAmber::Vector(:string, size=4):0x0000000000003818>
|
648
|
+
["A", "B", "1", "2"]
|
649
|
+
```
|
650
|
+
|
651
|
+
Concatenate to integer
|
652
|
+
|
653
|
+
```ruby
|
654
|
+
integer_vector
|
655
|
+
|
656
|
+
# =>
|
657
|
+
#<RedAmber::Vector(:uint8, size=2):0x000000000000382c>
|
658
|
+
[1, 2]
|
659
|
+
|
660
|
+
integer_vector.concatenate(["A", "B"])
|
661
|
+
# =>
|
662
|
+
#<RedAmber::Vector(:uint8, size=4):0x0000000000003840>
|
663
|
+
[1, 2, 65, 66]
|
664
|
+
```
|
665
|
+
|
666
|
+
### `rank`
|
667
|
+
|
668
|
+
Returns numerical rank of self.
|
669
|
+
- Nil values are considered greater than any value.
|
670
|
+
- NaN values are considered greater than any value but smaller than nil values.
|
671
|
+
- Tiebreakers are ranked in order of appearance.
|
672
|
+
- `RankOptions` in C++ function is not implemented in C GLib yet.
|
673
|
+
This method is currently fixed to the default behavior.
|
674
|
+
|
675
|
+
Returns 0-based rank of self (0...size in range) as a Vector.
|
676
|
+
|
677
|
+
Rank of float Vector
|
678
|
+
```ruby
|
679
|
+
fv = Vector.new(0.1, nil, Float::NAN, 0.2, 0.1); fv
|
680
|
+
# =>
|
681
|
+
#<RedAmber::Vector(:double, size=5):0x000000000000c65c>
|
682
|
+
[0.1, nil, NaN, 0.2, 0.1]
|
683
|
+
|
684
|
+
fv.rank
|
685
|
+
# =>
|
686
|
+
#<RedAmber::Vector(:uint64, size=5):0x0000000000003868>
|
687
|
+
[0, 4, 3, 2, 1]
|
688
|
+
```
|
689
|
+
|
690
|
+
Rank of string Vector
|
691
|
+
```ruby
|
692
|
+
sv = Vector.new("A", "B", nil, "A", "C"); sv
|
693
|
+
# =>
|
694
|
+
#<RedAmber::Vector(:string, size=5):0x0000000000003854>
|
695
|
+
["A", "B", nil, "A", "C"]
|
696
|
+
|
697
|
+
sv.rank
|
698
|
+
# =>
|
699
|
+
#<RedAmber::Vector(:uint64, size=5):0x0000000000003868>
|
700
|
+
[0, 2, 4, 1, 3]
|
701
|
+
```
|
702
|
+
|
703
|
+
### `sample(integer_or_proportion)`
|
704
|
+
|
705
|
+
Pick up elements at random.
|
706
|
+
|
707
|
+
#### `sample` : without argument
|
708
|
+
|
709
|
+
Return a randomly selected element.
|
710
|
+
This is one of the aggregation functions.
|
711
|
+
|
712
|
+
```ruby
|
713
|
+
v = Vector.new('A'..'H'); v
|
714
|
+
# =>
|
715
|
+
#<RedAmber::Vector(:string, size=8):0x0000000000011b20>
|
716
|
+
["A", "B", "C", "D", "E", "F", "G", "H"]
|
717
|
+
|
718
|
+
v.sample
|
719
|
+
# =>
|
720
|
+
"C"
|
721
|
+
```
|
722
|
+
|
723
|
+
#### `sample(n)` : n as an Integer
|
724
|
+
|
725
|
+
Pick up n elements at random.
|
726
|
+
|
727
|
+
- Param `n` is number of elements to pick.
|
728
|
+
- `n` is a positive Integer
|
729
|
+
- If `n` is smaller than or equal to `size`, elements are picked without repetition.
|
730
|
+
- If `n` is greater than `size`, elements are picked repeatedly.
|
731
|
+
- Returns sampled elements as a Vector.
|
732
|
+
- If `n == 1` (in case of `sample(1)`), it returns a Vector of `size == 1` not a scalar.
|
733
|
+
|
734
|
+
```ruby
|
735
|
+
v.sample(1)
|
736
|
+
# =>
|
737
|
+
#<RedAmber::Vector(:string, size=1):0x000000000001a3b0>
|
738
|
+
["H"]
|
739
|
+
```
|
740
|
+
|
741
|
+
Sample same size of self: every element is picked in random order.
|
742
|
+
|
743
|
+
```ruby
|
744
|
+
v.sample(8)
|
745
|
+
# =>
|
746
|
+
#<RedAmber::Vector(:string, size=8):0x000000000001bda0>
|
747
|
+
["H", "D", "B", "F", "E", "A", "G", "C"]
|
748
|
+
```
|
749
|
+
|
750
|
+
Over sampling: "E", "A" and "H" are sampled repeatedly.
|
751
|
+
|
752
|
+
```ruby
|
753
|
+
v.sample(9)
|
754
|
+
# =>
|
755
|
+
#<RedAmber::Vector(:string, size=9):0x000000000001d790>
|
756
|
+
["E", "E", "A", "D", "H", "C", "A", "F", "H"]
|
757
|
+
```
|
758
|
+
|
759
|
+
#### `sample(prop)` : prop as a Float
|
760
|
+
|
761
|
+
Pick up elements by proportion `prop` at random.
|
762
|
+
|
763
|
+
- `prop` is proportion of elements to pick.
|
764
|
+
- `prop` is a positive Float.
|
765
|
+
- Absolute number of elements to pick: `prop*size`, rounded (by `half: :up`).
|
766
|
+
- If `prop` is smaller than or equal to 1.0, elements are picked without repetition.
|
767
|
+
- If `prop` is greater than 1.0, some elements are picked repeatedly.
|
768
|
+
- Returns sampled elements as a Vector.
|
769
|
+
- If picked element is only one, it returns a Vector of `size == 1` not a scalar.
|
770
|
+
|
771
|
+
Sample same size of self: every element is picked in random order.
|
772
|
+
|
773
|
+
```ruby
|
774
|
+
v.sample(1.0)
|
775
|
+
# =>
|
776
|
+
#<RedAmber::Vector(:string, size=8):0x000000000001bda0>
|
777
|
+
["D", "H", "F", "C", "A", "B", "E", "G"]
|
778
|
+
```
|
779
|
+
|
780
|
+
2 times over sampling.
|
781
|
+
|
782
|
+
```ruby
|
783
|
+
v.sample(2.0)
|
784
|
+
# =>
|
785
|
+
#<RedAmber::Vector(:string, size=16):0x00000000000233e8>
|
786
|
+
["H", "B", "C", "B", "C", "A", "F", "A", "E", "C", "H", "F", "F", "A", ... ]
|
787
|
+
```
|
788
|
+
|
789
|
+
### `sort(order = :ascending)`
|
790
|
+
|
791
|
+
Arrange values in Vector.
|
792
|
+
|
793
|
+
- `:+`, `:ascending` or without argument will sort in increasing order.
|
794
|
+
- `:-` or `:descending` will sort in decreasing order.
|
795
|
+
|
796
|
+
```ruby
|
797
|
+
Vector.new(%w[B D A E C]).sort
|
798
|
+
# same as #sort(:+)
|
799
|
+
# same as #sort(:ascending)
|
800
|
+
# =>
|
801
|
+
#<RedAmber::Vector(:string, size=5):0x000000000000c134>
|
802
|
+
["A", "B", "C", "D", "E"]
|
803
|
+
|
804
|
+
Vector.new(%w[B D A E C]).sort(:-)
|
805
|
+
# same as #sort(:descending)
|
806
|
+
# =>
|
807
|
+
#<RedAmber::Vector(:string, size=5):0x000000000000c148>
|
808
|
+
["E", "D", "C", "B", "A"]
|
809
|
+
```
|