red_amber 0.2.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +133 -51
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.md +203 -1
  5. data/Gemfile +2 -1
  6. data/LICENSE +1 -1
  7. data/README.md +61 -45
  8. data/benchmark/basic.yml +11 -4
  9. data/benchmark/combine.yml +3 -4
  10. data/benchmark/dataframe.yml +62 -0
  11. data/benchmark/group.yml +7 -1
  12. data/benchmark/reshape.yml +6 -2
  13. data/benchmark/vector.yml +63 -0
  14. data/doc/DataFrame.md +35 -12
  15. data/doc/DataFrame_Comparison.md +65 -0
  16. data/doc/SubFrames.md +11 -0
  17. data/doc/Vector.md +295 -1
  18. data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
  19. data/lib/red_amber/data_frame.rb +537 -68
  20. data/lib/red_amber/data_frame_combinable.rb +776 -123
  21. data/lib/red_amber/data_frame_displayable.rb +248 -18
  22. data/lib/red_amber/data_frame_indexable.rb +122 -19
  23. data/lib/red_amber/data_frame_loadsave.rb +81 -10
  24. data/lib/red_amber/data_frame_reshaping.rb +216 -21
  25. data/lib/red_amber/data_frame_selectable.rb +781 -120
  26. data/lib/red_amber/data_frame_variable_operation.rb +561 -85
  27. data/lib/red_amber/group.rb +195 -21
  28. data/lib/red_amber/helper.rb +114 -32
  29. data/lib/red_amber/refinements.rb +206 -0
  30. data/lib/red_amber/subframes.rb +1066 -0
  31. data/lib/red_amber/vector.rb +435 -58
  32. data/lib/red_amber/vector_aggregation.rb +312 -0
  33. data/lib/red_amber/vector_binary_element_wise.rb +387 -0
  34. data/lib/red_amber/vector_selectable.rb +321 -69
  35. data/lib/red_amber/vector_unary_element_wise.rb +436 -0
  36. data/lib/red_amber/vector_updatable.rb +397 -24
  37. data/lib/red_amber/version.rb +2 -1
  38. data/lib/red_amber.rb +15 -1
  39. data/red_amber.gemspec +4 -3
  40. metadata +19 -11
  41. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  42. data/lib/red_amber/vector_functions.rb +0 -294
data/doc/Vector.md CHANGED
@@ -182,6 +182,31 @@ boolean.all(skip_nulls: true) #=> true
182
182
  boolean.all(skip_nulls: false) #=> false
183
183
  ```
184
184
 
185
+ ### Check if `function` is an aggregation function: `Vector.aggregate?(function)`
186
+
187
+ Return true if `function` is a unary aggregation function. Otherwise return false.
188
+
189
+ ### Treat aggregation function as an element-wise function: `propagate(function)`
190
+
191
+ Spread the return value of an aggregate function as if it were an element-wise function.
192
+
193
+ ```ruby
194
+ vec = Vector.new(1, 2, 3, 4)
195
+ vec.propagate(:mean)
196
+ # =>
197
+ #<RedAmber::Vector(:double, size=4):0x000000000001985c>
198
+ [2.5, 2.5, 2.5, 2.5]
199
+ ```
200
+
201
+ `#propagate` also accepts a block to compute with a customized aggregation function yielding a scalar.
202
+
203
+ ```ruby
204
+ vec.propagate { |v| v.mean.round }
205
+ # =>
206
+ #<RedAmber::Vector(:uint8, size=4):0x000000000000cb98>
207
+ [3, 3, 3, 3]
208
+ ```
209
+
185
210
  ### Unary element-wise: `vector.func => vector`
186
211
 
187
212
  ![unary element-wise](doc/image/../../image/vector/unary_element_wise.png)
@@ -305,7 +330,7 @@ double.round(n_digits: -1)
305
330
 
306
331
  Returns index of specified element.
307
332
 
308
- ### `quantiles(probs = [1.0, 0.75, 0.5, 0.25, 0.0], interpolation: :linear, skip_nils: true, min_count: 0)`
333
+ ### `quantiles(probs = [0.0, 0.25, 0.5, 0.75, 1.0], interpolation: :linear, skip_nils: true, min_count: 0)`
309
334
 
310
335
  Returns quantiles for specified probabilities in a DataFrame.
311
336
 
@@ -513,3 +538,272 @@ vector.shift(fill: Float::NAN)
513
538
  #<RedAmber::Vector(:double, size=5):0x0000000000011d3c>
514
539
  [NaN, 1.0, 2.0, 3.0, 4.0]
515
540
  ```
541
+
542
+ ### `split_to_columns(sep = ' ', limit = 0)`
543
+
544
+ Split string type Vector with any ASCII whitespace as separator.
545
+ Returns an Array of Vectors.
546
+
547
+ ```ruby
548
+ vector = Vector.new(['a b', 'c d', 'e f'])
549
+ vector.split_to_columns
550
+
551
+ #=>
552
+ [#<RedAmber::Vector(:string, size=3):0x00000000000363a8>
553
+ ["a", "c", "e"]
554
+ ,
555
+ #<RedAmber::Vector(:string, size=3):0x00000000000363bc>
556
+ ["b", "d", "f"]
557
+ ]
558
+ ```
559
+ It will be used for column splitting in DataFrame.
560
+
561
+ ```ruby
562
+ df = DataFrame.new(year_month: %w[2022-01 2022-02 2022-03])
563
+ .assign(:year, :month) { year_month.split_to_columns('-') }
564
+ .drop(:year_month)
565
+
566
+ #=>
567
+ #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f974>
568
+ year month
569
+ <string> <string>
570
+ 0 2022 01
571
+ 1 2022 02
572
+ 2 2022 03
573
+ ```
574
+
575
+ ### `split_to_rows(sep = ' ', limit = 0)`
576
+
577
+ Split string type Vector with any ASCII whitespace as separator.
578
+ Returns a Vector flattened into rows.
579
+
580
+ ```ruby
581
+ vector = Vector.new(['a b', 'c d', 'e f'])
582
+ vector.split_to_rows
583
+
584
+ #=>
585
+ #<RedAmber::Vector(:string, size=6):0x000000000002ccf4>
586
+ ["a", "b", "c", "d", "e", "f"]
587
+ ```
588
+
589
+ ### `merge(other, sep: ' ')`
590
+
591
+ Merge a String or another string Vector to self using a separator.
592
+ Self must be a string Vector.
593
+ Returns merged string Vector.
594
+
595
+ ```ruby
596
+ # with vector
597
+ vector = Vector.new(%w[a c e])
598
+ other = Vector.new(%w[b d f])
599
+ vector.merge(other)
600
+
601
+ #=>
602
+ #<RedAmber::Vector(:string, size=3):0x0000000000038b80>
603
+ ["a b", "c d", "e f"]
604
+ ```
605
+
606
+ If other is a String it will be broadcasted.
607
+
608
+ ```ruby
609
+ # with string
610
+ vector = Vector.new(%w[a c e])
611
+ vector.merge('x')
612
+ #=>
613
+ #<RedAmber::Vector(:string, size=3):0x00000000000446b0>
614
+ ["a x", "c x", "e x"]
615
+ ```
616
+
617
+ You can specify separator string by :sep.
618
+
619
+ ```ruby
620
+ # with vector
621
+ vector = Vector.new(%w[a c e])
622
+ other = Vector.new(%w[b d f])
623
+ vector.merge(other, sep: '')
624
+
625
+ #=>
626
+ #<RedAmber::Vector(:string, size=3):0x0000000000038b80>
627
+ ["ab", "cd", "ef"]
628
+ ```
629
+
630
+ ### `concatenate(other)` or `concat(other)`
631
+
632
+ Concatenate other array-like to self and return a concatenated Vector.
633
+ - `other` is one of `Vector`, `Array`, `Arrow::Array` or `Arrow::ChunkedArray`
634
+ - Different type will be 'resolved'.
635
+
636
+ Concatenate to string
637
+ ```ruby
638
+ string_vector
639
+
640
+ # =>
641
+ #<RedAmber::Vector(:string, size=2):0x00000000000037b4>
642
+ ["A", "B"]
643
+
644
+ string_vector.concatenate([1, 2])
645
+
646
+ # =>
647
+ #<RedAmber::Vector(:string, size=4):0x0000000000003818>
648
+ ["A", "B", "1", "2"]
649
+ ```
650
+
651
+ Concatenate to integer
652
+
653
+ ```ruby
654
+ integer_vector
655
+
656
+ # =>
657
+ #<RedAmber::Vector(:uint8, size=2):0x000000000000382c>
658
+ [1, 2]
659
+
660
+ integer_vector.concatenate(["A", "B"])
661
+ # =>
662
+ #<RedAmber::Vector(:uint8, size=4):0x0000000000003840>
663
+ [1, 2, 65, 66]
664
+ ```
665
+
666
+ ### `rank`
667
+
668
+ Returns numerical rank of self.
669
+ - Nil values are considered greater than any value.
670
+ - NaN values are considered greater than any value but smaller than nil values.
671
+ - Tiebreakers are ranked in order of appearance.
672
+ - `RankOptions` in C++ function is not implemented in C GLib yet.
673
+ This method is currently fixed to the default behavior.
674
+
675
+ Returns 0-based rank of self (0...size in range) as a Vector.
676
+
677
+ Rank of float Vector
678
+ ```ruby
679
+ fv = Vector.new(0.1, nil, Float::NAN, 0.2, 0.1); fv
680
+ # =>
681
+ #<RedAmber::Vector(:double, size=5):0x000000000000c65c>
682
+ [0.1, nil, NaN, 0.2, 0.1]
683
+
684
+ fv.rank
685
+ # =>
686
+ #<RedAmber::Vector(:uint64, size=5):0x0000000000003868>
687
+ [0, 4, 3, 2, 1]
688
+ ```
689
+
690
+ Rank of string Vector
691
+ ```ruby
692
+ sv = Vector.new("A", "B", nil, "A", "C"); sv
693
+ # =>
694
+ #<RedAmber::Vector(:string, size=5):0x0000000000003854>
695
+ ["A", "B", nil, "A", "C"]
696
+
697
+ sv.rank
698
+ # =>
699
+ #<RedAmber::Vector(:uint64, size=5):0x0000000000003868>
700
+ [0, 2, 4, 1, 3]
701
+ ```
702
+
703
+ ### `sample(integer_or_proportion)`
704
+
705
+ Pick up elements at random.
706
+
707
+ #### `sample` : without argument
708
+
709
+ Return a randomly selected element.
710
+ This is one of the aggregation functions.
711
+
712
+ ```ruby
713
+ v = Vector.new('A'..'H'); v
714
+ # =>
715
+ #<RedAmber::Vector(:string, size=8):0x0000000000011b20>
716
+ ["A", "B", "C", "D", "E", "F", "G", "H"]
717
+
718
+ v.sample
719
+ # =>
720
+ "C"
721
+ ```
722
+
723
+ #### `sample(n)` : n as an Integer
724
+
725
+ Pick up n elements at random.
726
+
727
+ - Param `n` is number of elements to pick.
728
+ - `n` is a positive Integer
729
+ - If `n` is smaller than or equal to `size`, elements are picked without repetition.
730
+ - If `n` is greater than `size`, elements are picked repeatedly.
731
+ - Returns sampled elements as a Vector.
732
+ - If `n == 1` (in case of `sample(1)`), it returns a Vector of `size == 1` not a scalar.
733
+
734
+ ```ruby
735
+ v.sample(1)
736
+ # =>
737
+ #<RedAmber::Vector(:string, size=1):0x000000000001a3b0>
738
+ ["H"]
739
+ ```
740
+
741
+ Sample same size of self: every element is picked in random order.
742
+
743
+ ```ruby
744
+ v.sample(8)
745
+ # =>
746
+ #<RedAmber::Vector(:string, size=8):0x000000000001bda0>
747
+ ["H", "D", "B", "F", "E", "A", "G", "C"]
748
+ ```
749
+
750
+ Over sampling: "E", "A" and "H" are sampled repeatedly.
751
+
752
+ ```ruby
753
+ v.sample(9)
754
+ # =>
755
+ #<RedAmber::Vector(:string, size=9):0x000000000001d790>
756
+ ["E", "E", "A", "D", "H", "C", "A", "F", "H"]
757
+ ```
758
+
759
+ #### `sample(prop)` : prop as a Float
760
+
761
+ Pick up elements by proportion `prop` at random.
762
+
763
+ - `prop` is proportion of elements to pick.
764
+ - `prop` is a positive Float.
765
+ - Absolute number of elements to pick:`prop*size` is rounded (by `half: :up`).
766
+ - If `prop` is smaller than or equal to 1.0, elements are picked without repetition.
767
+ - If `prop` is greater than 1.0, some elements are picked repeatedly.
768
+ - Returns sampled elements as a Vector.
769
+ - If picked element is only one, it returns a Vector of `size == 1` not a scalar.
770
+
771
+ Sample same size of self: every element is picked in random order.
772
+
773
+ ```ruby
774
+ v.sample(1.0)
775
+ # =>
776
+ #<RedAmber::Vector(:string, size=8):0x000000000001bda0>
777
+ ["D", "H", "F", "C", "A", "B", "E", "G"]
778
+ ```
779
+
780
+ 2 times over sampling.
781
+
782
+ ```ruby
783
+ v.sample(2.0)
784
+ # =>
785
+ #<RedAmber::Vector(:string, size=16):0x00000000000233e8>
786
+ ["H", "B", "C", "B", "C", "A", "F", "A", "E", "C", "H", "F", "F", "A", ... ]
787
+ ```
788
+
789
+ ### `sort(order = :ascending)`
790
+
791
+ Arrange values in Vector.
792
+
793
+ - `:+`, `:ascending` or without argument will sort in increasing order.
794
+ - `:-` or `:descending` will sort in decreasing order.
795
+
796
+ ```ruby
797
+ Vector.new(%w[B D A E C]).sort
798
+ # same as #sort(:+)
799
+ # same as #sort(:ascending)
800
+ # =>
801
+ #<RedAmber::Vector(:string, size=5):0x000000000000c134>
802
+ ["A", "B", "C", "D", "E"]
803
+
804
+ Vector.new(%w[B D A E C]).sort(:-)
805
+ # same as #sort(:descending)
806
+ # =>
807
+ #<RedAmber::Vector(:string, size=5):0x000000000000c148>
808
+ ["E", "D", "C", "B", "A"]
809
+ ```
@@ -0,0 +1,6 @@
1
+ /* Override this file with custom rules */
2
+
3
+ /* Use monospace font for code */
4
+ code {
5
+ font-family: "Courier New", Consolas, monospace;
6
+ }