red_amber 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.devcontainer/Dockerfile +75 -0
- data/.devcontainer/devcontainer.json +38 -0
- data/.devcontainer/onCreateCommand.sh +26 -0
- data/.rubocop.yml +3 -3
- data/CHANGELOG.md +102 -18
- data/Gemfile +1 -1
- data/README.ja.md +51 -32
- data/README.md +46 -30
- data/Rakefile +55 -0
- data/doc/DataFrame_Comparison.md +9 -13
- data/doc/DataFrame_Comparison_ja.md +61 -0
- data/doc/Dev_Containers.ja.md +290 -0
- data/doc/Dev_Containers.md +292 -0
- data/doc/qmd/examples_of_red_amber.qmd +4596 -0
- data/doc/qmd/red-amber.qmd +90 -0
- data/docker/Dockerfile +2 -2
- data/docker/Gemfile +1 -1
- data/docker/docker-compose.yml +1 -1
- data/docker/readme.md +5 -5
- data/lib/red_amber/data_frame_displayable.rb +1 -1
- data/lib/red_amber/data_frame_loadsave.rb +1 -1
- data/lib/red_amber/data_frame_selectable.rb +2 -2
- data/lib/red_amber/data_frame_variable_operation.rb +6 -6
- data/lib/red_amber/group.rb +287 -39
- data/lib/red_amber/subframes.rb +6 -6
- data/lib/red_amber/vector.rb +2 -1
- data/lib/red_amber/vector_selectable.rb +68 -35
- data/lib/red_amber/vector_string_function.rb +81 -13
- data/lib/red_amber/version.rb +1 -1
- data/red_amber.gemspec +3 -3
- metadata +15 -11
- data/docker/Gemfile.lock +0 -118
- data/docker/example +0 -86
- data/docker/notebook/examples_of_red_amber.ipynb +0 -8562
- data/docker/notebook/red-amber.ipynb +0 -188
| @@ -0,0 +1,90 @@ | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            title: RedAmber Examples
         | 
| 3 | 
            +
            date: 2023-08-06
         | 
| 4 | 
            +
            author: heronshoes
         | 
| 5 | 
            +
            jupyter: ruby
         | 
| 6 | 
            +
            format:
         | 
| 7 | 
            +
              pdf:
         | 
| 8 | 
            +
                toc: true
         | 
| 9 | 
            +
            ---
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            This notebook walks through the [README of RedAmber](https://github.com/heronshoes/red_amber#readme).
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            ## `RedAmber::DataFrame`
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            ```{ruby}
         | 
| 16 | 
            +
            #| tags: []
         | 
| 17 | 
            +
            require 'red_amber'
         | 
| 18 | 
            +
            include RedAmber
         | 
| 19 | 
            +
            require 'datasets-arrow'
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            {RedAmber: VERSION, Datasets: Datasets::VERSION}
         | 
| 22 | 
            +
            ```
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            ## Example: diamonds dataset
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            The first time Datasets::Diamonds is loaded, it will take some time to download the data.
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            ```{ruby}
         | 
| 29 | 
            +
            #| tags: []
         | 
| 30 | 
            +
            dataset = Datasets::Diamonds.new
         | 
| 31 | 
            +
            diamonds = DataFrame.new(dataset)
         | 
| 32 | 
            +
            ```
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            ```{ruby}
         | 
| 35 | 
            +
            #| tags: []
         | 
| 36 | 
            +
            df = diamonds
         | 
| 37 | 
            +
              .slice { carat > 1 } # or use #filter instead of #slice
         | 
| 38 | 
            +
              .group(:cut)
         | 
| 39 | 
            +
              .mean(:price) # `pick` prior to `group` is not required if `:price` is specified here.
         | 
| 40 | 
            +
              .sort('-mean(price)')
         | 
| 41 | 
            +
            ```
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            ```{ruby}
         | 
| 44 | 
            +
            #| tags: []
         | 
| 45 | 
            +
            usdjpy = 110.0 # when the yen was stronger
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            df.rename('mean(price)': :mean_price_USD)
         | 
| 48 | 
            +
              .assign(:mean_price_JPY) { mean_price_USD * usdjpy }
         | 
| 49 | 
            +
            ```
         | 
| 50 | 
            +
             | 
| 51 | 
            +
            ## Example: starwars dataset
         | 
| 52 | 
            +
             | 
| 53 | 
            +
            ```{ruby}
         | 
| 54 | 
            +
            #| tags: []
         | 
| 55 | 
            +
            uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')
         | 
| 56 | 
            +
             | 
| 57 | 
            +
            starwars = DataFrame.load(uri)
         | 
| 58 | 
            +
            ```
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            ```{ruby}
         | 
| 61 | 
            +
            #| tags: []
         | 
| 62 | 
            +
            starwars
         | 
| 63 | 
            +
              .drop(0) # delete unnecessary index column
         | 
| 64 | 
            +
              .remove { species == "NA" } # delete unnecessary rows
         | 
| 65 | 
            +
              .group(:species) { [count(:species), mean(:height, :mass)] }
         | 
| 66 | 
            +
              .slice { count > 1 } # or use #filter instead of slice
         | 
| 67 | 
            +
            ```
         | 
| 68 | 
            +
             | 
| 69 | 
            +
            ## `RedAmber::Vector`
         | 
| 70 | 
            +
             | 
| 71 | 
            +
            ```{ruby}
         | 
| 72 | 
            +
            #| tags: []
         | 
| 73 | 
            +
            penguins = DataFrame.new(Datasets::Penguins.new)
         | 
| 74 | 
            +
            ```
         | 
| 75 | 
            +
             | 
| 76 | 
            +
            ```{ruby}
         | 
| 77 | 
            +
            #| tags: []
         | 
| 78 | 
            +
            penguins[:bill_length_mm]
         | 
| 79 | 
            +
            ```
         | 
| 80 | 
            +
             | 
| 81 | 
            +
            ```{ruby}
         | 
| 82 | 
            +
            #| tags: []
         | 
| 83 | 
            +
            penguins[:bill_length_mm] < 40
         | 
| 84 | 
            +
            ```
         | 
| 85 | 
            +
             | 
| 86 | 
            +
            ```{ruby}
         | 
| 87 | 
            +
            #| tags: []
         | 
| 88 | 
            +
            penguins[:bill_length_mm].mean
         | 
| 89 | 
            +
            ```
         | 
| 90 | 
            +
             | 
    
        data/docker/Dockerfile
    CHANGED
    
    
    
        data/docker/Gemfile
    CHANGED
    
    
    
        data/docker/docker-compose.yml
    CHANGED
    
    
    
        data/docker/readme.md
    CHANGED
    
    | @@ -6,12 +6,12 @@ This is a docker image containing RedAmber created from | |
| 6 6 | 
             
            ## Contents
         | 
| 7 7 |  | 
| 8 8 | 
             
            - From jupyter/minimal-notebook:
         | 
| 9 | 
            -
              - Based on 2023- | 
| 9 | 
            +
              - Based on 2023-05-15 (513d0cb8a67c)
         | 
| 10 10 | 
             
              - x86-64
         | 
| 11 11 | 
             
              - Ubuntu-22.04
         | 
| 12 | 
            -
              - python-3.10. | 
| 13 | 
            -
              - lab-3.6. | 
| 14 | 
            -
              - notebook-6.5. | 
| 12 | 
            +
              - python-3.10.11
         | 
| 13 | 
            +
              - lab-3.6.3
         | 
| 14 | 
            +
              - notebook-6.5.4
         | 
| 15 15 | 
             
            - System ruby-dev:
         | 
| 16 16 | 
             
              - Ruby 3.0.2
         | 
| 17 17 | 
             
            - Arrow 11.0.0 for Ubuntu:
         | 
| @@ -22,7 +22,7 @@ This is a docker image containing RedAmber created from | |
| 22 22 | 
             
            - Locally installed iruby:
         | 
| 23 23 | 
             
              - Using Ruby 3.0.2
         | 
| 24 24 | 
             
            - Locally installed bundler and Gemfile:
         | 
| 25 | 
            -
              - RedAmber 0. | 
| 25 | 
            +
              - RedAmber 0.5.0
         | 
| 26 26 | 
             
              - Others (see Gemfile)
         | 
| 27 27 |  | 
| 28 28 | 
             
            ## Install
         | 
| @@ -44,7 +44,7 @@ module RedAmber | |
| 44 44 | 
             
                  #   BUFFER
         | 
| 45 45 | 
             
                  #
         | 
| 46 46 | 
             
                  # @example Load from a Buffer skipping comment line
         | 
| 47 | 
            -
                  #   DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines:  | 
| 47 | 
            +
                  #   DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines: /\A#/)
         | 
| 48 48 | 
             
                  #     # comment
         | 
| 49 49 | 
             
                  #     name,age
         | 
| 50 50 | 
             
                  #     Yasuko,68
         | 
| @@ -39,7 +39,7 @@ module RedAmber | |
| 39 39 | 
             
                #     penguins[:bill_length_mm]
         | 
| 40 40 | 
             
                #
         | 
| 41 41 | 
             
                #     # =>
         | 
| 42 | 
            -
                #     #<RedAmber::Vector(:double, size=344): | 
| 42 | 
            +
                #     #<RedAmber::Vector(:double, size=344, chunked):0x0000000000008f0c>
         | 
| 43 43 | 
             
                #     [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
         | 
| 44 44 | 
             
                #
         | 
| 45 45 | 
             
                # @overload [](keys)
         | 
| @@ -173,7 +173,7 @@ module RedAmber | |
| 173 173 | 
             
                #   penguins.v(:bill_length_mm)
         | 
| 174 174 | 
             
                #
         | 
| 175 175 | 
             
                #   # =>
         | 
| 176 | 
            -
                #   #<RedAmber::Vector(:double, size=344): | 
| 176 | 
            +
                #   #<RedAmber::Vector(:double, size=344, chunked):0x0000000000008f0c>
         | 
| 177 177 | 
             
                #   [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
         | 
| 178 178 | 
             
                #
         | 
| 179 179 | 
             
                def v(key)
         | 
| @@ -44,7 +44,7 @@ module RedAmber | |
| 44 44 | 
             
                #     languages[:Language]
         | 
| 45 45 | 
             
                #
         | 
| 46 46 | 
             
                #     # =>
         | 
| 47 | 
            -
                #     #<RedAmber::Vector(:string, size=4):0x000000000010359c>
         | 
| 47 | 
            +
                #     #<RedAmber::Vector(:string, size=4, chunked):0x000000000010359c>
         | 
| 48 48 | 
             
                #     ["Ruby", "Python", "R", "Rust"]
         | 
| 49 49 | 
             
                #
         | 
| 50 50 | 
             
                # @overload pick(booleans)
         | 
| @@ -512,8 +512,8 @@ module RedAmber | |
| 512 512 | 
             
                #     1 Rui           49          78 (nil)
         | 
| 513 513 | 
             
                #     2 Hinata        28          57 Momotaro
         | 
| 514 514 | 
             
                #
         | 
| 515 | 
            -
                def assign( | 
| 516 | 
            -
                  assign_update( | 
| 515 | 
            +
                def assign(...)
         | 
| 516 | 
            +
                  assign_update(false, ...)
         | 
| 517 517 | 
             
                end
         | 
| 518 518 |  | 
| 519 519 | 
             
                # Assign new or updated variables (columns) and create an updated DataFrame.
         | 
| @@ -583,13 +583,13 @@ module RedAmber | |
| 583 583 | 
             
                #   @return [DataFrame]
         | 
| 584 584 | 
             
                #     assigned DataFrame.
         | 
| 585 585 | 
             
                #
         | 
| 586 | 
            -
                def assign_left( | 
| 587 | 
            -
                  assign_update( | 
| 586 | 
            +
                def assign_left(...)
         | 
| 587 | 
            +
                  assign_update(true, ...)
         | 
| 588 588 | 
             
                end
         | 
| 589 589 |  | 
| 590 590 | 
             
                private
         | 
| 591 591 |  | 
| 592 | 
            -
                def assign_update(*assigner,  | 
| 592 | 
            +
                def assign_update(append_to_left, *assigner, &block)
         | 
| 593 593 | 
             
                  if block
         | 
| 594 594 | 
             
                    assigner_from_block = instance_eval(&block)
         | 
| 595 595 | 
             
                    assigner =
         | 
    
        data/lib/red_amber/group.rb
    CHANGED
    
    | @@ -26,12 +26,7 @@ module RedAmber | |
| 26 26 | 
             
                  private
         | 
| 27 27 |  | 
| 28 28 | 
             
                  # @!macro [attach] define_group_aggregation
         | 
| 29 | 
            -
                  #    | 
| 30 | 
            -
                  #     Group aggregation function `$1`.
         | 
| 31 | 
            -
                  #     @param summary_keys [Array<Symbol, String>]
         | 
| 32 | 
            -
                  #       summary keys.
         | 
| 33 | 
            -
                  #     @return [DataFrame]
         | 
| 34 | 
            -
                  #       aggregated DataFrame
         | 
| 29 | 
            +
                  #   Returns aggregated DataFrame.
         | 
| 35 30 | 
             
                  #
         | 
| 36 31 | 
             
                  def define_group_aggregation(function)
         | 
| 37 32 | 
             
                    define_method(function) do |*summary_keys|
         | 
| @@ -55,7 +50,7 @@ module RedAmber | |
| 55 50 | 
             
                # @param group_keys [Array<Symbol, String>]
         | 
| 56 51 | 
             
                #   keys for grouping.
         | 
| 57 52 | 
             
                # @return [Group]
         | 
| 58 | 
            -
                #   Group object.
         | 
| 53 | 
            +
                #   Group object. Inspecting it shows the grouped columns and their counts.
         | 
| 59 54 | 
             
                # @example
         | 
| 60 55 | 
             
                #   Group.new(penguins, :species)
         | 
| 61 56 | 
             
                #
         | 
| @@ -79,13 +74,93 @@ module RedAmber | |
| 79 74 | 
             
                  @group = @dataframe.table.group(*@group_keys)
         | 
| 80 75 | 
             
                end
         | 
| 81 76 |  | 
| 82 | 
            -
                 | 
| 77 | 
            +
                # @!macro group_aggregation
         | 
| 78 | 
            +
                #   @param group_keys [Array<Symbol, String>]
         | 
| 79 | 
            +
                #     keys for grouping.
         | 
| 80 | 
            +
                #   @return [DataFrame]
         | 
| 81 | 
            +
                #     aggregated DataFrame
         | 
| 82 | 
            +
             | 
| 83 | 
            +
                # Whether all elements in each group evaluate to true.
         | 
| 84 | 
            +
                #
         | 
| 85 | 
            +
                # @!method all(*group_keys)
         | 
| 86 | 
            +
                #   @macro group_aggregation
         | 
| 87 | 
            +
                #   @example For boolean columns by default.
         | 
| 88 | 
            +
                #     dataframe
         | 
| 89 | 
            +
                #
         | 
| 90 | 
            +
                #     # =>
         | 
| 91 | 
            +
                #     #<RedAmber::DataFrame : 6 x 3 Vectors, 0x00000000000230dc>
         | 
| 92 | 
            +
                #             x y        z
         | 
| 93 | 
            +
                #       <uint8> <string> <boolean>
         | 
| 94 | 
            +
                #     0       1 A        false
         | 
| 95 | 
            +
                #     1       2 A        true
         | 
| 96 | 
            +
                #     2       3 B        false
         | 
| 97 | 
            +
                #     3       4 B        (nil)
         | 
| 98 | 
            +
                #     4       5 B        true
         | 
| 99 | 
            +
                #     5       6 C        false
         | 
| 100 | 
            +
                #
         | 
| 101 | 
            +
                #     dataframe.group(:y).all
         | 
| 102 | 
            +
                #
         | 
| 103 | 
            +
                #     # =>
         | 
| 104 | 
            +
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000fc08>
         | 
| 105 | 
            +
                #       y        all(z)
         | 
| 106 | 
            +
                #       <string> <boolean>
         | 
| 107 | 
            +
                #     0 A        false
         | 
| 108 | 
            +
                #     1 B        false
         | 
| 109 | 
            +
                #     2 C        false
         | 
| 110 | 
            +
                #
         | 
| 111 | 
            +
                define_group_aggregation :all
         | 
| 112 | 
            +
             | 
| 113 | 
            +
                # Whether any elements in each group evaluate to true.
         | 
| 114 | 
            +
                #
         | 
| 115 | 
            +
                # @!method any(*group_keys)
         | 
| 116 | 
            +
                #   @macro group_aggregation
         | 
| 117 | 
            +
                #   @example For boolean columns by default.
         | 
| 118 | 
            +
                #     dataframe.group(:y).any
         | 
| 119 | 
            +
                #
         | 
| 120 | 
            +
                #     # =>
         | 
| 121 | 
            +
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000000117ec>
         | 
| 122 | 
            +
                #       y        any(z)
         | 
| 123 | 
            +
                #       <string> <boolean>
         | 
| 124 | 
            +
                #     0 A        true
         | 
| 125 | 
            +
                #     1 B        true
         | 
| 126 | 
            +
                #     2 C        false
         | 
| 127 | 
            +
                #
         | 
| 128 | 
            +
                define_group_aggregation :any
         | 
| 129 | 
            +
             | 
| 130 | 
            +
                # Count the number of non-nil values in each group.
         | 
| 131 | 
            +
                #   If counts are the same (and do not include NaN or nil),
         | 
| 132 | 
            +
                #   columns for counts are unified.
         | 
| 133 | 
            +
                #
         | 
| 134 | 
            +
                # @!method count(*group_keys)
         | 
| 135 | 
            +
                # @macro group_aggregation
         | 
| 136 | 
            +
                # @example Show counts for each group.
         | 
| 137 | 
            +
                #   dataframe.group(:y).count
         | 
| 138 | 
            +
                #
         | 
| 139 | 
            +
                #   # =>
         | 
| 140 | 
            +
                #   #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000011ea04>
         | 
| 141 | 
            +
                #     y        count(x) count(z)
         | 
| 142 | 
            +
                #     <string>  <int64>  <int64>
         | 
| 143 | 
            +
                #   0 A               2        2
         | 
| 144 | 
            +
                #   1 B               3        2
         | 
| 145 | 
            +
                #   2 C               1        1
         | 
| 146 | 
            +
                #
         | 
| 147 | 
            +
                #   dataframe.group(:z).count
         | 
| 148 | 
            +
                #   # same as dataframe.group(:z).count(:x, :y)
         | 
| 149 | 
            +
                #
         | 
| 150 | 
            +
                #   # =>
         | 
| 151 | 
            +
                #   #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000122834>
         | 
| 152 | 
            +
                #     z           count
         | 
| 153 | 
            +
                #     <boolean> <int64>
         | 
| 154 | 
            +
                #   0 false           3
         | 
| 155 | 
            +
                #   1 true            2
         | 
| 156 | 
            +
                #   2 (nil)           1
         | 
| 157 | 
            +
                #
         | 
| 158 | 
            +
                define_group_aggregation :count
         | 
| 83 159 | 
             
                alias_method :__count, :count
         | 
| 84 160 | 
             
                private :__count
         | 
| 85 161 |  | 
| 86 | 
            -
                def count(* | 
| 87 | 
            -
                  df = __count( | 
| 88 | 
            -
                  # if counts are the same (and do not include NaN or nil), aggregate count columns.
         | 
| 162 | 
            +
                def count(*group_keys)
         | 
| 163 | 
            +
                  df = __count(group_keys)
         | 
| 89 164 | 
             
                  if df.pick(@group_keys.size..).to_h.values.uniq.size == 1
         | 
| 90 165 | 
             
                    df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
         | 
| 91 166 | 
             
                  else
         | 
| @@ -93,19 +168,213 @@ module RedAmber | |
| 93 168 | 
             
                  end
         | 
| 94 169 | 
             
                end
         | 
| 95 170 |  | 
| 96 | 
            -
                 | 
| 171 | 
            +
                # Returns each record group size as a DataFrame.
         | 
| 172 | 
            +
                #
         | 
| 173 | 
            +
                # @return [DataFrame]
         | 
| 174 | 
            +
                #   DataFrame consists of:
         | 
| 175 | 
            +
                #   - Group key columns.
         | 
| 176 | 
            +
                #   - Result columns by group aggregation.
         | 
| 177 | 
            +
                # @example
         | 
| 178 | 
            +
                #   penguins.group(:species).group_count
         | 
| 179 | 
            +
                #
         | 
| 180 | 
            +
                #   # =>
         | 
| 181 | 
            +
                #   #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003a70>
         | 
| 182 | 
            +
                #     species   group_count
         | 
| 183 | 
            +
                #     <string>      <uint8>
         | 
| 184 | 
            +
                #   0 Adelie            152
         | 
| 185 | 
            +
                #   1 Chinstrap          68
         | 
| 186 | 
            +
                #   2 Gentoo            124
         | 
| 187 | 
            +
                #
         | 
| 188 | 
            +
                def group_count
         | 
| 189 | 
            +
                  DataFrame.create(group_table)
         | 
| 190 | 
            +
                end
         | 
| 191 | 
            +
                alias_method :count_all, :group_count
         | 
| 192 | 
            +
             | 
| 193 | 
            +
                # Count the unique values in each group.
         | 
| 194 | 
            +
                #
         | 
| 195 | 
            +
                # @!method count_uniq(*group_keys)
         | 
| 196 | 
            +
                # @macro group_aggregation
         | 
| 197 | 
            +
                # @example Show counts for each group.
         | 
| 198 | 
            +
                #   dataframe.group(:y).count_uniq
         | 
| 199 | 
            +
                #
         | 
| 200 | 
            +
                #   # =>
         | 
| 201 | 
            +
                #   #<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000011ea04>
         | 
| 202 | 
            +
                #     y        count_uniq(x)
         | 
| 203 | 
            +
                #     <string>       <int64>
         | 
| 204 | 
            +
                #   0 A                    2
         | 
| 205 | 
            +
                #   1 B                    3
         | 
| 206 | 
            +
                #   2 C                    1
         | 
| 207 | 
            +
                #
         | 
| 208 | 
            +
                define_group_aggregation :count_distinct
         | 
| 209 | 
            +
                def count_uniq(*group_keys)
         | 
| 210 | 
            +
                  df = count_distinct(*group_keys)
         | 
| 211 | 
            +
                  df.rename do
         | 
| 212 | 
            +
                    keys_org = keys.select { _1.start_with?('count_distinct') }
         | 
| 213 | 
            +
                    keys_renamed = keys_org.map { _1.to_s.gsub('distinct', 'uniq') }
         | 
| 214 | 
            +
                    keys_org.zip keys_renamed
         | 
| 215 | 
            +
                  end
         | 
| 216 | 
            +
                end
         | 
| 217 | 
            +
             | 
| 218 | 
            +
                # Compute maximum of values in each group for numeric columns.
         | 
| 219 | 
            +
                #
         | 
| 220 | 
            +
                # @!method max(*group_keys)
         | 
| 221 | 
            +
                #   @macro group_aggregation
         | 
| 222 | 
            +
                #   @example
         | 
| 223 | 
            +
                #     dataframe.group(:y).max
         | 
| 224 | 
            +
                #
         | 
| 225 | 
            +
                #     # =>
         | 
| 226 | 
            +
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000014ae74>
         | 
| 227 | 
            +
                #       y         max(x)
         | 
| 228 | 
            +
                #       <string> <uint8>
         | 
| 229 | 
            +
                #     0 A              2
         | 
| 230 | 
            +
                #     1 B              5
         | 
| 231 | 
            +
                #     2 C              6
         | 
| 232 | 
            +
                #
         | 
| 233 | 
            +
                define_group_aggregation :max
         | 
| 234 | 
            +
             | 
| 235 | 
            +
                # Compute mean of values in each group for numeric columns.
         | 
| 236 | 
            +
                #
         | 
| 237 | 
            +
                # @!method mean(*group_keys)
         | 
| 238 | 
            +
                #   @macro group_aggregation
         | 
| 239 | 
            +
                #   @example
         | 
| 240 | 
            +
                #     dataframe.group(:y).mean
         | 
| 241 | 
            +
                #
         | 
| 242 | 
            +
                #     # =>
         | 
| 243 | 
            +
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000138a8>
         | 
| 244 | 
            +
                #       y         mean(x)
         | 
| 245 | 
            +
                #       <string> <double>
         | 
| 246 | 
            +
                #     0 A             1.5
         | 
| 247 | 
            +
                #     1 B             4.0
         | 
| 248 | 
            +
                #     2 C             6.0
         | 
| 249 | 
            +
                #
         | 
| 250 | 
            +
                define_group_aggregation :mean
         | 
| 251 | 
            +
             | 
| 252 | 
            +
                # Compute median of values in each group for numeric columns.
         | 
| 253 | 
            +
                #
         | 
| 254 | 
            +
                # @!method median(*group_keys)
         | 
| 255 | 
            +
                #   @macro group_aggregation
         | 
| 256 | 
            +
                #   @example
         | 
| 257 | 
            +
                #     dataframe.group(:y).median
         | 
| 258 | 
            +
                #
         | 
| 259 | 
            +
                #     # =>
         | 
| 260 | 
            +
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000138a8>
         | 
| 261 | 
            +
                #       y        median(x)
         | 
| 262 | 
            +
                #       <string>  <double>
         | 
| 263 | 
            +
                #     0 A              1.5
         | 
| 264 | 
            +
                #     1 B              4.0
         | 
| 265 | 
            +
                #     2 C              6.0
         | 
| 266 | 
            +
                #
         | 
| 267 | 
            +
                define_group_aggregation :approximate_median
         | 
| 268 | 
            +
                def median(*group_keys)
         | 
| 269 | 
            +
                  df = approximate_median(*group_keys)
         | 
| 270 | 
            +
                  df.rename do
         | 
| 271 | 
            +
                    keys_org = keys.select { _1.start_with?('approximate_') }
         | 
| 272 | 
            +
                    keys_renamed = keys_org.map { _1.to_s.delete_prefix('approximate_') }
         | 
| 273 | 
            +
                    keys_org.zip keys_renamed
         | 
| 274 | 
            +
                  end
         | 
| 275 | 
            +
                end
         | 
| 97 276 |  | 
| 98 | 
            -
                 | 
| 277 | 
            +
                # Compute minimum of values in each group for numeric columns.
         | 
| 278 | 
            +
                #
         | 
| 279 | 
            +
                # @!method min(*group_keys)
         | 
| 280 | 
            +
                #   @macro group_aggregation
         | 
| 281 | 
            +
                #   @example
         | 
| 282 | 
            +
                #     dataframe.group(:y).min
         | 
| 283 | 
            +
                #
         | 
| 284 | 
            +
                #     # =>
         | 
| 285 | 
            +
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000018f38>
         | 
| 286 | 
            +
                #       y         min(x)
         | 
| 287 | 
            +
                #       <string> <uint8>
         | 
| 288 | 
            +
                #     0 A              1
         | 
| 289 | 
            +
                #     1 B              3
         | 
| 290 | 
            +
                #     2 C              6
         | 
| 291 | 
            +
                #
         | 
| 292 | 
            +
                define_group_aggregation :min
         | 
| 99 293 |  | 
| 100 | 
            -
                 | 
| 294 | 
            +
                # Get one value from each group.
         | 
| 295 | 
            +
                #
         | 
| 296 | 
            +
                # @!method one(*group_keys)
         | 
| 297 | 
            +
                #   @macro group_aggregation
         | 
| 298 | 
            +
                #   @example
         | 
| 299 | 
            +
                #     dataframe.group(:y).one
         | 
| 300 | 
            +
                #
         | 
| 301 | 
            +
                #     # =>
         | 
| 302 | 
            +
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000002885c>
         | 
| 303 | 
            +
                #       y         one(x)
         | 
| 304 | 
            +
                #       <string> <uint8>
         | 
| 305 | 
            +
                #     0 A              1
         | 
| 306 | 
            +
                #     1 B              3
         | 
| 307 | 
            +
                #     2 C              6
         | 
| 308 | 
            +
                #
         | 
| 309 | 
            +
                define_group_aggregation :one
         | 
| 101 310 |  | 
| 102 | 
            -
                 | 
| 311 | 
            +
                # Compute product of values in each group for numeric columns.
         | 
| 312 | 
            +
                #
         | 
| 313 | 
            +
                # @!method product(*group_keys)
         | 
| 314 | 
            +
                #   @macro group_aggregation
         | 
| 315 | 
            +
                #   @example
         | 
| 316 | 
            +
                #     dataframe.group(:y).product
         | 
| 317 | 
            +
                #
         | 
| 318 | 
            +
                #     # =>
         | 
| 319 | 
            +
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000021a84>
         | 
| 320 | 
            +
                #       y        product(x)
         | 
| 321 | 
            +
                #       <string>   <uint64>
         | 
| 322 | 
            +
                #     0 A                 2
         | 
| 323 | 
            +
                #     1 B                60
         | 
| 324 | 
            +
                #     2 C                 6
         | 
| 325 | 
            +
                #
         | 
| 326 | 
            +
                define_group_aggregation :product
         | 
| 103 327 |  | 
| 104 | 
            -
                 | 
| 328 | 
            +
                # Compute standard deviation of values in each group for numeric columns.
         | 
| 329 | 
            +
                #
         | 
| 330 | 
            +
                # @!method stddev(*group_keys)
         | 
| 331 | 
            +
                #   @macro group_aggregation
         | 
| 332 | 
            +
                #   @example
         | 
| 333 | 
            +
                #     dataframe.group(:y).stddev
         | 
| 334 | 
            +
                #
         | 
| 335 | 
            +
                #     # =>
         | 
| 336 | 
            +
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000002be6c>
         | 
| 337 | 
            +
                #       y        stddev(x)
         | 
| 338 | 
            +
                #       <string>  <double>
         | 
| 339 | 
            +
                #     0 A              0.5
         | 
| 340 | 
            +
                #     1 B            0.082
         | 
| 341 | 
            +
                #     2 C              0.0
         | 
| 342 | 
            +
                #
         | 
| 343 | 
            +
                define_group_aggregation :stddev
         | 
| 105 344 |  | 
| 106 | 
            -
                 | 
| 345 | 
            +
                # Compute sum of values in each group for numeric columns.
         | 
| 346 | 
            +
                #
         | 
| 347 | 
            +
                # @!method sum(*group_keys)
         | 
| 348 | 
            +
                #   @macro group_aggregation
         | 
| 349 | 
            +
                #   @example
         | 
| 350 | 
            +
                #     dataframe.group(:y).sum
         | 
| 351 | 
            +
                #
         | 
| 352 | 
            +
                #     # =>
         | 
| 353 | 
            +
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000032a14>
         | 
| 354 | 
            +
                #       y          sum(x)
         | 
| 355 | 
            +
                #       <string> <uint64>
         | 
| 356 | 
            +
                #     0 A               3
         | 
| 357 | 
            +
                #     1 B              12
         | 
| 358 | 
            +
                #     2 C               6
         | 
| 359 | 
            +
                #
         | 
| 360 | 
            +
                define_group_aggregation :sum
         | 
| 107 361 |  | 
| 108 | 
            -
                 | 
| 362 | 
            +
                # Compute variance of values in each group for numeric columns.
         | 
| 363 | 
            +
                #
         | 
| 364 | 
            +
                # @!method variance(*group_keys)
         | 
| 365 | 
            +
                #   @macro group_aggregation
         | 
| 366 | 
            +
                #   @example
         | 
| 367 | 
            +
                #     dataframe.group(:y).variance
         | 
| 368 | 
            +
                #
         | 
| 369 | 
            +
                #     # =>
         | 
| 370 | 
            +
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x00000000003b1dc>
         | 
| 371 | 
            +
                #       y        variance(x)
         | 
| 372 | 
            +
                #       <string>    <double>
         | 
| 373 | 
            +
                #     0 A               0.25
         | 
| 374 | 
            +
                #     1 B              0.067
         | 
| 375 | 
            +
                #     2 C                0.0
         | 
| 376 | 
            +
                #
         | 
| 377 | 
            +
                define_group_aggregation :variance
         | 
| 109 378 |  | 
| 110 379 | 
             
                # Returns Array of boolean filters to select each record in the Group.
         | 
| 111 380 | 
             
                #
         | 
| @@ -168,27 +437,6 @@ module RedAmber | |
| 168 437 | 
             
                  @filters.size
         | 
| 169 438 | 
             
                end
         | 
| 170 439 |  | 
| 171 | 
            -
                # Returns each record group size as a DataFrame.
         | 
| 172 | 
            -
                #
         | 
| 173 | 
            -
                # @return [DataFrame]
         | 
| 174 | 
            -
                #   DataFrame consists of:
         | 
| 175 | 
            -
                #   - Group key columns.
         | 
| 176 | 
            -
                #   - Result columns by group aggregation.
         | 
| 177 | 
            -
                # @example
         | 
| 178 | 
            -
                #   penguins.group(:species).group_count
         | 
| 179 | 
            -
                #
         | 
| 180 | 
            -
                #   # =>
         | 
| 181 | 
            -
                #   #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003a70>
         | 
| 182 | 
            -
                #     species   group_count
         | 
| 183 | 
            -
                #     <string>      <uint8>
         | 
| 184 | 
            -
                #   0 Adelie            152
         | 
| 185 | 
            -
                #   1 Chinstrap          68
         | 
| 186 | 
            -
                #   2 Gentoo            124
         | 
| 187 | 
            -
                #
         | 
| 188 | 
            -
                def group_count
         | 
| 189 | 
            -
                  # Wrap the value of group_table (defined outside this view) in a DataFrame.
                  DataFrame.create(group_table)
         | 
| 190 | 
            -
                end
         | 
| 191 | 
            -
             | 
| 192 440 | 
             
                # String representation of self.
         | 
| 193 441 | 
             
                #
         | 
| 194 442 | 
             
                # @return [String]
         | 
    
        data/lib/red_amber/subframes.rb
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 2 |  | 
| 3 3 | 
             
            module RedAmber
         | 
| 4 | 
            -
              # class SubFrames treats  | 
| 4 | 
            +
              # class SubFrames treats subsets of a DataFrame
         | 
| 5 5 | 
             
              # [Experimental feature] Class SubFrames may be removed or changed in the future.
         | 
| 6 6 | 
             
              class SubFrames
         | 
| 7 7 | 
             
                include Enumerable # may change to use Forwardable.
         | 
| @@ -434,7 +434,7 @@ module RedAmber | |
| 434 434 | 
             
                #   @return [DataFrame]
         | 
| 435 435 | 
             
                #     created DataFrame.
         | 
| 436 436 | 
             
                #   @example Aggregate by key labels in arguments and values from block.
         | 
| 437 | 
            -
                #     subframes.aggregate(:y, :sum_x) { [y. | 
| 437 | 
            +
                #     subframes.aggregate(:y, :sum_x) { [y.one, x.sum] }
         | 
| 438 438 | 
             
                #
         | 
| 439 439 | 
             
                #     # =>
         | 
| 440 440 | 
             
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
         | 
| @@ -445,7 +445,7 @@ module RedAmber | |
| 445 445 | 
             
                #     2 C              6
         | 
| 446 446 | 
             
                #
         | 
| 447 447 | 
             
                #   @example Aggregate by key labels in an Array and values from block.
         | 
| 448 | 
            -
                #     subframes.aggregate([:y, :sum_x]) { [y. | 
| 448 | 
            +
                #     subframes.aggregate([:y, :sum_x]) { [y.one, x.sum] }
         | 
| 449 449 | 
             
                #
         | 
| 450 450 | 
             
                #     # =>
         | 
| 451 451 | 
             
                #     #<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000003b24>
         | 
| @@ -457,7 +457,7 @@ module RedAmber | |
| 457 457 | 
             
                #
         | 
| 458 458 | 
             
                # @overload aggregate
         | 
| 459 459 | 
             
                #
         | 
| 460 | 
            -
                #   Aggregate SubFrames creating DataFrame with pairs of key and aggregated  | 
| 460 | 
            +
                #   Aggregate SubFrames creating DataFrame with pairs of key and aggregated values
         | 
| 461 461 | 
             
                #   in Hash from the block.
         | 
| 462 462 | 
             
                #
         | 
| 463 463 | 
             
                #   @yieldparam dataframe [DataFrame]
         | 
| @@ -470,7 +470,7 @@ module RedAmber | |
| 470 470 | 
             
                #     created DataFrame.
         | 
| 471 471 | 
             
                #   @example Aggregate by key and value pairs from block.
         | 
| 472 472 | 
             
                #     subframes.aggregate do
         | 
| 473 | 
            -
                #       { y: y. | 
| 473 | 
            +
                #       { y: y.one, sum_x: x.sum }
         | 
| 474 474 | 
             
                #     end
         | 
| 475 475 | 
             
                #
         | 
| 476 476 | 
             
                #     # =>
         | 
| @@ -712,7 +712,7 @@ module RedAmber | |
| 712 712 | 
             
                #   @example
         | 
| 713 713 | 
             
                #     subframes.assign(:sum_x, :frac_x) do
         | 
| 714 714 | 
             
                #       group_sum = x.sum
         | 
| 715 | 
            -
                #       [[group_sum] * size, x /  | 
| 715 | 
            +
                #       [[group_sum] * size, x / group_sum.to_f]
         | 
| 716 716 | 
             
                #     end
         | 
| 717 717 | 
             
                #
         | 
| 718 718 | 
             
                #     # =>
         | 
    
        data/lib/red_amber/vector.rb
    CHANGED
    
    | @@ -180,7 +180,8 @@ module RedAmber | |
| 180 180 | 
             
                    end
         | 
| 181 181 | 
             
                    sio << ']'
         | 
| 182 182 |  | 
| 183 | 
            -
                     | 
| 183 | 
            +
                    chunked = chunked? ? ', chunked' : ''
         | 
| 184 | 
            +
                    format "#<#{self.class}(:#{type}, size=#{size}#{chunked}):0x%016x>\n%s\n",
         | 
| 184 185 | 
             
                           object_id, sio.string
         | 
| 185 186 | 
             
                  end
         | 
| 186 187 | 
             
                end
         |