red_amber 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -1
- data/.rubocop_todo.yml +2 -15
- data/.yardopts +1 -0
- data/CHANGELOG.md +35 -0
- data/Gemfile +1 -0
- data/README.md +206 -16
- data/doc/DataFrame.md +63 -73
- data/doc/Vector.md +25 -0
- data/doc/{47_examples_of_red_amber.ipynb → examples_of_red_amber.ipynb} +693 -111
- data/lib/red_amber/data_frame.rb +26 -8
- data/lib/red_amber/data_frame_displayable.rb +7 -5
- data/lib/red_amber/group.rb +25 -27
- data/lib/red_amber/vector_selectable.rb +2 -0
- data/lib/red_amber/vector_updatable.rb +22 -1
- data/lib/red_amber/version.rb +1 -1
- metadata +4 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 3853e70f378cac65013a3bcfc51a2d55cb70cc494f3f3b70675bed944cc15b49
         | 
| 4 | 
            +
              data.tar.gz: 3c65999cf978f1edf8c2c7fcce9a0ccb192d4da051f34fa0bf3f66ddc178eb1c
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: fac66ba0bf5955cfe0d21a51b90ec16407182b9053e9b586dfe9f8e2526de4e90efecdd8eba1e8b3c99b12fc44544c82fb2f6af4b666b97876a64a6ee4deedf1
         | 
| 7 | 
            +
              data.tar.gz: 1a4cc526ce9f097438f2b7d018552a4cd6aaa2d900012297cd1777c4b9e39063cc2988af91c138e93f291a56175aefb6a6b00c211f9b9c5bd38d75d6bc40acb9
         | 
    
        data/.rubocop.yml
    CHANGED
    
    | @@ -43,6 +43,11 @@ Lint/BinaryOperatorWithIdenticalOperands: | |
| 43 43 | 
             
              Exclude:
         | 
| 44 44 | 
             
                - 'test/test_vector_function.rb'
         | 
| 45 45 |  | 
| 46 | 
            +
            # Needed for a test with an empty block
         | 
| 47 | 
            +
            Lint/EmptyBlock:
         | 
| 48 | 
            +
              Exclude:
         | 
| 49 | 
            +
                - 'test/test_group.rb'
         | 
| 50 | 
            +
             | 
| 46 51 | 
             
            # Max: 120
         | 
| 47 52 | 
             
            Layout/LineLength:
         | 
| 48 53 | 
             
              Max: 118
         | 
| @@ -78,9 +83,10 @@ Metrics/ClassLength: | |
| 78 83 | 
             
            Metrics/CyclomaticComplexity:
         | 
| 79 84 | 
             
              Max: 12
         | 
| 80 85 | 
             
              Exclude:
         | 
| 86 | 
            +
                - 'lib/red_amber/data_frame_displayable.rb' # Max: 18
         | 
| 81 87 | 
             
                - 'lib/red_amber/data_frame_selectable.rb' # Max: 14
         | 
| 88 | 
            +
                - 'lib/red_amber/vector_selectable.rb' # Max: 13
         | 
| 82 89 | 
             
                - 'lib/red_amber/vector_updatable.rb' # Max: 14
         | 
| 83 | 
            -
                - 'lib/red_amber/data_frame_displayable.rb' # Max: 18
         | 
| 84 90 |  | 
| 85 91 | 
             
            # Max: 10
         | 
| 86 92 | 
             
            Metrics/MethodLength:
         | 
    
        data/.rubocop_todo.yml
    CHANGED
    
    | @@ -1,15 +1,2 @@ | |
| 1 | 
            -
            #  | 
| 2 | 
            -
            #  | 
| 3 | 
            -
            # on 2022-05-08 02:37:36 UTC using RuboCop version 1.27.0.
         | 
| 4 | 
            -
            # The point is for the user to remove these configuration records
         | 
| 5 | 
            -
            # one by one as the offenses are removed from the code base.
         | 
| 6 | 
            -
            # Note that changes in the inspected code, or installation of new
         | 
| 7 | 
            -
            # versions of RuboCop, may require this file to be generated again.
         | 
| 8 | 
            -
             | 
| 9 | 
            -
            # Offense count: 1
         | 
| 10 | 
            -
            # This cop supports unsafe auto-correction (--auto-correct-all).
         | 
| 11 | 
            -
            # Configuration parameters: EnforcedStyle.
         | 
| 12 | 
            -
            # SupportedStyles: forbid_for_all_comparison_operators, forbid_for_equality_operators_only, require_for_all_comparison_operators, require_for_equality_operators_only
         | 
| 13 | 
            -
            Style/YodaCondition:
         | 
| 14 | 
            -
              Exclude:
         | 
| 15 | 
            -
                - 'lib/red_amber/data_frame.rb'
         | 
| 1 | 
            +
            # We will use cops to detect bugs in an early stage
         | 
| 2 | 
            +
            # Feel free to use .rubocop_todo.yml by --auto-gen-config
         | 
    
        data/.yardopts
    ADDED
    
    | @@ -0,0 +1 @@ | |
| 1 | 
            +
            --output-dir doc/yard
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    | @@ -2,6 +2,41 @@ | |
| 2 2 |  | 
| 3 3 | 
             
            - Supports Arrow 9.0.0
         | 
| 4 4 |  | 
| 5 | 
            +
            ## [0.1.8] - 2022-08-04 (experimental)
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            - Bug fixes
         | 
| 8 | 
            +
             | 
| 9 | 
            +
              - Fix unnamed column in table formatter (#52)
         | 
| 10 | 
            +
              - Fix DataFrame#key?, DataFrame#key_index when @keys.nil? (#52)
         | 
| 11 | 
            +
              - Align order of replacer in Vector#replace (#53, resolved #38)
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            - New features and improvements
         | 
| 14 | 
            +
             | 
| 15 | 
            +
              - Refine DataFrame.new for empty arguments (#50)
         | 
| 16 | 
            +
                - Delete .rubocop_todo.yml so that Yoda conditions are no longer permitted (#50)
         | 
| 17 | 
            +
             | 
| 18 | 
            +
              - Refine Group (#52, resolved #28)
         | 
| 19 | 
            +
                - Refine Group methods creation
         | 
| 20 | 
            +
                - Place the group key first (leftmost)
         | 
| 21 | 
            +
                - Show only one group count when same counts
         | 
| 22 | 
            +
                - Add block acceptability for group
         | 
| 23 | 
            +
                - Rename empty key to :unnamed in DataFrame.new
         | 
| 24 | 
            +
                - Rename Group#aggregated_by to #summarize (#54)
         | 
| 25 | 
            +
             | 
| 26 | 
            +
              - Add Vector#shift (#51)
         | 
| 27 | 
            +
             | 
| 28 | 
            +
              - Vector#[] accepts Range as an argument (#51)
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            - Update documents
         | 
| 31 | 
            +
             | 
| 32 | 
            +
              - Add support for yard (#54)
         | 
| 33 | 
            +
             | 
| 34 | 
            +
              - Renew jupyter notebook '53 examples' (#54)
         | 
| 35 | 
            +
             | 
| 36 | 
            +
              - Add more examples and images in README (#52)
         | 
| 37 | 
            +
              - Add document of group manipulations in README (#52)
         | 
| 38 | 
            +
              - Renew DF#group document in DataFrame.md (#52)
         | 
| 39 | 
            +
             | 
| 5 40 | 
             
            ## [0.1.7] - 2022-07-15 (experimental)
         | 
| 6 41 |  | 
| 7 42 | 
             
            - Bug fixes
         | 
    
        data/Gemfile
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -56,7 +56,7 @@ require 'red_amber' # require 'red-amber' is also OK. | |
| 56 56 | 
             
            require 'datasets-arrow'
         | 
| 57 57 |  | 
| 58 58 | 
             
            arrow = Datasets::Penguins.new.to_arrow
         | 
| 59 | 
            -
            RedAmber::DataFrame.new(arrow)
         | 
| 59 | 
            +
            penguins = RedAmber::DataFrame.new(arrow)
         | 
| 60 60 |  | 
| 61 61 | 
             
            # =>
         | 
| 62 62 | 
             
            #<RedAmber::DataFrame : 344 x 8 Vectors, 0x0000000000013790>
         | 
| @@ -78,28 +78,71 @@ RedAmber::DataFrame.new(arrow) | |
| 78 78 |  | 
| 79 79 | 
             
            For example, `DataFrame#pick` accepts keys as an argument and returns a sub DataFrame.
         | 
| 80 80 |  | 
| 81 | 
            +
            
         | 
| 82 | 
            +
             | 
| 81 83 | 
             
            ```ruby
         | 
| 82 | 
            -
             | 
| 84 | 
            +
            penguins.keys
         | 
| 85 | 
            +
            # =>
         | 
| 86 | 
            +
            [:species,                                       
         | 
| 87 | 
            +
             :island,                                        
         | 
| 88 | 
            +
             :bill_length_mm,
         | 
| 89 | 
            +
             :bill_depth_mm,
         | 
| 90 | 
            +
             :flipper_length_mm,
         | 
| 91 | 
            +
             :body_mass_g,
         | 
| 92 | 
            +
             :sex,
         | 
| 93 | 
            +
             :year]
         | 
| 94 | 
            +
             | 
| 95 | 
            +
            df = penguins.pick(:species, :island, :body_mass_g)
         | 
| 83 96 | 
             
            df
         | 
| 84 97 |  | 
| 85 98 | 
             
            # =>
         | 
| 86 | 
            -
            #<RedAmber::DataFrame : 344 x  | 
| 87 | 
            -
                body_mass_g
         | 
| 88 | 
            -
             | 
| 89 | 
            -
              1        3750
         | 
| 90 | 
            -
              2        3800
         | 
| 91 | 
            -
              3        3250
         | 
| 92 | 
            -
              4       (nil)
         | 
| 93 | 
            -
              5        3450
         | 
| 94 | 
            -
              : | 
| 95 | 
            -
            342 | 
| 96 | 
            -
            343 | 
| 99 | 
            +
            #<RedAmber::DataFrame : 344 x 3 Vectors, 0x000000000003cc1c>                 
         | 
| 100 | 
            +
                species  island    body_mass_g                                           
         | 
| 101 | 
            +
                <string> <string>     <uint16>                                           
         | 
| 102 | 
            +
              1 Adelie   Torgersen        3750                                           
         | 
| 103 | 
            +
              2 Adelie   Torgersen        3800                                           
         | 
| 104 | 
            +
              3 Adelie   Torgersen        3250                                           
         | 
| 105 | 
            +
              4 Adelie   Torgersen       (nil)                                           
         | 
| 106 | 
            +
              5 Adelie   Torgersen        3450                                           
         | 
| 107 | 
            +
              : :        :                   :                                           
         | 
| 108 | 
            +
            342 Gentoo   Biscoe           5750                                           
         | 
| 109 | 
            +
            343 Gentoo   Biscoe           5200                                           
         | 
| 110 | 
            +
            344 Gentoo   Biscoe           5400
         | 
| 111 | 
            +
            ```
         | 
| 112 | 
            +
             | 
| 113 | 
            +
            `DataFrame#drop` drops some columns to create a remainder DataFrame.
         | 
| 114 | 
            +
             | 
| 115 | 
            +
            
         | 
| 116 | 
            +
             | 
| 117 | 
            +
            You can specify by keys or a boolean array (same size as n_keys).
         | 
| 118 | 
            +
             | 
| 119 | 
            +
            ```ruby
         | 
| 120 | 
            +
            # Same as df.drop(:species, :island)
         | 
| 121 | 
            +
            df = df.drop(true, true, false)
         | 
| 122 | 
            +
             | 
| 123 | 
            +
            # =>
         | 
| 124 | 
            +
            #<RedAmber::DataFrame : 344 x 1 Vector, 0x0000000000048760>
         | 
| 125 | 
            +
                body_mass_g                                     
         | 
| 126 | 
            +
                   <uint16>                                     
         | 
| 127 | 
            +
              1        3750                                     
         | 
| 128 | 
            +
              2        3800                                     
         | 
| 129 | 
            +
              3        3250                                     
         | 
| 130 | 
            +
              4       (nil)                                     
         | 
| 131 | 
            +
              5        3450                                     
         | 
| 132 | 
            +
              :           :                                     
         | 
| 133 | 
            +
            342        5750                                     
         | 
| 134 | 
            +
            343        5200                                     
         | 
| 97 135 | 
             
            344        5400
         | 
| 98 136 | 
             
            ```
         | 
| 99 137 |  | 
| 138 | 
            +
            Arrow data is immutable, so these methods always return a new object.
         | 
| 139 | 
            +
             | 
| 100 140 | 
             
            `DataFrame#assign` creates new variables (column in the table).
         | 
| 101 141 |  | 
| 142 | 
            +
            
         | 
| 143 | 
            +
             | 
| 102 144 | 
             
            ```ruby
         | 
| 145 | 
            +
            # New column is created because ':body_mass_kg' is a new key.
         | 
| 103 146 | 
             
            df.assign(:body_mass_kg => df[:body_mass_g] / 1000.0)
         | 
| 104 147 |  | 
| 105 148 | 
             
            # =>
         | 
| @@ -117,12 +160,97 @@ df.assign(:body_mass_kg => df[:body_mass_g] / 1000.0) | |
| 117 160 | 
             
            344        5400          5.4
         | 
| 118 161 | 
             
            ```
         | 
| 119 162 |  | 
| 163 | 
            +
            `DataFrame#slice` selects rows (observations) to create a sub DataFrame.
         | 
| 164 | 
            +
             | 
| 165 | 
            +
            
         | 
| 166 | 
            +
             | 
| 167 | 
            +
            ```ruby
         | 
| 168 | 
            +
            # returns 5 rows at the start and 5 rows from the end
         | 
| 169 | 
            +
            penguins.slice(0...5, -5..-1)
         | 
| 170 | 
            +
             | 
| 171 | 
            +
            # =>
         | 
| 172 | 
            +
            #<RedAmber::DataFrame : 10 x 8 Vectors, 0x0000000000042be4>
         | 
| 173 | 
            +
               species  island    bill_length_mm bill_depth_mm flipper_length_mm ...     year
         | 
| 174 | 
            +
               <string> <string>        <double>      <double>           <uint8> ... <uint16>
         | 
| 175 | 
            +
             1 Adelie   Torgersen           39.1          18.7               181 ...     2007
         | 
| 176 | 
            +
             2 Adelie   Torgersen           39.5          17.4               186 ...     2007
         | 
| 177 | 
            +
             3 Adelie   Torgersen           40.3          18.0               195 ...     2007
         | 
| 178 | 
            +
             4 Adelie   Torgersen          (nil)         (nil)             (nil) ...     2007
         | 
| 179 | 
            +
             5 Adelie   Torgersen           36.7          19.3               193 ...     2007
         | 
| 180 | 
            +
             : :        :                      :             :                 : ...        :
         | 
| 181 | 
            +
             8 Gentoo   Biscoe              50.4          15.7               222 ...     2009
         | 
| 182 | 
            +
             9 Gentoo   Biscoe              45.2          14.8               212 ...     2009
         | 
| 183 | 
            +
            10 Gentoo   Biscoe              49.9          16.1               213 ...     2009
         | 
| 184 | 
            +
            ```
         | 
| 185 | 
            +
             | 
| 186 | 
            +
            `DataFrame#remove` rejects rows (observations) to create a remainder DataFrame.
         | 
| 187 | 
            +
             | 
| 188 | 
            +
            
         | 
| 189 | 
            +
             | 
| 190 | 
            +
            ```ruby
         | 
| 191 | 
            +
            # penguins[:bill_length_mm] < 40 returns a boolean Vector
         | 
| 192 | 
            +
            penguins.remove(penguins[:bill_length_mm] < 40)
         | 
| 193 | 
            +
             | 
| 194 | 
            +
            # =>
         | 
| 195 | 
            +
            #<RedAmber::DataFrame : 244 x 8 Vectors, 0x000000000007d6f4>
         | 
| 196 | 
            +
                species  island    bill_length_mm bill_depth_mm flipper_length_mm ...     year
         | 
| 197 | 
            +
                <string> <string>        <double>      <double>           <uint8> ... <uint16>
         | 
| 198 | 
            +
              1 Adelie   Torgersen           40.3          18.0               195 ...     2007
         | 
| 199 | 
            +
              2 Adelie   Torgersen          (nil)         (nil)             (nil) ...     2007
         | 
| 200 | 
            +
              3 Adelie   Torgersen           42.0          20.2               190 ...     2007
         | 
| 201 | 
            +
              4 Adelie   Torgersen           41.1          17.6               182 ...     2007
         | 
| 202 | 
            +
              5 Adelie   Torgersen           42.5          20.7               197 ...     2007
         | 
| 203 | 
            +
              : :        :                      :             :                 : ...        :
         | 
| 204 | 
            +
            242 Gentoo   Biscoe              50.4          15.7               222 ...     2009
         | 
| 205 | 
            +
            243 Gentoo   Biscoe              45.2          14.8               212 ...     2009
         | 
| 206 | 
            +
            244 Gentoo   Biscoe              49.9          16.1               213 ...     2009
         | 
| 207 | 
            +
            ```
         | 
| 208 | 
            +
             | 
| 120 209 | 
             
            DataFrame manipulating methods like `pick`, `drop`, `slice`, `remove`, `rename` and `assign` accept a block.
         | 
| 121 210 |  | 
| 122 | 
            -
            This is  | 
| 211 | 
            +
            This example uses a block to update numeric columns.
         | 
| 123 212 |  | 
| 124 213 | 
             
            ```ruby
         | 
| 125 | 
            -
             | 
| 214 | 
            +
            df = RedAmber::DataFrame.new(
         | 
| 215 | 
            +
              integer: [0, 1, 2, 3, nil],
         | 
| 216 | 
            +
              float:   [0.0, 1.1,  2.2, Float::NAN, nil],
         | 
| 217 | 
            +
              string:  ['A', 'B', 'C', 'D', nil],
         | 
| 218 | 
            +
              boolean: [true, false, true, false, nil])
         | 
| 219 | 
            +
            df
         | 
| 220 | 
            +
             | 
| 221 | 
            +
            # =>
         | 
| 222 | 
            +
            #<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000003131c>
         | 
| 223 | 
            +
              integer    float string   boolean
         | 
| 224 | 
            +
              <uint8> <double> <string> <boolean>
         | 
| 225 | 
            +
            1       0      0.0 A        true
         | 
| 226 | 
            +
            2       1      1.1 B        false
         | 
| 227 | 
            +
            3       2      2.2 C        true
         | 
| 228 | 
            +
            4       3      NaN D        false
         | 
| 229 | 
            +
            5   (nil)    (nil) (nil)    (nil)
         | 
| 230 | 
            +
             | 
| 231 | 
            +
            df.assign do
         | 
| 232 | 
            +
              vectors.each_with_object({}) do |v, h|
         | 
| 233 | 
            +
                h[v.key] = -v if v.numeric?
         | 
| 234 | 
            +
              end
         | 
| 235 | 
            +
            end
         | 
| 236 | 
            +
             | 
| 237 | 
            +
            # =>
         | 
| 238 | 
            +
            #<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000009a1b4>
         | 
| 239 | 
            +
              integer    float string   boolean
         | 
| 240 | 
            +
              <uint8> <double> <string> <boolean>
         | 
| 241 | 
            +
            1       0     -0.0 A        true
         | 
| 242 | 
            +
            2     255     -1.1 B        false
         | 
| 243 | 
            +
            3     254     -2.2 C        true
         | 
| 244 | 
            +
            4     253      NaN D        false
         | 
| 245 | 
            +
            5   (nil)    (nil) (nil)    (nil)
         | 
| 246 | 
            +
            ```
         | 
| 247 | 
            +
             | 
| 248 | 
            +
            The negate (`-@`) method of an unsigned integer Vector returns the two's complement (e.g. 1 becomes 255 for `uint8`).
         | 
| 249 | 
            +
             | 
| 250 | 
            +
            The next example eliminates observations (rows in the table) containing nil.
         | 
| 251 | 
            +
             | 
| 252 | 
            +
            ```ruby
         | 
| 253 | 
            +
            # remove all observations containing nil
         | 
| 126 254 | 
             
            nil_removed = penguins.remove { vectors.map(&:is_nil).reduce(&:|) }
         | 
| 127 255 | 
             
            nil_removed.tdr
         | 
| 128 256 | 
             
            # =>
         | 
| @@ -145,12 +273,51 @@ For this frequently needed task, we can do it much simpler. | |
| 145 273 | 
             
            penguins.remove_nil # => same result as above
         | 
| 146 274 | 
             
            ```
         | 
| 147 275 |  | 
| 276 | 
            +
            `DataFrame#group` method can be used for the grouping tasks.
         | 
| 277 | 
            +
             | 
| 278 | 
            +
            ```ruby
         | 
| 279 | 
            +
            starwars = RedAmber::DataFrame.load(URI("https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv"))
         | 
| 280 | 
            +
            starwars
         | 
| 281 | 
            +
             | 
| 282 | 
            +
            # =>
         | 
| 283 | 
            +
            #<RedAmber::DataFrame : 87 x 12 Vectors, 0x000000000000607c>
         | 
| 284 | 
            +
               unnamed1 name            height     mass hair_color skin_color  eye_color ... species
         | 
| 285 | 
            +
                <int64> <string>       <int64> <double> <string>   <string>    <string>  ... <string>
         | 
| 286 | 
            +
             1        1 Luke Skywalker     172     77.0 blond      fair        blue      ... Human
         | 
| 287 | 
            +
             2        2 C-3PO              167     75.0 NA         gold        yellow    ... Droid
         | 
| 288 | 
            +
             3        3 R2-D2               96     32.0 NA         white, blue red       ... Droid
         | 
| 289 | 
            +
             4        4 Darth Vader        202    136.0 none       white       yellow    ... Human
         | 
| 290 | 
            +
             5        5 Leia Organa        150     49.0 brown      light       brown     ... Human
         | 
| 291 | 
            +
             :        : :                    :        : :          :           :         ... :
         | 
| 292 | 
            +
            85       85 BB8              (nil)    (nil) none       none        black     ... Droid
         | 
| 293 | 
            +
            86       86 Captain Phasma   (nil)    (nil) unknown    unknown     unknown   ... NA
         | 
| 294 | 
            +
            87       87 Padmé Amidala      165     45.0 brown      light       brown     ... Human
         | 
| 295 | 
            +
             | 
| 296 | 
            +
            grouped = starwars.group(:species) { [count(:species), mean(:height, :mass)] }
         | 
| 297 | 
            +
            grouped.slice { v(:count) > 1 }
         | 
| 298 | 
            +
             | 
| 299 | 
            +
            # =>
         | 
| 300 | 
            +
            #<RedAmber::DataFrame : 9 x 4 Vectors, 0x000000000006e848>
         | 
| 301 | 
            +
              species    count mean(height) mean(mass)
         | 
| 302 | 
            +
              <string> <int64>     <double>   <double>
         | 
| 303 | 
            +
            1 Human         35        176.6       82.8
         | 
| 304 | 
            +
            2 Droid          6        131.2       69.8
         | 
| 305 | 
            +
            3 Wookiee        2        231.0      124.0
         | 
| 306 | 
            +
            4 Gungan         3        208.7       74.0
         | 
| 307 | 
            +
            5 NA             4        181.3       48.0
         | 
| 308 | 
            +
            : :              :            :          :
         | 
| 309 | 
            +
            7 Twi'lek        2        179.0       55.0
         | 
| 310 | 
            +
            8 Mirialan       2        168.0       53.1
         | 
| 311 | 
            +
            9 Kaminoan       2        221.0       88.0 
         | 
| 312 | 
            +
            ```
         | 
| 313 | 
            +
             | 
| 148 314 | 
             
            See [DataFrame.md](doc/DataFrame.md) for details.
         | 
| 149 315 |  | 
| 150 316 |  | 
| 151 317 | 
             
            ## `RedAmber::Vector`
         | 
| 152 318 |  | 
| 153 319 | 
             
            Class `RedAmber::Vector` represents a series of data in the DataFrame.
         | 
| 320 | 
            +
            Method `RedAmber::DataFrame#[key]` returns a Vector with the key `key`. 
         | 
| 154 321 |  | 
| 155 322 | 
             
            ```ruby
         | 
| 156 323 | 
             
            penguins[:bill_length_mm]
         | 
| @@ -161,11 +328,34 @@ penguins[:bill_length_mm] | |
| 161 328 |  | 
| 162 329 | 
             
            Vectors accepts some [functional methods from Arrow](https://arrow.apache.org/docs/cpp/compute.html).
         | 
| 163 330 |  | 
| 331 | 
            +
            This is an element-wise comparison and returns a boolean Vector of the same size.
         | 
| 332 | 
            +
             | 
| 333 | 
            +
            
         | 
| 334 | 
            +
             | 
| 335 | 
            +
            ```ruby
         | 
| 336 | 
            +
            penguins[:bill_length_mm] < 40
         | 
| 337 | 
            +
             | 
| 338 | 
            +
            # =>
         | 
| 339 | 
            +
            #<RedAmber::Vector(:boolean, size=344):0x000000000007e7ac>
         | 
| 340 | 
            +
            [true, true, false, nil, true, true, true, true, true, false, true, true, false, ... ]
         | 
| 341 | 
            +
            ```
         | 
| 342 | 
            +
             | 
| 343 | 
            +
            The next example returns an aggregated result.
         | 
| 344 | 
            +
             | 
| 345 | 
            +
            
         | 
| 346 | 
            +
             | 
| 347 | 
            +
            ```ruby
         | 
| 348 | 
            +
            penguins[:bill_length_mm].mean
         | 
| 349 | 
            +
            # =>
         | 
| 350 | 
            +
            43.92192982456141
         | 
| 351 | 
            +
             | 
| 352 | 
            +
            ```
         | 
| 353 | 
            +
             | 
| 164 354 | 
             
            See [Vector.md](doc/Vector.md) for details.
         | 
| 165 355 |  | 
| 166 356 | 
             
            ## Jupyter notebook
         | 
| 167 357 |  | 
| 168 | 
            -
            [ | 
| 358 | 
            +
            [53 Examples of Red Amber](doc/examples_of_red_amber.ipynb)
         | 
| 169 359 |  | 
| 170 360 | 
             
            ## Development
         | 
| 171 361 |  | 
    
        data/doc/DataFrame.md
    CHANGED
    
    | @@ -860,16 +860,10 @@ penguins.to_rover | |
| 860 860 |  | 
| 861 861 | 
             
            ## Grouping
         | 
| 862 862 |  | 
| 863 | 
            -
            ### `group( | 
| 864 | 
            -
             | 
| 865 | 
            -
              (
         | 
| 866 | 
            -
                This API will change in the future version. Especcially I want to change:
         | 
| 867 | 
            -
                  - Order of the column of the result (aggregation_keys should be the first)
         | 
| 868 | 
            -
                  - DataFrame#group will accept a block (heronshoes/red_amber #28)
         | 
| 869 | 
            -
              )
         | 
| 863 | 
            +
            ### `group(group_keys)`
         | 
| 870 864 |  | 
| 871 865 | 
             
              `group` creates a class `Group` object. `Group` accepts functions below as a method.
         | 
| 872 | 
            -
              Method accepts options as ` | 
| 866 | 
            +
              Method accepts options as `group_keys`.
         | 
| 873 867 |  | 
| 874 868 | 
             
              Available functions are:
         | 
| 875 869 |  | 
| @@ -889,8 +883,8 @@ penguins.to_rover | |
| 889 883 | 
             
              - [ ] tdigest
         | 
| 890 884 | 
             
              - ✓ variance
         | 
| 891 885 |  | 
| 892 | 
            -
              For the each group of ` | 
| 893 | 
            -
               | 
| 886 | 
            +
              For each group of `group_keys`, the aggregation `function` is applied, and a new dataframe is returned with aggregated keys according to `summary_keys`.
         | 
| 887 | 
            +
              Summary key names are provided by `function(summary_keys)` style.
         | 
| 894 888 |  | 
| 895 889 | 
             
              This is an example of grouping of famous STARWARS dataset.
         | 
| 896 890 |  | 
| @@ -900,18 +894,18 @@ penguins.to_rover | |
| 900 894 | 
             
              starwars
         | 
| 901 895 |  | 
| 902 896 | 
             
              # =>
         | 
| 903 | 
            -
              #<RedAmber::DataFrame : 87 x 12 Vectors,  | 
| 904 | 
            -
             | 
| 905 | 
            -
             | 
| 906 | 
            -
             | 
| 907 | 
            -
             | 
| 908 | 
            -
             | 
| 909 | 
            -
             | 
| 910 | 
            -
             | 
| 911 | 
            -
             | 
| 912 | 
            -
               | 
| 913 | 
            -
               | 
| 914 | 
            -
               | 
| 897 | 
            +
              #<RedAmber::DataFrame : 87 x 12 Vectors, 0x0000000000005a50>
         | 
| 898 | 
            +
                 unnamed1 name            height     mass hair_color skin_color  eye_color ... species
         | 
| 899 | 
            +
                  <int64> <string>       <int64> <double> <string>   <string>    <string>  ... <string>
         | 
| 900 | 
            +
               1        1 Luke Skywalker     172     77.0 blond      fair        blue      ... Human
         | 
| 901 | 
            +
               2        2 C-3PO              167     75.0 NA         gold        yellow    ... Droid
         | 
| 902 | 
            +
               3        3 R2-D2               96     32.0 NA         white, blue red       ... Droid
         | 
| 903 | 
            +
               4        4 Darth Vader        202    136.0 none       white       yellow    ... Human
         | 
| 904 | 
            +
               5        5 Leia Organa        150     49.0 brown      light       brown     ... Human
         | 
| 905 | 
            +
               :        : :                    :        : :          :           :         ... :
         | 
| 906 | 
            +
              85       85 BB8              (nil)    (nil) none       none        black     ... Droid
         | 
| 907 | 
            +
              86       86 Captain Phasma   (nil)    (nil) unknown    unknown     unknown   ... NA
         | 
| 908 | 
            +
              87       87 Padmé Amidala      165     45.0 brown      light       brown     ... Human
         | 
| 915 909 |  | 
| 916 910 | 
             
              starwars.tdr(12)
         | 
| 917 911 |  | 
| @@ -919,7 +913,7 @@ penguins.to_rover | |
| 919 913 | 
             
              RedAmber::DataFrame : 87 x 12 Vectors
         | 
| 920 914 | 
             
              Vectors : 4 numeric, 8 strings
         | 
| 921 915 | 
             
              #  key         type   level data_preview
         | 
| 922 | 
            -
              1  : | 
| 916 | 
            +
              1  :unnamed1   int64     87 [1, 2, 3, 4, 5, ... ]
         | 
| 923 917 | 
             
              2  :name       string    87 ["Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Organa", ... ]
         | 
| 924 918 | 
             
              3  :height     int64     46 [172, 167, 96, 202, 150, ... ], 6 nils
         | 
| 925 919 | 
             
              4  :mass       double    39 [77.0, 75.0, 32.0, 136.0, 49.0, ... ], 28 nils
         | 
| @@ -933,74 +927,70 @@ penguins.to_rover | |
| 933 927 | 
             
              12 :species    string    38 ["Human", "Droid", "Droid", "Human", "Human", ... ]
         | 
| 934 928 | 
             
              ```
         | 
| 935 929 |  | 
| 936 | 
            -
              We can  | 
| 930 | 
            +
              We can group by `:species` and calculate the count.
         | 
| 937 931 |  | 
| 938 932 | 
             
              ```ruby
         | 
| 939 | 
            -
               | 
| 940 | 
            -
              grouped
         | 
| 933 | 
            +
              starwars.group(:species).count(:species)
         | 
| 941 934 |  | 
| 942 935 | 
             
              # =>
         | 
| 943 | 
            -
              #<RedAmber::DataFrame : 38 x  | 
| 944 | 
            -
                  | 
| 945 | 
            -
             | 
| 946 | 
            -
               1 | 
| 947 | 
            -
               2 | 
| 948 | 
            -
               3 | 
| 949 | 
            -
               4 | 
| 950 | 
            -
               5 | 
| 951 | 
            -
               : | 
| 952 | 
            -
              36 | 
| 953 | 
            -
              37 | 
| 954 | 
            -
              38 | 
| 936 | 
            +
              #<RedAmber::DataFrame : 38 x 2 Vectors, 0x000000000001d6f0>                                 
         | 
| 937 | 
            +
                 species    count                                                                         
         | 
| 938 | 
            +
                 <string> <int64>                                                                         
         | 
| 939 | 
            +
               1 Human         35                                                                         
         | 
| 940 | 
            +
               2 Droid          6                                                                         
         | 
| 941 | 
            +
               3 Wookiee        2                                                                         
         | 
| 942 | 
            +
               4 Rodian         1                                                                         
         | 
| 943 | 
            +
               5 Hutt           1                                                                         
         | 
| 944 | 
            +
               : :              :                                                                         
         | 
| 945 | 
            +
              36 Kaleesh        1                                                                         
         | 
| 946 | 
            +
              37 Pau'an         1                                                                         
         | 
| 947 | 
            +
              38 Kel Dor        1
         | 
| 955 948 | 
             
              ```
         | 
| 956 949 |  | 
| 957 | 
            -
               | 
| 958 | 
            -
             | 
| 950 | 
            +
              We can also calculate the mean of `:mass` and `:height` together.
         | 
| 951 | 
            +
             | 
| 959 952 | 
             
              ```ruby
         | 
| 960 | 
            -
               | 
| 961 | 
            -
              grouped = grouped.slice(count > 1)
         | 
| 953 | 
            +
              grouped = starwars.group(:species) { [count(:species), mean(:height, :mass)] }
         | 
| 962 954 |  | 
| 963 955 | 
             
              # =>
         | 
| 964 | 
            -
              #<RedAmber::DataFrame :  | 
| 965 | 
            -
                mean( | 
| 966 | 
            -
             | 
| 967 | 
            -
             | 
| 968 | 
            -
             | 
| 969 | 
            -
             | 
| 970 | 
            -
             | 
| 971 | 
            -
             | 
| 972 | 
            -
             | 
| 973 | 
            -
               | 
| 974 | 
            -
               | 
| 975 | 
            -
               | 
| 956 | 
            +
              #<RedAmber::DataFrame : 38 x 4 Vectors, 0x00000000000407cc>                                 
         | 
| 957 | 
            +
                 species    count mean(height) mean(mass)                                                 
         | 
| 958 | 
            +
                 <string> <int64>     <double>   <double>                                                 
         | 
| 959 | 
            +
               1 Human         35        176.6       82.8                                                 
         | 
| 960 | 
            +
               2 Droid          6        131.2       69.8                                                 
         | 
| 961 | 
            +
               3 Wookiee        2        231.0      124.0                                                 
         | 
| 962 | 
            +
               4 Rodian         1        173.0       74.0                                                 
         | 
| 963 | 
            +
               5 Hutt           1        175.0     1358.0                                                 
         | 
| 964 | 
            +
               : :              :            :          :                                                 
         | 
| 965 | 
            +
              36 Kaleesh        1        216.0      159.0                                                 
         | 
| 966 | 
            +
              37 Pau'an         1        206.0       80.0                                                 
         | 
| 967 | 
            +
              38 Kel Dor        1        188.0       80.0
         | 
| 976 968 | 
             
              ```
         | 
| 977 969 |  | 
| 978 | 
            -
               | 
| 979 | 
            -
             | 
| 980 | 
            -
              ```ruby
         | 
| 981 | 
            -
              grouped.assign(count: count[count > 1]).pick { [2,3,0,1].map{ |i| keys[i] } }
         | 
| 970 | 
            +
              Select rows for count > 1.
         | 
| 982 971 |  | 
| 972 | 
            +
              ```ruby
         | 
| 973 | 
            +
              grouped.slice(grouped[:count] > 1)
         | 
| 974 | 
            +
             | 
| 983 975 | 
             
              # =>
         | 
| 984 | 
            -
              #<RedAmber::DataFrame : 9 x 4 Vectors,  | 
| 985 | 
            -
                species    count mean( | 
| 986 | 
            -
                <string> < | 
| 987 | 
            -
              1 Human         35       82.8 | 
| 988 | 
            -
              2 Droid          6       69.8 | 
| 989 | 
            -
              3 Wookiee        2      124.0 | 
| 990 | 
            -
              4 Gungan         3       74.0 | 
| 991 | 
            -
              5 NA             4       48.0 | 
| 992 | 
            -
              : :              :          : | 
| 993 | 
            -
              7 Twi'lek        2       55.0 | 
| 994 | 
            -
              8 Mirialan       2       53.1 | 
| 995 | 
            -
              9 Kaminoan       2       88.0 | 
| 976 | 
            +
              #<RedAmber::DataFrame : 9 x 4 Vectors, 0x000000000004c270>
         | 
| 977 | 
            +
                species    count mean(height) mean(mass)
         | 
| 978 | 
            +
                <string> <int64>     <double>   <double>
         | 
| 979 | 
            +
              1 Human         35        176.6       82.8
         | 
| 980 | 
            +
              2 Droid          6        131.2       69.8
         | 
| 981 | 
            +
              3 Wookiee        2        231.0      124.0
         | 
| 982 | 
            +
              4 Gungan         3        208.7       74.0
         | 
| 983 | 
            +
              5 NA             4        181.3       48.0
         | 
| 984 | 
            +
              : :              :            :          :
         | 
| 985 | 
            +
              7 Twi'lek        2        179.0       55.0
         | 
| 986 | 
            +
              8 Mirialan       2        168.0       53.1
         | 
| 987 | 
            +
              9 Kaminoan       2        221.0       88.0
         | 
| 996 988 | 
             
              ```
         | 
| 997 989 |  | 
| 998 990 | 
             
            ## Combining DataFrames
         | 
| 999 991 |  | 
| 1000 992 | 
             
            - [ ] Combining rows to a dataframe
         | 
| 1001 993 |  | 
| 1002 | 
            -
            - [ ] Add vars
         | 
| 1003 | 
            -
             | 
| 1004 994 | 
             
            - [ ] Inner join
         | 
| 1005 995 |  | 
| 1006 996 | 
             
            - [ ] Left join
         | 
| @@ -1009,6 +999,6 @@ penguins.to_rover | |
| 1009 999 |  | 
| 1010 1000 | 
             
            - [ ] One-hot encoding
         | 
| 1011 1001 |  | 
| 1012 | 
            -
            ## Iteration | 
| 1002 | 
            +
            ## Iteration
         | 
| 1013 1003 |  | 
| 1014 1004 | 
             
            - [ ] each_rows
         | 
    
        data/doc/Vector.md
    CHANGED
    
    | @@ -500,3 +500,28 @@ vector.is_in(1, -1) | |
| 500 500 | 
             
            #<RedAmber::Vector(:boolean, size=3):0x000000000000f320>
         | 
| 501 501 | 
             
            [true, false, true]
         | 
| 502 502 | 
             
            ```
         | 
| 503 | 
            +
             | 
| 504 | 
            +
            ### `shift(amount = 1, fill: nil)`
         | 
| 505 | 
            +
             | 
| 506 | 
            +
            Shifts the vector's values by the specified `amount`. The vacated positions are filled with the value `fill`.
         | 
| 507 | 
            +
             | 
| 508 | 
            +
            ```ruby
         | 
| 509 | 
            +
            vector = RedAmber::Vector.new([1, 2, 3, 4, 5])
         | 
| 510 | 
            +
            vector.shift
         | 
| 511 | 
            +
             | 
| 512 | 
            +
            # =>
         | 
| 513 | 
            +
            #<RedAmber::Vector(:uint8, size=5):0x00000000000072d8>  
         | 
| 514 | 
            +
            [nil, 1, 2, 3, 4]
         | 
| 515 | 
            +
             | 
| 516 | 
            +
            vector.shift(-2)
         | 
| 517 | 
            +
             | 
| 518 | 
            +
            # =>
         | 
| 519 | 
            +
            #<RedAmber::Vector(:uint8, size=5):0x0000000000009970>  
         | 
| 520 | 
            +
            [3, 4, 5, nil, nil]
         | 
| 521 | 
            +
             | 
| 522 | 
            +
            vector.shift(fill: Float::NAN)
         | 
| 523 | 
            +
             | 
| 524 | 
            +
            # =>
         | 
| 525 | 
            +
            #<RedAmber::Vector(:double, size=5):0x0000000000011d3c>                    
         | 
| 526 | 
            +
            [NaN, 1.0, 2.0, 3.0, 4.0]
         | 
| 527 | 
            +
            ```
         |