red_amber 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +39 -20
- data/.yardopts +2 -0
- data/CHANGELOG.md +113 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +25 -26
- data/benchmark/basic.yml +2 -2
- data/benchmark/combine.yml +2 -2
- data/benchmark/dataframe.yml +2 -2
- data/benchmark/group.yml +2 -2
- data/benchmark/reshape.yml +2 -2
- data/benchmark/vector.yml +3 -0
- data/doc/DataFrame.md +32 -12
- data/doc/DataFrame_Comparison.md +65 -0
- data/doc/SubFrames.md +11 -0
- data/doc/Vector.md +207 -1
- data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
- data/lib/red_amber/data_frame.rb +429 -75
- data/lib/red_amber/data_frame_combinable.rb +516 -66
- data/lib/red_amber/data_frame_displayable.rb +244 -14
- data/lib/red_amber/data_frame_indexable.rb +121 -18
- data/lib/red_amber/data_frame_loadsave.rb +78 -10
- data/lib/red_amber/data_frame_reshaping.rb +184 -14
- data/lib/red_amber/data_frame_selectable.rb +622 -66
- data/lib/red_amber/data_frame_variable_operation.rb +446 -34
- data/lib/red_amber/group.rb +187 -22
- data/lib/red_amber/helper.rb +70 -10
- data/lib/red_amber/refinements.rb +12 -5
- data/lib/red_amber/subframes.rb +1066 -0
- data/lib/red_amber/vector.rb +385 -11
- data/lib/red_amber/vector_aggregation.rb +312 -0
- data/lib/red_amber/vector_binary_element_wise.rb +387 -0
- data/lib/red_amber/vector_selectable.rb +217 -12
- data/lib/red_amber/vector_unary_element_wise.rb +436 -0
- data/lib/red_amber/vector_updatable.rb +278 -34
- data/lib/red_amber/version.rb +2 -1
- data/lib/red_amber.rb +13 -1
- data/red_amber.gemspec +2 -2
- metadata +13 -8
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/lib/red_amber/vector_functions.rb +0 -242
| @@ -3,22 +3,67 @@ | |
| 3 3 | 
             
            require 'stringio'
         | 
| 4 4 |  | 
| 5 5 | 
             
            module RedAmber
         | 
| 6 | 
            -
              #  | 
| 6 | 
            +
              # Mix-in for the class DataFrame
         | 
| 7 7 | 
             
              module DataFrameDisplayable
         | 
| 8 | 
            +
                # Used internally to display table.
         | 
| 8 9 | 
             
                INDEX_KEY = :index_key_for_format_table
         | 
| 10 | 
            +
                private_constant :INDEX_KEY
         | 
| 9 11 |  | 
| 10 | 
            -
                 | 
| 12 | 
            +
                # rubocop:disable Layout/LineLength
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                # Show a preview of self as a string.
         | 
| 15 | 
            +
                #
         | 
| 16 | 
            +
                # @param width [Integer]
         | 
| 17 | 
            +
                #   maximum size of result.
         | 
| 18 | 
            +
                # @param head [Integer]
         | 
| 19 | 
            +
                #   number of records to show from head.
         | 
| 20 | 
            +
                # @param tail [Integer]
         | 
| 21 | 
            +
                #   number of records to show at tail.
         | 
| 22 | 
            +
                # @return [String]
         | 
| 23 | 
            +
                #   string representation of self.
         | 
| 24 | 
            +
                # @example Show penguins dataset
         | 
| 25 | 
            +
                #   puts penguins.to_s
         | 
| 26 | 
            +
                #
         | 
| 27 | 
            +
                #   # =>
         | 
| 28 | 
            +
                #       species  island    bill_length_mm bill_depth_mm flipper_length_mm ...     year
         | 
| 29 | 
            +
                #       <string> <string>        <double>      <double>           <uint8> ... <uint16>
         | 
| 30 | 
            +
                #     0 Adelie   Torgersen           39.1          18.7               181 ...     2007
         | 
| 31 | 
            +
                #     1 Adelie   Torgersen           39.5          17.4               186 ...     2007
         | 
| 32 | 
            +
                #     2 Adelie   Torgersen           40.3          18.0               195 ...     2007
         | 
| 33 | 
            +
                #     3 Adelie   Torgersen          (nil)         (nil)             (nil) ...     2007
         | 
| 34 | 
            +
                #     4 Adelie   Torgersen           36.7          19.3               193 ...     2007
         | 
| 35 | 
            +
                #     : :        :                      :             :                 : ...        :
         | 
| 36 | 
            +
                #   341 Gentoo   Biscoe              50.4          15.7               222 ...     2009
         | 
| 37 | 
            +
                #   342 Gentoo   Biscoe              45.2          14.8               212 ...     2009
         | 
| 38 | 
            +
                #   343 Gentoo   Biscoe              49.9          16.1               213 ...     2009
         | 
| 39 | 
            +
                #
         | 
| 40 | 
            +
                def to_s(width: 80, head: 5, tail: 3)
         | 
| 11 41 | 
             
                  return '' if empty?
         | 
| 12 42 |  | 
| 13 | 
            -
                  format_table(width: width)
         | 
| 43 | 
            +
                  format_table(width: width, head: head, tail: tail)
         | 
| 14 44 | 
             
                end
         | 
| 15 45 |  | 
| 16 | 
            -
                # Show statistical summary by a new  | 
| 17 | 
            -
                # | 
| 18 | 
            -
                # | 
| 19 | 
            -
                # | 
| 46 | 
            +
                # Show statistical summary by a new DataFrame.
         | 
| 47 | 
            +
                #
         | 
| 48 | 
            +
                # This method will make stats only for numeric columns.
         | 
| 49 | 
            +
                # - NaNs are ignored.
         | 
| 50 | 
            +
                # - `count` shows non-NaN counts.
         | 
| 51 | 
            +
                #
         | 
| 52 | 
            +
                # @return [DataFrame]
         | 
| 53 | 
            +
                #   a new dataframe.
         | 
| 54 | 
            +
                # @example Statistical summary of penguins dataset
         | 
| 55 | 
            +
                #   # needs more width to show all stats in this example
         | 
| 56 | 
            +
                #   puts penguins.summary.to_s(width: 82)
         | 
| 57 | 
            +
                #
         | 
| 58 | 
            +
                #   # =>
         | 
| 59 | 
            +
                #     variables            count     mean      std      min      25%   median      75%      max
         | 
| 60 | 
            +
                #     <dictionary>      <uint16> <double> <double> <double> <double> <double> <double> <double>
         | 
| 61 | 
            +
                #   0 bill_length_mm         342    43.92     5.46     32.1    39.23    44.38     48.5     59.6
         | 
| 62 | 
            +
                #   1 bill_depth_mm          342    17.15     1.97     13.1     15.6    17.32     18.7     21.5
         | 
| 63 | 
            +
                #   2 flipper_length_mm      342   200.92    14.06    172.0    190.0    197.0    213.0    231.0
         | 
| 64 | 
            +
                #   3 body_mass_g            342  4201.75   801.95   2700.0   3550.0   4031.5   4750.0   6300.0
         | 
| 65 | 
            +
                #   4 year                   344  2008.03     0.82   2007.0   2007.0   2008.0   2009.0   2009.0
         | 
| 20 66 | 
             
                #
         | 
| 21 | 
            -
                # @return [DataFrame] a new dataframe.
         | 
| 22 67 | 
             
                def summary
         | 
| 23 68 | 
             
                  num_keys = keys.select { |key| self[key].numeric? }
         | 
| 24 69 |  | 
| @@ -36,6 +81,42 @@ module RedAmber | |
| 36 81 | 
             
                end
         | 
| 37 82 | 
             
                alias_method :describe, :summary
         | 
| 38 83 |  | 
| 84 | 
            +
                # Show information of self.
         | 
| 85 | 
            +
                #
         | 
| 86 | 
            +
                # According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
         | 
| 87 | 
            +
                # - If it is 'TDR', returns class, shape and transposed preview by 3 rows.
         | 
| 88 | 
            +
                # - If it is 'MINIMUM', returns class and shape.
         | 
| 89 | 
            +
                # - If it is 'TABLE' or otherwise, returns class, shape and Table preview.
         | 
| 90 | 
            +
                #   Default value of the ENV is 'Table'.
         | 
| 91 | 
            +
                # @return [String]
         | 
| 92 | 
            +
                #   information of self.
         | 
| 93 | 
            +
                # @example Default (ENV ['RED_AMBER_OUTPUT_MODE'] == 'Table')
         | 
| 94 | 
            +
                #   puts df.inspect
         | 
| 95 | 
            +
                #
         | 
| 96 | 
            +
                #   # =>
         | 
| 97 | 
            +
                #   #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c148>
         | 
| 98 | 
            +
                #           x y
         | 
| 99 | 
            +
                #     <uint8> <string>
         | 
| 100 | 
            +
                #   0       1 A
         | 
| 101 | 
            +
                #   1       2 B
         | 
| 102 | 
            +
                #   2       3 C
         | 
| 103 | 
            +
                #
         | 
| 104 | 
            +
                # @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'TDR'
         | 
| 105 | 
            +
                #   puts df.inspect
         | 
| 106 | 
            +
                #
         | 
| 107 | 
            +
                #   # =>
         | 
| 108 | 
            +
                #   #<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000c148>
         | 
| 109 | 
            +
                #   Vectors : 1 numeric, 1 string
         | 
| 110 | 
            +
                #   # key type   level data_preview
         | 
| 111 | 
            +
                #   0 :x  uint8      3 [1, 2, 3]
         | 
| 112 | 
            +
                #   1 :y  string     3 ["A", "B", "C"]
         | 
| 113 | 
            +
                #
         | 
| 114 | 
            +
                # @example In case of ENV ['RED_AMBER_OUTPUT_MODE'] == 'Minimum'
         | 
| 115 | 
            +
                #   puts df.inspect
         | 
| 116 | 
            +
                #
         | 
| 117 | 
            +
                #   # =>
         | 
| 118 | 
            +
                #   RedAmber::DataFrame : 3 x 2 Vectors
         | 
| 119 | 
            +
                #
         | 
| 39 120 | 
             
                def inspect
         | 
| 40 121 | 
             
                  mode = ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table')
         | 
| 41 122 | 
             
                  case mode.upcase
         | 
| @@ -48,17 +129,148 @@ module RedAmber | |
| 48 129 | 
             
                  end
         | 
| 49 130 | 
             
                end
         | 
| 50 131 |  | 
| 51 | 
            -
                #  | 
| 52 | 
            -
                # | 
| 53 | 
            -
                #  | 
| 132 | 
            +
                # Shows some information about self in a transposed style.
         | 
| 133 | 
            +
                #
         | 
| 134 | 
            +
                # @param limit [Integer, :all]
         | 
| 135 | 
            +
                #   maximum number of variables (columns) to show.
         | 
| 136 | 
            +
                #   Shows all variables (columns) if it is `:all`.
         | 
| 137 | 
            +
                # @param tally [Integer]
         | 
| 138 | 
            +
                #   maximum level to use tally mode.
         | 
| 139 | 
            +
                #   Tally mode counts the occurrences of each element and shows as a hash
         | 
| 140 | 
            +
                #   with the elements as keys and the corresponding counts as values.
         | 
| 141 | 
            +
                # @param elements [Integer]
         | 
| 142 | 
            +
                #   maximum number of elements to show values
         | 
| 143 | 
            +
                #   in each column.
         | 
| 144 | 
            +
                # @return [nil]
         | 
| 145 | 
            +
                # @example Default
         | 
| 146 | 
            +
                #   diamonds = diamonds.assign_left(:index) { indices }
         | 
| 147 | 
            +
                #   diamonds
         | 
| 148 | 
            +
                #
         | 
| 149 | 
            +
                #   # =>
         | 
| 150 | 
            +
                #   #<RedAmber::DataFrame : 53940 x 11 Vectors, 0x000000000000c314>
         | 
| 151 | 
            +
                #            index    carat cut       color    clarity     depth    table    price ...        z
         | 
| 152 | 
            +
                #         <uint16> <double> <string>  <string> <string> <double> <double> <uint16> ... <double>
         | 
| 153 | 
            +
                #       0        0     0.23 Ideal     E        SI2          61.5     55.0      326 ...     2.43
         | 
| 154 | 
            +
                #       1        1     0.21 Premium   E        SI1          59.8     61.0      326 ...     2.31
         | 
| 155 | 
            +
                #       2        2     0.23 Good      E        VS1          56.9     65.0      327 ...     2.31
         | 
| 156 | 
            +
                #       3        3     0.29 Premium   I        VS2          62.4     58.0      334 ...     2.63
         | 
| 157 | 
            +
                #       4        4     0.31 Good      J        SI2          63.3     58.0      335 ...     2.75
         | 
| 158 | 
            +
                #       :        :        : :         :        :               :        :        : ...        :
         | 
| 159 | 
            +
                #   53937    53937      0.7 Very Good D        SI1          62.8     60.0     2757 ...     3.56
         | 
| 160 | 
            +
                #   53938    53938     0.86 Premium   H        SI2          61.0     58.0     2757 ...     3.74
         | 
| 161 | 
            +
                #   53939    53939     0.75 Ideal     D        SI2          62.2     55.0     2757 ...     3.64
         | 
| 162 | 
            +
                #
         | 
| 163 | 
            +
                #   diamonds.tdr
         | 
| 164 | 
            +
                #
         | 
| 165 | 
            +
                #   # =>
         | 
| 166 | 
            +
                #   RedAmber::DataFrame : 53940 x 11 Vectors
         | 
| 167 | 
            +
                #   Vectors : 8 numeric, 3 strings
         | 
| 168 | 
            +
                #   #  key      type   level data_preview
         | 
| 169 | 
            +
                #   0  :index   uint16 53940 [0, 1, 2, 3, 4, ... ]
         | 
| 170 | 
            +
                #   1  :carat   double   273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
         | 
| 171 | 
            +
                #   2  :cut     string     5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
         | 
| 172 | 
            +
                #   3  :color   string     7 ["E", "E", "E", "I", "J", ... ]
         | 
| 173 | 
            +
                #   4  :clarity string     8 ["SI2", "SI1", "VS1", "VS2", "SI2", ... ]
         | 
| 174 | 
            +
                #   5  :depth   double   184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
         | 
| 175 | 
            +
                #   6  :table   double   127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
         | 
| 176 | 
            +
                #   7  :price   uint16 11602 [326, 326, 327, 334, 335, ... ]
         | 
| 177 | 
            +
                #   8  :x       double   554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
         | 
| 178 | 
            +
                #   9  :y       double   552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
         | 
| 179 | 
            +
                #    ... 1 more Vector ...
         | 
| 180 | 
            +
                #
         | 
| 181 | 
            +
                # @example Show all variables
         | 
| 182 | 
            +
                #   diamonds.tdr(:all)
         | 
| 183 | 
            +
                #
         | 
| 184 | 
            +
                #   # =>
         | 
| 185 | 
            +
                #   RedAmber::DataFrame : 53940 x 11 Vectors
         | 
| 186 | 
            +
                #   Vectors : 8 numeric, 3 strings
         | 
| 187 | 
            +
                #   #  key      type   level data_preview
         | 
| 188 | 
            +
                #   0  :index   uint16 53940 [0, 1, 2, 3, 4, ... ]
         | 
| 189 | 
            +
                #   1  :carat   double   273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
         | 
| 190 | 
            +
                #   2  :cut     string     5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
         | 
| 191 | 
            +
                #   3  :color   string     7 ["E", "E", "E", "I", "J", ... ]
         | 
| 192 | 
            +
                #   4  :clarity string     8 ["SI2", "SI1", "VS1", "VS2", "SI2", ... ]
         | 
| 193 | 
            +
                #   5  :depth   double   184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
         | 
| 194 | 
            +
                #   6  :table   double   127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
         | 
| 195 | 
            +
                #   7  :price   uint16 11602 [326, 326, 327, 334, 335, ... ]
         | 
| 196 | 
            +
                #   8  :x       double   554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
         | 
| 197 | 
            +
                #   9  :y       double   552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
         | 
| 198 | 
            +
                #   10 :z       double   375 [2.43, 2.31, 2.31, 2.63, 2.75, ... ]
         | 
| 199 | 
            +
                #
         | 
| 200 | 
            +
                # @example Use tally mode up to 8 levels
         | 
| 201 | 
            +
                #   diamonds.tdr(tally: 8)
         | 
| 202 | 
            +
                #
         | 
| 203 | 
            +
                #   # =>
         | 
| 204 | 
            +
                #   RedAmber::DataFrame : 53940 x 11 Vectors
         | 
| 205 | 
            +
                #   Vectors : 8 numeric, 3 strings
         | 
| 206 | 
            +
                #   #  key      type   level data_preview
         | 
| 207 | 
            +
                #   0  :index   uint16 53940 [0, 1, 2, 3, 4, ... ]
         | 
| 208 | 
            +
                #   1  :carat   double   273 [0.23, 0.21, 0.23, 0.29, 0.31, ... ]
         | 
| 209 | 
            +
                #   2  :cut     string     5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
         | 
| 210 | 
            +
                #   3  :color   string     7 {"E"=>9797, "I"=>5422, "J"=>2808, "H"=>8304, "F"=>9542, "G"=>11292, "D"=>6775}
         | 
| 211 | 
            +
                #   4  :clarity string     8 {"SI2"=>9194, "SI1"=>13065, "VS1"=>8171, "VS2"=>12258, "VVS2"=>5066, "VVS1"=>3655, "I1"=>741, "IF"=>1790}
         | 
| 212 | 
            +
                #   5  :depth   double   184 [61.5, 59.8, 56.9, 62.4, 63.3, ... ]
         | 
| 213 | 
            +
                #   6  :table   double   127 [55.0, 61.0, 65.0, 58.0, 58.0, ... ]
         | 
| 214 | 
            +
                #   7  :price   uint16 11602 [326, 326, 327, 334, 335, ... ]
         | 
| 215 | 
            +
                #   8  :x       double   554 [3.95, 3.89, 4.05, 4.2, 4.34, ... ]
         | 
| 216 | 
            +
                #   9  :y       double   552 [3.98, 3.84, 4.07, 4.23, 4.35, ... ]
         | 
| 217 | 
            +
                #    ... 1 more Vector ...
         | 
| 218 | 
            +
                #
         | 
| 219 | 
            +
                # @example Increase elements to show
         | 
| 220 | 
            +
                #   diamonds.tdr(elements: 10)
         | 
| 221 | 
            +
                #
         | 
| 222 | 
            +
                #   # =>
         | 
| 223 | 
            +
                #   RedAmber::DataFrame : 53940 x 11 Vectors
         | 
| 224 | 
            +
                #   Vectors : 8 numeric, 3 strings
         | 
| 225 | 
            +
                #   #  key      type   level data_preview
         | 
| 226 | 
            +
                #   0  :index   uint16 53940 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... ]
         | 
| 227 | 
            +
                #   1  :carat   double   273 [0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, ... ]
         | 
| 228 | 
            +
                #   2  :cut     string     5 {"Ideal"=>21551, "Premium"=>13791, "Good"=>4906, "Very Good"=>12082, "Fair"=>1610}
         | 
| 229 | 
            +
                #   3  :color   string     7 ["E", "E", "E", "I", "J", "J", "I", "H", "E", "H", ... ]
         | 
| 230 | 
            +
                #   4  :clarity string     8 ["SI2", "SI1", "VS1", "VS2", "SI2", "VVS2", "VVS1", "SI1", "VS2", "VS1", ... ]
         | 
| 231 | 
            +
                #   5  :depth   double   184 [61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, ... ]
         | 
| 232 | 
            +
                #   6  :table   double   127 [55.0, 61.0, 65.0, 58.0, 58.0, 57.0, 57.0, 55.0, 61.0, 61.0, ... ]
         | 
| 233 | 
            +
                #   7  :price   uint16 11602 [326, 326, 327, 334, 335, 336, 336, 337, 337, 338, ... ]
         | 
| 234 | 
            +
                #   8  :x       double   554 [3.95, 3.89, 4.05, 4.2, 4.34, 3.94, 3.95, 4.07, 3.87, 4.0, ... ]
         | 
| 235 | 
            +
                #   9  :y       double   552 [3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, ... ]
         | 
| 236 | 
            +
                #    ... 1 more Vector ...
         | 
| 237 | 
            +
                #
         | 
| 54 238 | 
             
                def tdr(limit = 10, tally: 5, elements: 5)
         | 
| 55 239 | 
             
                  puts tdr_str(limit, tally: tally, elements: elements)
         | 
| 56 240 | 
             
                end
         | 
| 241 | 
            +
                alias_method :glimpse, :tdr
         | 
| 57 242 |  | 
| 243 | 
            +
                # Shortcut for `tdr(:all)`.
         | 
| 244 | 
            +
                #
         | 
| 245 | 
            +
                # @return (see #tdr)
         | 
| 246 | 
            +
                #
         | 
| 247 | 
            +
                def tdra
         | 
| 248 | 
            +
                  puts tdr_str(:all)
         | 
| 249 | 
            +
                end
         | 
| 250 | 
            +
             | 
| 251 | 
            +
                # rubocop:enable Layout/LineLength
         | 
| 252 | 
            +
             | 
| 253 | 
            +
                # Returns some information about self in a transposed style by a string.
         | 
| 254 | 
            +
                #
         | 
| 255 | 
            +
                # @param (see #tdr)
         | 
| 256 | 
            +
                # @option (see #tdr)
         | 
| 257 | 
            +
                # @return [String] TDR style string.
         | 
| 258 | 
            +
                #
         | 
| 58 259 | 
             
                def tdr_str(limit = 10, tally: 5, elements: 5)
         | 
| 59 260 | 
             
                  "#{shape_str}\n#{dataframe_info(limit, tally_level: tally, max_element: elements)}"
         | 
| 60 261 | 
             
                end
         | 
| 61 262 |  | 
| 263 | 
            +
                # Returns html formatted text of self by IRuby::HTML.table.
         | 
| 264 | 
            +
                #
         | 
| 265 | 
            +
                # According to `ENV['RED_AMBER_OUTPUT_MODE'].upcase`,
         | 
| 266 | 
            +
                # - If it is 'MINIMUM', returns shape by plain text.
         | 
| 267 | 
            +
                # - If it is 'PLAIN', returns `#inspect` value by plain text.
         | 
| 268 | 
            +
                # - If it is 'TDR', returns shape and transposed preview by plain text.
         | 
| 269 | 
            +
                # - If it is 'TABLE' or otherwise, returns Table preview by html format.
         | 
| 270 | 
            +
                #   Default value of the ENV is 'TABLE'.
         | 
| 271 | 
            +
                # @return [String]
         | 
| 272 | 
            +
                #   formatted string.
         | 
| 273 | 
            +
                #
         | 
| 62 274 | 
             
                def to_iruby
         | 
| 63 275 | 
             
                  require 'iruby'
         | 
| 64 276 | 
             
                  return ['text/plain', '(empty DataFrame)'] if empty?
         | 
| @@ -76,14 +288,32 @@ module RedAmber | |
| 76 288 | 
             
                  end
         | 
| 77 289 | 
             
                end
         | 
| 78 290 |  | 
| 79 | 
            -
                 | 
| 80 | 
            -
             | 
| 291 | 
            +
                # Return class and shape of self by a String.
         | 
| 292 | 
            +
                #
         | 
| 293 | 
            +
                # @param with_id [true, false]
         | 
| 294 | 
            +
                #   show id if true.
         | 
| 295 | 
            +
                # @return [String]
         | 
| 296 | 
            +
                #   shape string.
         | 
| 297 | 
            +
                # @example Default (without id)
         | 
| 298 | 
            +
                #   penguins.shape_str
         | 
| 299 | 
            +
                #
         | 
| 300 | 
            +
                #   # =>
         | 
| 301 | 
            +
                #   "RedAmber::DataFrame : 344 x 8 Vectors"
         | 
| 302 | 
            +
                #
         | 
| 303 | 
            +
                # @example With id
         | 
| 304 | 
            +
                #   penguins.shape_str(with_id: true)
         | 
| 305 | 
            +
                #
         | 
| 306 | 
            +
                #   # =>
         | 
| 307 | 
            +
                #   "RedAmber::DataFrame : 344 x 8 Vectors, 0x0000000000003980"
         | 
| 308 | 
            +
                #
         | 
| 81 309 | 
             
                def shape_str(with_id: false)
         | 
| 82 310 | 
             
                  shape_info = empty? ? '(empty)' : "#{size} x #{n_keys} Vector#{pl(n_keys)}"
         | 
| 83 311 | 
             
                  id = with_id ? format(', 0x%016x', object_id) : ''
         | 
| 84 312 | 
             
                  "#{self.class} : #{shape_info}#{id}"
         | 
| 85 313 | 
             
                end
         | 
| 86 314 |  | 
| 315 | 
            +
                private # =====
         | 
| 316 | 
            +
             | 
| 87 317 | 
             
                def dataframe_info(limit, tally_level: 5, max_element: 5)
         | 
| 88 318 | 
             
                  return '' if empty?
         | 
| 89 319 |  | 
| @@ -201,7 +431,7 @@ module RedAmber | |
| 201 431 | 
             
                  df = df.assign do
         | 
| 202 432 | 
             
                    vectors.each_with_object({}) do |v, assigner|
         | 
| 203 433 | 
             
                      vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
         | 
| 204 | 
            -
             | 
| 434 | 
            +
                              .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
         | 
| 205 435 | 
             
                      assigner[v.key] =
         | 
| 206 436 | 
             
                        original.size > head + tail + 1 ? vec.replace(head + 2, ':') : vec
         | 
| 207 437 | 
             
                    end
         | 
| @@ -1,38 +1,141 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 2 |  | 
| 3 3 | 
             
            module RedAmber
         | 
| 4 | 
            -
              #  | 
| 4 | 
            +
              # Mix-ins for the class DataFrame
         | 
| 5 5 | 
             
              module DataFrameIndexable
         | 
| 6 | 
            -
                #  | 
| 7 | 
            -
                 | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 6 | 
            +
                # Returns row index Vector.
         | 
| 7 | 
            +
                #
         | 
| 8 | 
            +
                # @overload indices
         | 
| 9 | 
            +
                #   return @indices as row indices (0...size).
         | 
| 10 | 
            +
                #
         | 
| 11 | 
            +
                #   @return [Vector]
         | 
| 12 | 
            +
                #     a Vector of row indices.
         | 
| 13 | 
            +
                #   @example When `dataframe.size == 5`;
         | 
| 14 | 
            +
                #     dataframe.indices
         | 
| 15 | 
            +
                #
         | 
| 16 | 
            +
                #     # =>
         | 
| 17 | 
            +
                #     #<RedAmber::Vector(:uint8, size=5):0x000000000000fb54>
         | 
| 18 | 
            +
                #     [0, 1, 2, 3, 4]
         | 
| 19 | 
            +
                #
         | 
| 20 | 
            +
                # @overload indices(start)
         | 
| 21 | 
            +
                #   return customized index Vector `(start..).take(size)`.
         | 
| 22 | 
            +
                #
         | 
| 23 | 
            +
                #   @param start [#succ]
         | 
| 24 | 
            +
                #     starting element, which must respond to `#succ`.
         | 
| 25 | 
            +
                #   @return [Vector]
         | 
| 26 | 
            +
                #     a Vector of row indices.
         | 
| 27 | 
            +
                #   @example When `dataframe.size == 5`;
         | 
| 28 | 
            +
                #     dataframe.indices(1)
         | 
| 29 | 
            +
                #
         | 
| 30 | 
            +
                #     # =>
         | 
| 31 | 
            +
                #     #<RedAmber::Vector(:uint8, size=5):0x000000000000fba4>
         | 
| 32 | 
            +
                #     [1, 2, 3, 4, 5]
         | 
| 33 | 
            +
                #
         | 
| 34 | 
            +
                #     dataframe.indices('a')
         | 
| 35 | 
            +
                #     # =>
         | 
| 36 | 
            +
                #     #<RedAmber::Vector(:string, size=5):0x000000000000fbb8>
         | 
| 37 | 
            +
                #     ["a", "b", "c", "d", "e"]
         | 
| 38 | 
            +
                #
         | 
| 39 | 
            +
                def indices(start = 0)
         | 
| 40 | 
            +
                  if start == 0 # rubocop:disable Style/NumericPredicate
         | 
| 41 | 
            +
                    @indices ||= Vector.new(0...size)
         | 
| 42 | 
            +
                  else
         | 
| 43 | 
            +
                    Vector.new((start..).take(size))
         | 
| 44 | 
            +
                  end
         | 
| 13 45 | 
             
                end
         | 
| 46 | 
            +
                alias_method :indexes, :indices
         | 
| 14 47 |  | 
| 48 | 
            +
                # Return the row indices that would sort self, as a Vector.
         | 
| 49 | 
            +
                #
         | 
| 15 50 | 
             
                # @param sort_keys [Arrow::SortKey]
         | 
| 16 51 | 
             
                #   :key, "key" or "+key" denotes ascending,
         | 
| 17 52 | 
             
                #   "-key" denotes descending order
         | 
| 18 | 
            -
                # @return [RedAmber::Vector] | 
| 53 | 
            +
                # @return [RedAmber::Vector]
         | 
| 54 | 
            +
                #   sorted indices in Vector
         | 
| 55 | 
            +
                # @example
         | 
| 56 | 
            +
                #   df
         | 
| 57 | 
            +
                #
         | 
| 58 | 
            +
                #   # =>
         | 
| 59 | 
            +
                #           x y
         | 
| 60 | 
            +
                #     <uint8> <string>
         | 
| 61 | 
            +
                #     0       3 B
         | 
| 62 | 
            +
                #     1       5 A
         | 
| 63 | 
            +
                #     2       1 B
         | 
| 64 | 
            +
                #     3       4 A
         | 
| 65 | 
            +
                #     4       2 C
         | 
| 66 | 
            +
                #
         | 
| 67 | 
            +
                #   df.sort_indices('x')
         | 
| 68 | 
            +
                #
         | 
| 69 | 
            +
                #   # =>
         | 
| 70 | 
            +
                #   #<RedAmber::Vector(:uint64, size=5):0x0000000000003854>
         | 
| 71 | 
            +
                #   [2, 4, 0, 3, 1]
         | 
| 72 | 
            +
                #
         | 
| 19 73 | 
             
                def sort_indices(*sort_keys)
         | 
| 20 74 | 
             
                  indices = @table.sort_indices(sort_keys.flatten)
         | 
| 21 75 | 
             
                  Vector.create(indices)
         | 
| 22 76 | 
             
                end
         | 
| 23 77 |  | 
| 24 | 
            -
                #  | 
| 78 | 
            +
                # Sort the contents of self.
         | 
| 79 | 
            +
                #
         | 
| 80 | 
            +
                # @param sort_keys [Arrow::SortKey]
         | 
| 81 | 
            +
                #   :key, "key" or "+key" denotes ascending,
         | 
| 82 | 
            +
                #   "-key" denotes descending order
         | 
| 83 | 
            +
                # @return [RedAmber::DataFrame]
         | 
| 84 | 
            +
                #   sorted DataFrame
         | 
| 85 | 
            +
                # @example Sort by a key
         | 
| 86 | 
            +
                #   df
         | 
| 87 | 
            +
                #
         | 
| 88 | 
            +
                #   # =>
         | 
| 89 | 
            +
                #           x y
         | 
| 90 | 
            +
                #     <uint8> <string>
         | 
| 91 | 
            +
                #     0       3 B
         | 
| 92 | 
            +
                #     1       5 A
         | 
| 93 | 
            +
                #     2       1 B
         | 
| 94 | 
            +
                #     3       4 A
         | 
| 95 | 
            +
                #     4       2 C
         | 
| 96 | 
            +
                #
         | 
| 97 | 
            +
                #   df.sort('y')
         | 
| 98 | 
            +
                #
         | 
| 99 | 
            +
                #   # =>
         | 
| 100 | 
            +
                #   #<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000382c>
         | 
| 101 | 
            +
                #           x y
         | 
| 102 | 
            +
                #     <uint8> <string>
         | 
| 103 | 
            +
                #   0       5 A
         | 
| 104 | 
            +
                #   1       4 A
         | 
| 105 | 
            +
                #   2       3 B
         | 
| 106 | 
            +
                #   3       1 B
         | 
| 107 | 
            +
                #   4       2 C
         | 
| 108 | 
            +
                #
         | 
| 109 | 
            +
                # @example Sort by two keys
         | 
| 110 | 
            +
                #   df.sort('y', 'x')
         | 
| 111 | 
            +
                #
         | 
| 112 | 
            +
                #   # =>
         | 
| 113 | 
            +
                #   #<RedAmber::DataFrame : 5 x 2 Vectors, 0x0000000000003890>
         | 
| 114 | 
            +
                #           x y
         | 
| 115 | 
            +
                #     <uint8> <string>
         | 
| 116 | 
            +
                #   0       4 A
         | 
| 117 | 
            +
                #   1       5 A
         | 
| 118 | 
            +
                #   2       1 B
         | 
| 119 | 
            +
                #   3       3 B
         | 
| 120 | 
            +
                #   4       2 C
         | 
| 121 | 
            +
                #
         | 
| 122 | 
            +
                # @example Sort in descending order
         | 
| 123 | 
            +
                #   df.sort('-x')
         | 
| 124 | 
            +
                #
         | 
| 125 | 
            +
                #   # =>
         | 
| 126 | 
            +
                #   #<RedAmber::DataFrame : 5 x 2 Vectors, 0x0000000000003840>
         | 
| 127 | 
            +
                #           x y
         | 
| 128 | 
            +
                #     <uint8> <string>
         | 
| 129 | 
            +
                #   0       5 A
         | 
| 130 | 
            +
                #   1       4 A
         | 
| 131 | 
            +
                #   2       3 B
         | 
| 132 | 
            +
                #   3       2 C
         | 
| 133 | 
            +
                #   4       1 B
         | 
| 134 | 
            +
                #
         | 
| 25 135 | 
             
                def sort(*sort_keys)
         | 
| 26 136 | 
             
                  indices = @table.sort_indices(sort_keys.flatten)
         | 
| 27 137 |  | 
| 28 | 
            -
                   | 
| 29 | 
            -
                end
         | 
| 30 | 
            -
             | 
| 31 | 
            -
                private
         | 
| 32 | 
            -
             | 
| 33 | 
            -
                def new_dataframe_by(index_array)
         | 
| 34 | 
            -
                  t = Arrow::Function.find(:take).execute([@table, index_array]).value
         | 
| 35 | 
            -
                  DataFrame.create(t)
         | 
| 138 | 
            +
                  take(indices)
         | 
| 36 139 | 
             
                end
         | 
| 37 140 | 
             
              end
         | 
| 38 141 | 
             
            end
         | 
| @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 2 |  | 
| 3 3 | 
             
            module RedAmber
         | 
| 4 | 
            -
              #  | 
| 4 | 
            +
              # Mix-in for the class DataFrame
         | 
| 5 5 | 
             
              module DataFrameLoadSave
         | 
| 6 6 | 
             
                # Enable `self.load` as class method of DataFrame
         | 
| 7 7 | 
             
                def self.included(klass)
         | 
| @@ -10,30 +10,98 @@ module RedAmber | |
| 10 10 |  | 
| 11 11 | 
             
                # Enable `self.load` as class method of DataFrame
         | 
| 12 12 | 
             
                module ClassMethods
         | 
| 13 | 
            -
                  # Load DataFrame via Arrow::Table.load
         | 
| 14 | 
            -
                   | 
| 15 | 
            -
             | 
| 13 | 
            +
                  # Load DataFrame via Arrow::Table.load.
         | 
| 14 | 
            +
                  #
         | 
| 15 | 
            +
                  # Format is automatically detected by extension.
         | 
| 16 | 
            +
                  # @!method load(input, format: nil, compression: nil, schema: nil, skip_lines: nil)
         | 
| 17 | 
            +
                  # @param input [path]
         | 
| 18 | 
            +
                  #   source path, URI or Arrow::Buffer.
         | 
| 19 | 
            +
                  # @param format [:arrow_file, :batch, :arrows, :arrow_stream, :stream, :csv, :tsv]
         | 
| 20 | 
            +
                  #   format specifier.
         | 
| 21 | 
            +
                  # @param compression [:gzip, nil]
         | 
| 22 | 
            +
                  #   compression type.
         | 
| 23 | 
            +
                  # @param schema [Arrow::Schema]
         | 
| 24 | 
            +
                  #   schema of table.
         | 
| 25 | 
            +
                  # @param skip_lines [Regexp]
         | 
| 26 | 
            +
                  #   pattern of rows to skip.
         | 
| 27 | 
            +
                  # @return [DataFrame]
         | 
| 28 | 
            +
                  #   loaded DataFrame.
         | 
| 29 | 
            +
                  # @example Load a tsv file
         | 
| 30 | 
            +
                  #   DataFrame.load("file.tsv")
         | 
| 31 | 
            +
                  #
         | 
| 32 | 
            +
                  # @example Load a csv.gz file
         | 
| 33 | 
            +
                  #   DataFrame.load("file.csv.gz")
         | 
| 34 | 
            +
                  #
         | 
| 35 | 
            +
                  # @example Load from URI
         | 
| 36 | 
            +
                  #   DataFrame.load(URI("https://some_uri/file.csv"))
         | 
| 37 | 
            +
                  #
         | 
| 38 | 
            +
                  # @example Load from a Buffer
         | 
| 39 | 
            +
                  #   DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv)
         | 
| 40 | 
            +
                  #     name,age
         | 
| 41 | 
            +
                  #     Yasuko,68
         | 
| 42 | 
            +
                  #     Rui,49
         | 
| 43 | 
            +
                  #     Hinata,28
         | 
| 44 | 
            +
                  #   BUFFER
         | 
| 45 | 
            +
                  #
         | 
| 46 | 
            +
                  # @example Load from a Buffer skipping comment line
         | 
| 47 | 
            +
                  #   DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines: /^#/)
         | 
| 48 | 
            +
                  #     # comment
         | 
| 49 | 
            +
                  #     name,age
         | 
| 50 | 
            +
                  #     Yasuko,68
         | 
| 51 | 
            +
                  #     Rui,49
         | 
| 52 | 
            +
                  #     Hinata,28
         | 
| 53 | 
            +
                  #   BUFFER
         | 
| 54 | 
            +
                  #
         | 
| 55 | 
            +
                  def load(input, **options)
         | 
| 56 | 
            +
                    DataFrame.new(Arrow::Table.load(input, options))
         | 
| 16 57 | 
             
                  end
         | 
| 17 58 | 
             
                end
         | 
| 18 59 |  | 
| 19 60 | 
             
                # Save DataFrame
         | 
| 20 61 | 
             
                #
         | 
| 21 | 
            -
                #  | 
| 22 | 
            -
                 | 
| 62 | 
            +
                # Format is automatically detected by extension.
         | 
| 63 | 
            +
                # @!method save(output, format: nil, compression: nil, schema: nil, skip_lines: nil)
         | 
| 64 | 
            +
                # @param output [path]
         | 
| 65 | 
            +
                #   output path.
         | 
| 66 | 
            +
                # @param format [:arrow_file, :batch, :arrows, :arrow_stream, :stream, :csv, :tsv]
         | 
| 67 | 
            +
                #   format specifier.
         | 
| 68 | 
            +
                # @param compression [:gzip, nil]
         | 
| 69 | 
            +
                #   compression type.
         | 
| 70 | 
            +
                # @param schema [Arrow::Schema]
         | 
| 71 | 
            +
                #   schema of table.
         | 
| 72 | 
            +
                # @param skip_lines [Regexp]
         | 
| 73 | 
            +
                #   pattern of rows to skip.
         | 
| 74 | 
            +
                # @return [DataFrame]
         | 
| 75 | 
            +
                #   self.
         | 
| 76 | 
            +
                # @example Save a csv file
         | 
| 77 | 
            +
                #   dataframe.save("file.csv")
         | 
| 78 | 
            +
                #
         | 
| 79 | 
            +
                # @example Save a csv.gz file
         | 
| 80 | 
            +
                #   dataframe.save("file.csv.gz")
         | 
| 81 | 
            +
                #
         | 
| 82 | 
            +
                # @example Save an arrow file
         | 
| 83 | 
            +
                #   dataframe.save("file.arrow")
         | 
| 84 | 
            +
                #
         | 
| 85 | 
            +
                def save(output, **options)
         | 
| 23 86 | 
             
                  @table.save(output, options)
         | 
| 24 87 | 
             
                  self
         | 
| 25 88 | 
             
                end
         | 
| 26 89 |  | 
| 27 90 | 
             
                # Save and reload to cast automatically
         | 
| 28 | 
            -
                # | 
| 91 | 
            +
                # via a temporary buffer (tsv format by default).
         | 
| 92 | 
            +
                #
         | 
| 93 | 
            +
                # @param format [Symbol]
         | 
| 94 | 
            +
                #   format specifier.
         | 
| 95 | 
            +
                # @return [DataFrame]
         | 
| 96 | 
            +
                #   reloaded DataFrame.
         | 
| 29 97 | 
             
                #
         | 
| 30 98 | 
             
                # @note experimental feature
         | 
| 31 99 | 
             
                def auto_cast(format: :tsv)
         | 
| 32 100 | 
             
                  return self if empty?
         | 
| 33 101 |  | 
| 34 | 
            -
                   | 
| 35 | 
            -
                  save( | 
| 36 | 
            -
                  DataFrame.load( | 
| 102 | 
            +
                  buffer = Arrow::ResizableBuffer.new(1024)
         | 
| 103 | 
            +
                  save(buffer, format: format)
         | 
| 104 | 
            +
                  DataFrame.load(buffer, format: format)
         | 
| 37 105 | 
             
                end
         | 
| 38 106 | 
             
              end
         | 
| 39 107 | 
             
            end
         |