red_amber 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -1
- data/.rubocop_todo.yml +2 -15
- data/.yardopts +1 -0
- data/CHANGELOG.md +35 -0
- data/Gemfile +1 -0
- data/README.md +206 -16
- data/doc/DataFrame.md +63 -73
- data/doc/Vector.md +25 -0
- data/doc/{47_examples_of_red_amber.ipynb → examples_of_red_amber.ipynb} +693 -111
- data/lib/red_amber/data_frame.rb +26 -8
- data/lib/red_amber/data_frame_displayable.rb +7 -5
- data/lib/red_amber/group.rb +25 -27
- data/lib/red_amber/vector_selectable.rb +2 -0
- data/lib/red_amber/vector_updatable.rb +22 -1
- data/lib/red_amber/version.rb +1 -1
- metadata +4 -3
    
        data/lib/red_amber/data_frame.rb
    CHANGED
    
    | @@ -13,10 +13,7 @@ module RedAmber | |
| 13 13 |  | 
| 14 14 | 
             
                def initialize(*args)
         | 
| 15 15 | 
             
                  @variables = @keys = @vectors = @types = @data_types = nil
         | 
| 16 | 
            -
                   | 
| 17 | 
            -
                  #  [Arrow::Table] == [nil] shows ArgumentError
         | 
| 18 | 
            -
                  #  temporary use yoda condition to workaround
         | 
| 19 | 
            -
                  if args.empty? || args == [[]] || args == [{}] || [nil] == args
         | 
| 16 | 
            +
                  if args.empty? || args[0] == [] || args[0] == {} || args[0].nil?
         | 
| 20 17 | 
             
                    # DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
         | 
| 21 18 | 
             
                    #   returns empty DataFrame
         | 
| 22 19 | 
             
                    @table = Arrow::Table.new({}, [])
         | 
| @@ -34,6 +31,7 @@ module RedAmber | |
| 34 31 | 
             
                        raise DataFrameTypeError, "invalid argument: #{arg}"
         | 
| 35 32 | 
             
                      end
         | 
| 36 33 | 
             
                  end
         | 
| 34 | 
            +
                  name_unnamed_keys
         | 
| 37 35 | 
             
                end
         | 
| 38 36 |  | 
| 39 37 | 
             
                def self.load(path, options = {})
         | 
| @@ -78,12 +76,12 @@ module RedAmber | |
| 78 76 | 
             
                alias_method :var_names, :keys
         | 
| 79 77 |  | 
| 80 78 | 
             
                def key?(key)
         | 
| 81 | 
            -
                   | 
| 79 | 
            +
                  keys.include?(key.to_sym)
         | 
| 82 80 | 
             
                end
         | 
| 83 81 | 
             
                alias_method :has_key?, :key?
         | 
| 84 82 |  | 
| 85 83 | 
             
                def key_index(key)
         | 
| 86 | 
            -
                   | 
| 84 | 
            +
                  keys.find_index(key.to_sym)
         | 
| 87 85 | 
             
                end
         | 
| 88 86 | 
             
                alias_method :find_index, :key_index
         | 
| 89 87 | 
             
                alias_method :index, :key_index
         | 
| @@ -144,8 +142,10 @@ module RedAmber | |
| 144 142 | 
             
                  end
         | 
| 145 143 | 
             
                end
         | 
| 146 144 |  | 
| 147 | 
            -
                def group(*group_keys)
         | 
| 148 | 
            -
                  Group.new(self, group_keys)
         | 
| 145 | 
            +
                def group(*group_keys, &block)
         | 
| 146 | 
            +
                  g = Group.new(self, group_keys)
         | 
| 147 | 
            +
                  g = g.summarize(&block) if block
         | 
| 148 | 
            +
                  g
         | 
| 149 149 | 
             
                end
         | 
| 150 150 |  | 
| 151 151 | 
             
                private
         | 
| @@ -182,5 +182,23 @@ module RedAmber | |
| 182 182 | 
             
                  html = IRuby::HTML.table(converted.to_h, maxrows: 8, maxcols: 15)
         | 
| 183 183 | 
             
                  "#{self.class} <#{size} x #{n_keys} vector#{pl(n_keys)}> #{html}"
         | 
| 184 184 | 
             
                end
         | 
| 185 | 
            +
             | 
| 186 | 
            +
                def name_unnamed_keys
         | 
| 187 | 
            +
                  return unless @table[:'']
         | 
| 188 | 
            +
             | 
| 189 | 
            +
                  # We can't use #keys because it causes mismatch of @table and @keys
         | 
| 190 | 
            +
                  keys = @table.schema.fields.map { |f| f.name.to_sym }
         | 
| 191 | 
            +
                  unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
         | 
| 192 | 
            +
                  fields =
         | 
| 193 | 
            +
                    @table.schema.fields.map do |field|
         | 
| 194 | 
            +
                      if field.name.empty?
         | 
| 195 | 
            +
                        Arrow::Field.new(unnamed, field.data_type)
         | 
| 196 | 
            +
                      else
         | 
| 197 | 
            +
                        field
         | 
| 198 | 
            +
                      end
         | 
| 199 | 
            +
                    end
         | 
| 200 | 
            +
                  schema = Arrow::Schema.new(fields)
         | 
| 201 | 
            +
                  @table = Arrow::Table.new(schema, @table.columns)
         | 
| 202 | 
            +
                end
         | 
| 185 203 | 
             
              end
         | 
| 186 204 | 
             
            end
         | 
| @@ -5,6 +5,8 @@ require 'stringio' | |
| 5 5 | 
             
            module RedAmber
         | 
| 6 6 | 
             
              # mix-ins for the class DataFrame
         | 
| 7 7 | 
             
              module DataFrameDisplayable
         | 
| 8 | 
            +
                INDEX_KEY = :index_key_for_format_table
         | 
| 9 | 
            +
             | 
| 8 10 | 
             
                def to_s
         | 
| 9 11 | 
             
                  return '' if empty?
         | 
| 10 12 |  | 
| @@ -139,7 +141,7 @@ module RedAmber | |
| 139 141 | 
             
                  original = self
         | 
| 140 142 | 
             
                  indices = size > head + tail ? [*0...head, *(size - tail)...size] : [*0...size]
         | 
| 141 143 | 
             
                  df = slice(indices).assign do
         | 
| 142 | 
            -
                    assigner = {  | 
| 144 | 
            +
                    assigner = { INDEX_KEY => indices.map { |i| (i + 1).to_s } }
         | 
| 143 145 | 
             
                    vectors.each_with_object(assigner) do |v, a|
         | 
| 144 146 | 
             
                      a[v.key] = v.to_a.map do |e|
         | 
| 145 147 | 
             
                        if e.nil?
         | 
| @@ -155,12 +157,12 @@ module RedAmber | |
| 155 157 | 
             
                    end
         | 
| 156 158 | 
             
                  end
         | 
| 157 159 |  | 
| 158 | 
            -
                  df = df.pick { [keys | 
| 160 | 
            +
                  df = df.pick { [INDEX_KEY, keys - [INDEX_KEY]] }
         | 
| 159 161 | 
             
                  df = size > head + tail ? df[0, 0, 0...head, 0, -tail..-1] : df[0, 0, 0..-1]
         | 
| 160 162 | 
             
                  df = df.assign do
         | 
| 161 163 | 
             
                    vectors.each_with_object({}) do |v, assigner|
         | 
| 162 | 
            -
                      vec = v.replace(0, v.key.to_s)
         | 
| 163 | 
            -
                             .replace(1, v.key ==  | 
| 164 | 
            +
                      vec = v.replace(0, v.key == INDEX_KEY ? '' : v.key.to_s)
         | 
| 165 | 
            +
                             .replace(1, v.key == INDEX_KEY ? '' : "<#{original[v.key].type}>")
         | 
| 164 166 | 
             
                      assigner[v.key] = size > head + tail ? vec.replace(head + 2, ':') : vec
         | 
| 165 167 | 
             
                    end
         | 
| 166 168 | 
             
                  end
         | 
| @@ -197,7 +199,7 @@ module RedAmber | |
| 197 199 | 
             
                end
         | 
| 198 200 |  | 
| 199 201 | 
             
                def format_for_column(vector, original, width)
         | 
| 200 | 
            -
                  if vector.key !=  | 
| 202 | 
            +
                  if vector.key != INDEX_KEY && !original[vector.key].numeric?
         | 
| 201 203 | 
             
                    "%-#{width}s"
         | 
| 202 204 | 
             
                  else
         | 
| 203 205 | 
             
                    "%#{width}s"
         | 
    
        data/lib/red_amber/group.rb
    CHANGED
    
    | @@ -16,36 +16,30 @@ module RedAmber | |
| 16 16 | 
             
                  @group = @table.group(*@group_keys)
         | 
| 17 17 | 
             
                end
         | 
| 18 18 |  | 
| 19 | 
            -
                 | 
| 20 | 
            -
             | 
| 19 | 
            +
                functions = %i[count sum product mean min max stddev variance]
         | 
| 20 | 
            +
                functions.each do |function|
         | 
| 21 | 
            +
                  define_method(function) do |*summary_keys|
         | 
| 22 | 
            +
                    by(function, summary_keys)
         | 
| 23 | 
            +
                  end
         | 
| 21 24 | 
             
                end
         | 
| 22 25 |  | 
| 23 | 
            -
                def  | 
| 24 | 
            -
                   | 
| 26 | 
            +
                def inspect
         | 
| 27 | 
            +
                  tallys = @dataframe.pick(@group_keys).vectors.map.with_object({}) do |v, h|
         | 
| 28 | 
            +
                    h[v.key] = v.tally
         | 
| 29 | 
            +
                  end
         | 
| 30 | 
            +
                  "#<#{self.class}:#{format('0x%016x', object_id)}\n#{tallys}>"
         | 
| 25 31 | 
             
                end
         | 
| 26 32 |  | 
| 27 | 
            -
                def  | 
| 28 | 
            -
                   | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
             | 
| 32 | 
            -
                   | 
| 33 | 
            -
             | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 36 | 
            -
                   | 
| 37 | 
            -
                end
         | 
| 38 | 
            -
             | 
| 39 | 
            -
                def max(*summary_keys)
         | 
| 40 | 
            -
                  by(:max, summary_keys)
         | 
| 41 | 
            -
                end
         | 
| 42 | 
            -
             | 
| 43 | 
            -
                def stddev(*summary_keys)
         | 
| 44 | 
            -
                  by(:stddev, summary_keys)
         | 
| 45 | 
            -
                end
         | 
| 46 | 
            -
             | 
| 47 | 
            -
                def variance(*summary_keys)
         | 
| 48 | 
            -
                  by(:variance, summary_keys)
         | 
| 33 | 
            +
                def summarize(&block)
         | 
| 34 | 
            +
                  agg = instance_eval(&block)
         | 
| 35 | 
            +
                  case agg
         | 
| 36 | 
            +
                  when DataFrame
         | 
| 37 | 
            +
                    agg
         | 
| 38 | 
            +
                  when Array
         | 
| 39 | 
            +
                    agg.reduce { |aggregated, df| aggregated.assign(df.to_h) }
         | 
| 40 | 
            +
                  else
         | 
| 41 | 
            +
                    raise GroupArgumentError, "Unknown argument: #{agg}"
         | 
| 42 | 
            +
                  end
         | 
| 49 43 | 
             
                end
         | 
| 50 44 |  | 
| 51 45 | 
             
                private
         | 
| @@ -55,7 +49,11 @@ module RedAmber | |
| 55 49 | 
             
                  d = summary_keys - @dataframe.keys
         | 
| 56 50 | 
             
                  raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless summary_keys.empty? || d.empty?
         | 
| 57 51 |  | 
| 58 | 
            -
                  RedAmber::DataFrame.new(@group.send(func, *summary_keys))
         | 
| 52 | 
            +
                  df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
         | 
| 53 | 
            +
                  df = df[df.keys[-1], df.keys[0...-1]]
         | 
| 54 | 
            +
                  # if counts are the same (no nil included), aggregate count columns.
         | 
| 55 | 
            +
                  df = df[df.keys[0..1]].rename(df.keys[1], :count) if func == :count && df.to_h.values[1..].uniq.size == 1
         | 
| 56 | 
            +
                  df
         | 
| 59 57 | 
             
                end
         | 
| 60 58 | 
             
              end
         | 
| 61 59 | 
             
            end
         | 
| @@ -64,6 +64,8 @@ module RedAmber | |
| 64 64 | 
             
                    return filter_by_array(arg)
         | 
| 65 65 | 
             
                  when Arrow::Array
         | 
| 66 66 | 
             
                    array = arg
         | 
| 67 | 
            +
                  when Range
         | 
| 68 | 
            +
                    array = normalize_element(arg)
         | 
| 67 69 | 
             
                  else
         | 
| 68 70 | 
             
                    unless arg.is_a?(Numeric) || booleans?([arg])
         | 
| 69 71 | 
             
                      raise VectorArgumentError, "Argument must be numeric or boolean: #{args}"
         | 
| @@ -12,7 +12,15 @@ module RedAmber | |
| 12 12 | 
             
                # @param replacer [Array, Vector, Arrow::Array] new data to replace for.
         | 
| 13 13 | 
             
                # @return [Vector] Replaced new Vector
         | 
| 14 14 | 
             
                def replace(args, replacer)
         | 
| 15 | 
            -
                  args = | 
| 15 | 
            +
                  args =
         | 
| 16 | 
            +
                    case args
         | 
| 17 | 
            +
                    when Array
         | 
| 18 | 
            +
                      args
         | 
| 19 | 
            +
                    when Range
         | 
| 20 | 
            +
                      normalize_element(args)
         | 
| 21 | 
            +
                    else
         | 
| 22 | 
            +
                      Array(args)
         | 
| 23 | 
            +
                    end
         | 
| 16 24 | 
             
                  replacer = Array(replacer)
         | 
| 17 25 | 
             
                  return self if args.empty? || args[0].nil?
         | 
| 18 26 |  | 
| @@ -22,6 +30,7 @@ module RedAmber | |
| 22 30 | 
             
                    if vector.boolean?
         | 
| 23 31 | 
             
                      vector
         | 
| 24 32 | 
             
                    elsif vector.numeric?
         | 
| 33 | 
            +
                      replacer.sort_by! { |x| args[replacer.index(x)] } if replacer # rubocop:disable Style/SafeNavigation
         | 
| 25 34 | 
             
                      Vector.new(indices).is_in(vector)
         | 
| 26 35 | 
             
                    else
         | 
| 27 36 | 
             
                      raise VectorArgumentError, "Invalid data type #{args}"
         | 
| @@ -50,6 +59,18 @@ module RedAmber | |
| 50 59 | 
             
                  is_nil.if_else(false, self).invert
         | 
| 51 60 | 
             
                end
         | 
| 52 61 |  | 
| 62 | 
            +
                def shift(amount = 1, fill: nil)
         | 
| 63 | 
            +
                  raise VectorArgumentError, 'Shift amount is too large' if amount.abs > size
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                  if amount.positive?
         | 
| 66 | 
            +
                    replace(amount..-1, self[0...-amount]).replace(0...amount, fill)
         | 
| 67 | 
            +
                  elsif amount.negative?
         | 
| 68 | 
            +
                    replace(0...amount, self[-amount..]).replace(amount..-1, fill)
         | 
| 69 | 
            +
                  else # amount == 0
         | 
| 70 | 
            +
                    self
         | 
| 71 | 
            +
                  end
         | 
| 72 | 
            +
                end
         | 
| 73 | 
            +
             | 
| 53 74 | 
             
                private
         | 
| 54 75 |  | 
| 55 76 | 
             
                # [Ternary]: replace_with(booleans, replacements) => vector
         | 
    
        data/lib/red_amber/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: red_amber
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1.7
         | 
| 4 | 
            +
              version: 0.1.8
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Hirokazu SUZUKI (heronshoes)
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2022- | 
| 11 | 
            +
            date: 2022-08-03 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: red-arrow
         | 
| @@ -34,6 +34,7 @@ extra_rdoc_files: [] | |
| 34 34 | 
             
            files:
         | 
| 35 35 | 
             
            - ".rubocop.yml"
         | 
| 36 36 | 
             
            - ".rubocop_todo.yml"
         | 
| 37 | 
            +
            - ".yardopts"
         | 
| 37 38 | 
             
            - CHANGELOG.md
         | 
| 38 39 | 
             
            - Gemfile
         | 
| 39 40 | 
             
            - LICENSE
         | 
| @@ -41,10 +42,10 @@ files: | |
| 41 42 | 
             
            - Rakefile
         | 
| 42 43 | 
             
            - benchmark/csv_load_penguins.yml
         | 
| 43 44 | 
             
            - benchmark/drop_nil.yml
         | 
| 44 | 
            -
            - doc/47_examples_of_red_amber.ipynb
         | 
| 45 45 | 
             
            - doc/CODE_OF_CONDUCT.md
         | 
| 46 46 | 
             
            - doc/DataFrame.md
         | 
| 47 47 | 
             
            - doc/Vector.md
         | 
| 48 | 
            +
            - doc/examples_of_red_amber.ipynb
         | 
| 48 49 | 
             
            - doc/image/arrow_table_new.png
         | 
| 49 50 | 
             
            - doc/image/dataframe/assign.png
         | 
| 50 51 | 
             
            - doc/image/dataframe/drop.png
         |