RubyGems - red_amber - Versions diffs - 0.3.0 → 0.4.1 - Mend

red_amber 0.3.0 → 0.4.1

Files changed (42) hide show

checksums.yaml +4 -4
data/.rubocop.yml +56 -22
data/.yardopts +2 -0
data/CHANGELOG.md +178 -0
data/Gemfile +1 -1
data/LICENSE +1 -1
data/README.md +29 -30
data/benchmark/basic.yml +7 -7
data/benchmark/combine.yml +3 -3
data/benchmark/dataframe.yml +15 -9
data/benchmark/group.yml +6 -6
data/benchmark/reshape.yml +6 -6
data/benchmark/vector.yml +6 -3
data/doc/DataFrame.md +32 -12
data/doc/DataFrame_Comparison.md +65 -0
data/doc/SubFrames.md +11 -0
data/doc/Vector.md +207 -1
data/doc/yard-templates/default/fulldoc/html/css/common.css +6 -0
data/lib/red_amber/data_frame.rb +454 -85
data/lib/red_amber/data_frame_combinable.rb +609 -115
data/lib/red_amber/data_frame_displayable.rb +313 -34
data/lib/red_amber/data_frame_indexable.rb +122 -19
data/lib/red_amber/data_frame_loadsave.rb +78 -10
data/lib/red_amber/data_frame_reshaping.rb +184 -14
data/lib/red_amber/data_frame_selectable.rb +623 -70
data/lib/red_amber/data_frame_variable_operation.rb +452 -35
data/lib/red_amber/group.rb +186 -22
data/lib/red_amber/helper.rb +74 -14
data/lib/red_amber/refinements.rb +26 -6
data/lib/red_amber/subframes.rb +1101 -0
data/lib/red_amber/vector.rb +362 -11
data/lib/red_amber/vector_aggregation.rb +312 -0
data/lib/red_amber/vector_binary_element_wise.rb +506 -0
data/lib/red_amber/vector_selectable.rb +265 -23
data/lib/red_amber/vector_unary_element_wise.rb +529 -0
data/lib/red_amber/vector_updatable.rb +278 -34
data/lib/red_amber/version.rb +2 -1
data/lib/red_amber.rb +13 -1
data/red_amber.gemspec +2 -2
metadata +13 -8
data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
data/lib/red_amber/vector_functions.rb +0 -242

data/lib/red_amber/vector_aggregation.rb ADDED Viewed

@@ -0,0 +1,312 @@
+# frozen_string_literal: true
+# Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
+# reference: https://arrow.apache.org/docs/cpp/compute.html
+module RedAmber
+  # Representing a series of data.
+  class Vector
+    class << self
+      private
+      # @!macro [attach] define_unary_aggregation
+      #   [Unary aggregation function] Returns a scalar.
+      #
+      def define_unary_aggregation(function)
+        define_method(function) do |**options|
+          datum = exec_func_unary(function, options)
+          get_scalar(datum)
+        end
+      end
+    end
+    # Not implemented in red-arrow yet:
+    # Arrow::IndexOptions, Arrow::ModeOptions, Arrow::TDigestOptions
+    # @!macro scalar_aggregate_options
+    #   @param skip_nulls [true, false]
+    #     If true, nil values are ignored.
+    #     Otherwise, if any value is nil, emit nil.
+    #   @param min_count [Integer]
+    #     if less than this many non-nil values are observed, emit nil.
+    #     If skip_nulls is false, this option is not respected.
+    # @!macro count_options
+    #   @param mode [:only_valid, :only_null, :all]
+    #     control count aggregate kernel behavior.
+    #     - only_valid: count only non-nil values.
+    #     - only_null: count only nil.
+    #     - all: count both.
+    # @!macro variance_options
+    #   @param ddof [0, 1]
+    #     Control Delta Degrees of Freedom (ddof) of Variance and Stddev kernel.
+    #     The divisor used in calculations is N - ddof, where N is the number
+    #     of elements. By default, ddof is zero, and population variance or stddev
+    #     is returned.
+    #   @macro scalar_aggregate_options
+    # Test whether all elements in self are evaluated to true.
+    #
+    # @!method all(skip_nulls: true, min_count: 1)
+    # @macro scalar_aggregate_options
+    # @return [true, false]
+    #   `all` result of self.
+    # @example Default.
+    #   Vector.new(true, true, nil).all # => true
+    #
+    # @example Do not skip nils.
+    #   Vector.new(true, true, nil).all(skip_nulls: false) # => false
+    #
+    define_unary_aggregation :all
+    alias_method :all?, :all
+    # Test whether any elements in self are evaluated to true.
+    #
+    # @!method any(skip_nulls: true, min_count: 1)
+    # @macro scalar_aggregate_options
+    # @return [true, false]
+    #   `any` result of self.
+    # @example Default.
+    #   Vector.new(true, false, nil).any # => true
+    #
+    define_unary_aggregation :any
+    alias_method :any?, :any
+    # Approximate median of a numeric Vector with T-Digest algorithm.
+    #
+    # @!method approximate_median(skip_nulls: true, min_count: 1)
+    # @macro scalar_aggregate_options
+    # @return [Float]
+    #   median of self.
+    #   A nil is returned if there is no valid data point.
+    #
+    define_unary_aggregation :approximate_median
+    alias_method :median, :approximate_median
+    # Count the number of nil / non-nil values.
+    #
+    # @!method count(mode: :only_valid)
+    # @macro count_options
+    # @return [Integer] count of self.
+    # @example Count only non-nil (default)
+    #   Vector.new(1.0, -2.0, Float::NAN, nil).count # => 3
+    #
+    # @example Count nil only.
+    #   Vector.new(1.0, -2.0, Float::NAN, nil).count(mode: :only_null) # => 1
+    #
+    # @example Count both non-nil and nil.
+    #   Vector.new(1.0, -2.0, Float::NAN, nil).count(mode: :all) # => 4
+    #
+    define_unary_aggregation :count
+    # Count the number of unique values.
+    #
+    # @!method count_distinct(mode: :only_valid)
+    # @macro count_options
+    # @return [Integer]
+    #   unique count of self.
+    # @example
+    #   vector = Vector.new(1, 1.0, nil, nil, Float::NAN, Float::NAN)
+    #   vector
+    #
+    #   # =>
+    #   #<RedAmber::Vector(:double, size=6):0x000000000000d390>
+    #   [1.0, 1.0, nil, nil, NaN, NaN]
+    #
+    #   # Float::NANs are counted as 1.
+    #   vector.count_uniq # => 2
+    #
+    #   # nils are counted as 1.
+    #   vector.count_uniq(mode: :only_null) # => 1
+    #
+    #   vector.count_uniq(mode: :all) # => 3
+    #
+    define_unary_aggregation :count_distinct
+    alias_method :count_uniq, :count_distinct
+    # Compute maximum value of self.
+    #
+    # @!method max(skip_nulls: true, min_count: 1)
+    # @macro scalar_aggregate_options
+    # @return [Numeric]
+    #   maximum value of self.
+    #
+    define_unary_aggregation :max
+    # Compute mean value of self.
+    #
+    # @!method mean(skip_nulls: true, min_count: 1)
+    # @macro scalar_aggregate_options
+    # @return [Numeric]
+    #   mean of self.
+    #
+    define_unary_aggregation :mean
+    # Compute minimum value of self.
+    #
+    # @!method min(skip_nulls: true, min_count: 1)
+    # @macro scalar_aggregate_options
+    # @return [Numeric]
+    #   minimum of self.
+    #
+    define_unary_aggregation :min
+    # Compute the min and max value of self.
+    #
+    # @!method min_max(skip_nulls: true, min_count: 1)
+    # @macro scalar_aggregate_options
+    # @return [Array<min, max>]
+    #   min and max of self in an Array.
+    #
+    define_unary_aggregation :min_max
+    # Compute product value of self.
+    #
+    # @note Self must be a numeric Vector.
+    # @!method product(skip_nulls: true, min_count: 1)
+    # @macro scalar_aggregate_options
+    # @return [Numeric]
+    #   product of self.
+    #
+    define_unary_aggregation :product
+    # Calculate standard deviation of self.
+    #
+    # @note Self must be a numeric Vector.
+    # @!method stddev(ddof: 0, skip_nulls: true, min_count: 1)
+    # @macro variance_options
+    # @return [Float]
+    #   standard deviation of self. Biased (ddof=0) by default.
+    #
+    define_unary_aggregation :stddev
+    # Calculate unbiased standard deviation of self.
+    #
+    # @note Self must be a numeric Vector.
+    # @!method sd(ddof: 1, skip_nulls: true, min_count: 1)
+    # @macro variance_options
+    # @return [Float]
+    #   standard deviation of self. Unbiased (ddof=1) by default.
+    #
+    def sd
+      stddev(ddof: 1)
+    end
+    alias_method :std, :sd
+    # Compute sum of self.
+    #
+    # @note Self must be a numeric Vector.
+    # @!method sum(skip_nulls: true, min_count: 1)
+    # @macro scalar_aggregate_options
+    # @return [Numeric]
+    #   sum of self.
+    #
+    define_unary_aggregation :sum
+    # Calculate variance of self.
+    #
+    # @note Self must be a numeric Vector.
+    # @!method variance(ddof: 0, skip_nulls: true, min_count: 1)
+    # @macro variance_options
+    #
+    # @return [Float]
+    #   variance of self. Biased (ddof=0) by default.
+    #
+    define_unary_aggregation :variance
+    # Calculate unbiased variance of self.
+    #
+    # @note self must be a numeric Vector.
+    # @!method unbiased_variance(ddof: 1, skip_nulls: true, min_count: 1)
+    # @macro variance_options
+    # @return [Float]
+    #   variance of self. Unbiased (ddof=1) by default.
+    #
+    def unbiased_variance
+      variance(ddof: 1)
+    end
+    alias_method :var, :unbiased_variance
+    # @!macro quantile_interpolation
+    #   @param interpolation [Symbol]
+    #     specifies interpolation method to use,
+    #     when the quantile lies between the data i and j.
+    #     - Default value is :linear, which returns i + (j - i) * fraction.
+    #     - lower: returns i.
+    #     - higher: returns j.
+    #     - nearest: returns i or j, whichever is closer.
+    #     - midpoint: returns (i + j) / 2.
+    # Returns a quantile value.
+    # - 0.5 quantile (median) is returned by default.
+    # - Or return quantile for specified probability (prob).
+    # - If quantile lies between two data points, interpolated value is
+    #   returned based on selected interpolation method.
+    # - Nils and NaNs are ignored.
+    # - Nil is returned if there is no valid data point.
+    #
+    # @param prob [Float]
+    #   probability.
+    # @macro quantile_interpolation
+    # @macro scalar_aggregate_options
+    # @return [Float]
+    #   quantile of self.
+    # @example
+    #   penguins[:bill_depth_mm].quantile
+    #
+    #   # =>
+    #   17.3 # default is prob = 0.5
+    #
+    def quantile(prob = 0.5, interpolation: :linear, skip_nulls: true, min_count: 0)
+      unless (0..1).cover? prob
+        raise VectorArgumentError,
+              "Invalid: probability #{prob} must be between 0 and 1"
+      end
+      datum = find(:quantile).execute([data],
+                                      q: prob,
+                                      interpolation: interpolation,
+                                      skip_nulls: skip_nulls,
+                                      min_count: min_count)
+      datum.value.to_a.first
+    end
+    # Return quantiles in a DataFrame
+    #
+    # @param probs [Array]
+    #   Array of probabilities. Default probabilities are 0.0, 0.25, 0.5, 0.75, 1.0.
+    # @macro quantile_interpolation
+    # @macro scalar_aggregate_options
+    # @return [DataFrame]
+    #   quantiles of self.
+    # @example
+    #   penguins[:bill_depth_mm].quantiles([0.05, 0.95])
+    #
+    #   # =>
+    #   #<RedAmber::DataFrame : 2 x 2 Vectors, 0x000000000000fb2c>
+    #        probs quantiles
+    #     <double>  <double>
+    #   0     0.05      13.9
+    #   1     0.95      20.0
+    #
+    def quantiles(probs = [0.0, 0.25, 0.5, 0.75, 1.0],
+                  interpolation: :linear, skip_nulls: true, min_count: 0)
+      if probs.empty? || !probs.all? { |q| (0..1).cover?(q) }
+        raise VectorArgumentError, "Invarid probavilities #{probs}"
+      end
+      DataFrame.new(
+        probs: probs,
+        quantiles: probs.map do |q|
+          quantile(q,
+                   interpolation: interpolation, skip_nulls: skip_nulls,
+                   min_count: min_count)
+        end
+      )
+    end
+  end
+end