statistical 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,144 @@
1
+ require 'statistical/helpers'
2
+
3
+ module Statistical
4
+ module Distribution
5
# Two-point distribution over two numeric states. With the default states
# (0 for failure, 1 for success) it behaves as the Bernoulli distribution.
#
# @note Both states must be Numeric and the failure state must be strictly
#   smaller than the success state; the CDF and quantile rely on that
#   ordering.
#
# @author Vaibhav Yenamandra
# @attr_reader [Float] p probability of the success state
# @attr_reader [Float] q probability of the failure state (1 - p)
# @attr_reader [Hash] states Hash with keys :failure, :success holding the
#   respective state values (defaults to 0, 1 respectively)
# @attr_reader [Array<Numeric>] support the two states in ascending order
class TwoPoint
  attr_reader :p, :q, :states, :support

  # Returns a new instance of the TwoPoint distribution
  #
  # @param [Float] prob_success The probability of success, within [0, 1]
  # @param [Numeric] state_failure The value taken in the failure state
  # @param [Numeric] state_success The value taken in the success state
  # @raise [ArgumentError] if either state is not Numeric, if the states
  #   are equal, if state_failure > state_success, or if prob_success lies
  #   outside [0, 1]
  def initialize(prob_success = 0.5, state_failure = 0, state_success = 1)
    # Type check comes first so that non-numeric states are reported as
    # such, instead of failing inside the ordering comparisons below.
    unless state_failure.is_a?(Numeric) && state_success.is_a?(Numeric)
      raise ArgumentError,
            "States must be Numeric! Found #{state_failure.class} and #{state_success.class}"
    end

    if state_failure == state_success
      raise ArgumentError,
            'Success & failure must be two distinct states'
    end

    if state_failure > state_success
      raise ArgumentError,
            'Failure state must be smaller that the success state!'
    end

    if prob_success > 1 || prob_success < 0
      raise ArgumentError,
            "Probabilty of success must be within [0, 1]. Found #{prob_success}"
    end

    @p = prob_success
    @q = 1 - prob_success
    @states = {
      failure: state_failure,
      success: state_success
    }
    @support = @states.values.sort
  end

  # Returns the probability mass at a given state of the random variate X.
  # Essentially: "what's P(X=x)?"
  #
  # @param x [Numeric] The state the random variable takes
  # @return [Numeric] p if x is the success state, q if x is the failure
  #   state and 0 for any other value
  def pdf(x)
    return @p if @states[:success] == x
    return @q if @states[:failure] == x

    0
  end

  # Returns value of the cumulative distribution function at a point,
  # i.e. P(X <= x)
  #
  # @param x [Numeric] The point at which the CDF is evaluated
  # @return [Numeric] 0 below the failure state, q from the failure state
  #   up to (but excluding) the success state, 1 from the success state on
  def cdf(x)
    return 0 if x < @states[:failure]
    # Must be tested before falling through to q: at x == success both
    # states have been accumulated, so the CDF is 1, not q.
    return 1 if x >= @states[:success]

    @q
  end

  # Returns value of inverse CDF for a given probability
  #
  # @see #p_value
  #
  # @param [Numeric] p a value within [0, 1]
  # @return [Numeric] the failure state if p <= q, the success state
  #   otherwise
  # @raise [RangeError] if p > 1 or p < 0
  def quantile(p)
    raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1
    return @states[:failure] if p <= @q

    @states[:success]
  end

  # Returns the expected mean value for the calling instance.
  #
  # @return [Numeric] Mean of the distribution
  def mean
    @p * @states[:success] + @q * @states[:failure]
  end

  # Returns the expected value of variance for the calling instance,
  # computed as E[X^2] - E[X]^2.
  #
  # @return [Numeric] Variance of the distribution
  def variance
    @p * (@states[:success]**2) + @q * (@states[:failure]**2) - (mean**2)
  end

  # Compares two distribution instances and returns a boolean outcome
  # Available publicly as #==
  #
  # @note This also compares the states over which the distribution exists
  #   in addition to the success probability
  #
  # @private
  #
  # @param other A distribution object (preferred)
  # @return [Boolean] true if-and-only-if two instances are of the same
  #   class and have the same parameters.
  def eql?(other)
    other.is_a?(self.class) &&
      @p == other.p &&
      @states == other.states
  end

  alias :== :eql?
  alias :p_value :quantile

  private :eql?
end
143
+ end
144
+ end
@@ -0,0 +1,98 @@
1
+ require 'statistical/helpers'
2
+
3
+ module Statistical
4
+ module Distribution
5
# Continuous uniform distribution between two bounds. Offers the PDF, CDF,
# inverse CDF, mean and variance.
#
# @note The bounds may be given in either order: the smaller value is
#   always stored as `lower` and the larger one as `upper`, so
#   new(10, 2) describes the same distribution as new(2, 10).
#
# @author Vaibhav Yenamandra
#
# @attr_reader [Numeric] lower The lower bound of the uniform distribution.
#   Defaults to 0.0.
# @attr_reader [Numeric] upper The upper bound of the uniform distribution.
#   Defaults to 1.0.
# @attr_reader support The `Domain` built from lower and upper with the
#   :closed flag
class Uniform
  attr_reader :lower, :upper, :support

  # Builds a new `Statistical::Distribution::Uniform` instance
  #
  # @note if start > finish the two are swapped internally
  #
  # @param [Numeric] start one bound of the distribution.
  # @param [Numeric] finish the other bound of the distribution.
  # @return `Statistical::Distribution::Uniform` instance
  def initialize(start = 0.0, finish = 1.0)
    bounds = [start, finish]
    @lower = bounds.min
    @upper = bounds.max
    @support = Domain[@lower, @upper, :closed]
  end

  # Evaluates the probability density function at a point
  #
  # @param [Numeric] x A real valued point
  # @return [Float] 1 / (upper - lower) when `@support <=> x` is 0, and
  #   0.0 when it is 1 or -1
  def pdf(x)
    density = 1.0 / (@upper - @lower)
    # NOTE(review): presumably `Domain#<=>` yields 0 for a point inside
    # the support and +1 / -1 for points outside it - confirm in helpers.
    by_position = [density, 0.0, 0.0]
    by_position[@support <=> x]
  end

  # Evaluates the cumulative distribution function at a point
  #
  # @param [Numeric] x A real valued point
  # @return [Float] (x - lower) / (upper - lower) when `@support <=> x`
  #   is 0, 1.0 when it is 1 and 0.0 when it is -1
  def cdf(x)
    fraction = (x - @lower).fdiv(@upper - @lower)
    # NOTE(review): index 1 (-> 1.0) is presumably "x right of the
    # support" and index -1 (-> 0.0) "x left of it" - confirm in helpers.
    by_position = [fraction, 1.0, 0.0]
    by_position[@support <=> x]
  end

  # Evaluates the inverse CDF for a given probability
  #
  # @see #p_value
  #
  # @param [Numeric] p a value within [0, 1]
  # @return [Numeric] lower + p * (upper - lower)
  # @raise [RangeError] if p > 1 or p < 0
  def quantile(p)
    if p < 0 || p > 1
      raise RangeError, "`p` must be in [0, 1], found: #{p}"
    end

    width = @upper - @lower
    @lower + p * width
  end

  # Mean of the distribution: the midpoint of the two bounds.
  #
  # @author Vaibhav Yenamandra
  #
  # @return [Float] Mean of the distribution
  def mean
    bound_sum = @upper + @lower
    0.5 * bound_sum
  end

  # Variance of the distribution: (upper - lower)^2 / 12.
  #
  # @return [Float] Variance of the distribution
  def variance
    width = @upper - @lower
    (width**2) / 12.0
  end

  # Equality check between two distribution instances.
  # Available publicly as #==
  #
  # @private
  #
  # @param other A distribution object (preferred)
  # @return [Boolean] true if-and-only-if two instances are of the same
  #   class and have the same lower and upper bounds.
  def eql?(other)
    return false unless other.is_a?(self.class)

    @lower == other.lower && @upper == other.upper
  end

  alias_method :==, :eql?
  alias_method :p_value, :quantile

  private :eql?
end
97
+ end
98
+ end
@@ -0,0 +1,133 @@
1
+ module Statistical
2
+ # Module to collect all abstractions of distributions
3
+ module Distribution
4
# This class abstracts the discrete uniform distribution over a given set
# of elements
#
# @author Vaibhav Yenamandra
# @attr_reader [Array] support The sorted support set of values a random
#   variate from the distribution can take. Always has at least 1 value
# @attr_reader [Integer] count Number of elements in the support
# @attr_reader lower The first (smallest) element of the support
# @attr_reader upper The last (largest) element of the support
class UniformDiscrete
  attr_reader :count, :support, :lower, :upper

  # Returns a model for the discrete uniform distribution on all elements
  # present in the given set of elements `elems`
  #
  # @note An Array argument is sorted by the constructor, because the CDF
  #   and quantile computations rely on an ordered support. The elements
  #   must therefore be mutually comparable.
  #
  # @param [Integer, Array, Range] elems The elements over which the
  #   distribution exists. A lone Integer gives a single-point support.
  # @raise [ArgumentError] if elems isn't one of Integer, Array or Range,
  #   or if it contains no elements
  def initialize(elems)
    @support =
      case elems
      # Integer covers what used to be Fixnum and Bignum; those constants
      # were removed from Ruby and referencing them raises NameError.
      when Integer then [elems]
      when Array then elems.sort
      when Range then elems.to_a
      else
        raise ArgumentError,
              "Expected Fixnum, Bignum, Array or Range, found #{elems.class}"
      end

    # An empty support would leave lower/upper nil and make pdf divide by
    # zero, so reject it up front.
    if @support.empty?
      raise ArgumentError, 'Support must contain at least one element'
    end

    @count = @support.length
    @lower = @support.first
    @upper = @support.last
  end

  # Returns value of probability mass function at a point on the real
  # line
  #
  # @param [Numeric] k Point at which pdf is desired
  # @return [Float] 0.0 if k doesn't belong to the elements over which the
  #   current instance is distributed. 1/n otherwise where n is number
  #   of elements
  def pdf(k)
    return 1.0 / @count if @support.include?(k)

    0.0
  end

  # Returns value of cumulative distribution function at a point on the
  # real line. Uses a binary search on the sorted support internally.
  #
  # @param [Numeric] k Point at which cdf value is desired
  # @return [Float] 0.0 if k is on the left of the support,
  #   1.0 if k is at or beyond the last support element, otherwise the
  #   fraction of support elements that are <= k
  def cdf(k)
    return 0.0 if k < @lower
    return 1.0 if k >= @upper

    # The index of the first element strictly greater than k equals the
    # number of elements <= k. Such an element exists because k < @upper.
    below_or_equal = @support.bsearch_index { |elem| elem > k }
    below_or_equal.fdiv(@count)
  end

  # Returns value of inverse CDF for a given probability.
  #
  # @see #p_value
  #
  # @param [Numeric] p a value within [0, 1]
  # @return [Numeric] Returns inverse CDF for valid p
  # @raise [RangeError] if p > 1 or p < 0
  def quantile(p)
    raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1
    return @lower if p.zero?
    return @upper if (p - 1).zero?

    @support[(p * count).ceil - 1]
  end

  # Returns the mean value for the calling instance. Calculated mean, and
  # not inferred from simulations
  #
  # NOTE(review): `Array#mean` is not part of core Ruby; presumably it is
  # supplied by this project's helpers - confirm it is loaded here.
  #
  # @return [Float] Mean of the distribution
  def mean
    @support.mean
  end

  # Returns the variance for the calling instance.
  #
  # NOTE(review): `Array#variance` is not part of core Ruby; presumably it
  # is supplied by this project's helpers and computes the population
  # variance - confirm.
  #
  # @return [Float] Variance of the distribution
  def variance
    @support.variance
  end

  # Compares two distribution instances and returns a boolean
  # Available publicly as #==
  #
  # @private
  #
  # @param other A distribution object (preferred)
  # @return [Boolean] true if-and-only-if two instances are of the same
  #   class and have the same support.
  def eql?(other)
    other.is_a?(self.class) &&
      @support == other.support
  end

  alias :== :eql?
  alias :p_value :quantile

  private :eql?
end
132
+ end
133
+ end
@@ -0,0 +1,99 @@
1
+ require 'statistical/helpers'
2
+
3
+ module Statistical
4
+ module Distribution
5
# Two-parameter Weibull distribution, described by a scale and a shape
# parameter. Provides the PDF, CDF, inverse CDF, mean and variance in
# closed form.
#
# @note The parameters are converted with `to_f` but are not otherwise
#   validated by this class.
#
# @author Vaibhav Yenamandra
# @attr_reader [Float] scale The distribution's scale parameter
# @attr_reader [Float] shape The distribution's shape parameter
# @attr_reader support The `Domain` built from 0.0 and Float::INFINITY
#   with the :right_open flag
class Weibull
  attr_reader :scale, :shape, :support

  # Returns a new `Statistical::Distribution::Weibull` instance
  #
  # @param [Numeric] scale The distribution's scale parameter
  # @param [Numeric] shape The distribution's shape parameter
  # @return `Statistical::Distribution::Weibull` instance
  def initialize(scale = 1, shape = 1)
    @scale = scale.to_f
    @shape = shape.to_f
    @support = Domain[0.0, Float::INFINITY, :right_open]
  end

  # Returns value of probability density function at a point, using the
  # closed form (k / s) * (x / s)^(k - 1) * exp(-(x / s)^k) with
  # s = scale and k = shape.
  #
  # @param [Numeric] x A real valued point
  # @return [Float] the density when `@support <=> x` is 0, and 0.0 when
  #   it is 1 or -1
  def pdf(x)
    position = @support <=> x
    # Evaluate the closed form only inside the support: a negative x
    # raised to a non-integer power yields a Complex, which Math.exp
    # rejects with a TypeError.
    density = nil
    if position == 0
      ratio = x / @scale
      density = (@shape / @scale) * (ratio**(@shape - 1)) *
                Math.exp(-(ratio**@shape))
    end
    [density, 0.0, 0.0][position]
  end

  # Returns value of cumulative distribution function at a point, using
  # the closed form 1 - exp(-(x / s)^k) with s = scale and k = shape.
  #
  # @param [Numeric] x A real valued point
  # @return [Float] the closed-form value when `@support <=> x` is 0,
  #   1.0 when it is 1 and 0.0 when it is -1
  def cdf(x)
    position = @support <=> x
    # Same laziness as in #pdf: the closed form is only well defined
    # (and only needed) inside the support.
    cumulative = nil
    cumulative = 1 - Math.exp(-((x / @scale)**@shape)) if position == 0
    [cumulative, 1.0, 0.0][position]
  end

  # Returns value of inverse CDF for a given probability
  #
  # @see #p_value
  #
  # @param [Numeric] p a value within [0, 1]
  # @return [Float] scale * (-ln(1 - p))^(1 / shape)
  # @raise [RangeError] if p > 1 or p < 0
  def quantile(p)
    raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1

    @scale * ((-Math.log(1 - p))**(1 / @shape))
  end

  # Returns the expected value of mean for the calling instance:
  # scale * gamma(1 + 1 / shape).
  #
  # @return [Float] Mean of the distribution
  def mean
    @scale * Math.gamma(1 + 1 / @shape)
  end

  # Returns the expected value of variance for the calling instance:
  # scale^2 * gamma(1 + 2 / shape) - mean^2.
  #
  # @return [Float] Variance of the distribution
  def variance
    m = mean
    (@scale * @scale) * Math.gamma(1 + 2 / @shape) - (m * m)
  end

  # Compares two distribution instances and returns a boolean outcome
  # Available publicly as #==
  #
  # @private
  #
  # @param other A distribution object (preferred)
  # @return [Boolean] true if-and-only-if two instances are of the same
  #   class and have the same shape and scale.
  def eql?(other)
    other.is_a?(self.class) &&
      other.shape == @shape &&
      other.scale == @scale
  end

  alias :== :eql?
  alias :p_value :quantile

  private :eql?
end
98
+ end
99
+ end