statistical 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,144 @@
1
+ require 'statistical/helpers'
2
+
3
module Statistical
  module Distribution
    # Two-point distribution over a pair of numeric states. The random variable
    # takes the success state with probability `p` and the failure state with
    # probability `q = 1 - p`. With the default states (0 and 1) this behaves
    # as the Bernoulli distribution.
    #
    # @note The states used to represent success & failure must be Numeric.
    #   Using it on generic state labels can cause strange outcomes!
    #
    # @note state_failure < state_success, for the sake of sanity.
    #
    # @author Vaibhav Yenamandra
    # @attr_reader [Float] p probability of the success state
    # @attr_reader [Float] q probability of the failure state
    # @attr_reader [Hash] states Hash with keys :failure, :success to hold
    #   their respective state objects (defaults to 0, 1 respectively)
    # @attr_reader [Array] support the two states in ascending order
    class TwoPoint
      attr_reader :p, :q, :states, :support

      # Returns a new instance of the TwoPoint distribution
      #
      # @note state_failure < state_success is required to have a sane CDF.
      #
      # @param [Float] prob_success The probability of success
      # @param [Numeric] state_failure An object to describe the 0-state of
      #   failure
      # @param [Numeric] state_success An object to describe the 1-state of
      #   success
      # @raise [ArgumentError] if either state is not Numeric, if the states
      #   are equal, if state_failure > state_success, or if prob_success is
      #   not within [0, 1] (NaN included)
      def initialize(prob_success = 0.5, state_failure = 0, state_success = 1)
        # Type check first: comparing or adding arbitrary objects below could
        # otherwise blow up with NoMethodError instead of ArgumentError.
        unless state_failure.is_a?(Numeric) && state_success.is_a?(Numeric)
          raise ArgumentError,
                "States must be Numeric! Found #{state_failure.class} and #{state_success.class}"
        end

        if state_failure == state_success
          raise ArgumentError,
                'Success & failure must be two distinct states'
        end

        if state_failure > state_success
          raise ArgumentError,
                'Failure state must be smaller that the success state!'
        end

        # Written as a positive range test so that NaN (for which every
        # comparison is false) is rejected as well.
        unless prob_success >= 0 && prob_success <= 1
          raise ArgumentError,
                "Probabilty of success must be within [0, 1]. Found #{prob_success}"
        end

        @p = prob_success
        @q = 1 - prob_success
        @states = { failure: state_failure, success: state_success }
        # Already ordered: failure < success was enforced above.
        @support = [state_failure, state_success]
      end

      # Returns the probability mass at a given state of the random variate X.
      # Essentially: "what's P(X=x)?"
      #
      # @param x [Numeric] The state that the random variable takes
      # @return [Numeric] p if x is the success state, q if x is the failure
      #   state and 0 for any other value
      def pdf(x)
        return @p if @states[:success] == x
        return @q if @states[:failure] == x

        0
      end

      # Returns the value of the cumulative distribution function, P(X <= x).
      #
      # @param x [Numeric] The point at which the CDF is evaluated
      # @return [Numeric] 0 left of the failure state, q from the failure state
      #   up to (but excluding) the success state, and 1 from the success
      #   state onwards
      def cdf(x)
        return 0 if x < @states[:failure]
        # Must be tested before the middle segment: at x == success both
        # states have been accumulated, so the CDF is 1, not q.
        return 1 if x >= @states[:success]

        @q
      end

      # Returns value of inverse CDF for a given probability
      #
      # @see #p_value
      #
      # @param [Numeric] p a value within [0, 1]
      # @return [Numeric] the failure state if p <= q, the success state
      #   otherwise
      # @raise [RangeError] if p > 1 or p < 0
      def quantile(p)
        raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1

        p <= @q ? @states[:failure] : @states[:success]
      end

      # Returns the expected mean value for the calling instance.
      #
      # @return [Numeric] Mean of the distribution
      def mean
        @p * @states[:success] + @q * @states[:failure]
      end

      # Returns the variance for the calling instance, computed as
      # E[X^2] - (E[X])^2.
      #
      # @return [Numeric] Variance of the distribution
      def variance
        @p * (@states[:success]**2) + @q * (@states[:failure]**2) - (mean**2)
      end

      # Compares two distribution instances and returns a boolean outcome
      # Available publicly as #==
      #
      # @note This also compares the states over which the distribution exists
      #   in addition to the other parameters
      #
      # @private
      #
      # @param other A distribution object (preferred)
      # @return [Boolean] true if-and-only-if two instances are of the same
      #   class and have the same parameters.
      def eql?(other)
        other.is_a?(self.class) &&
          @p == other.p &&
          @states == other.states
      end

      alias == eql?
      alias p_value quantile

      private :eql?
    end
  end
end
@@ -0,0 +1,98 @@
1
+ require 'statistical/helpers'
2
+
3
module Statistical
  module Distribution
    # An abstraction of the common statistical properties of the continuous
    # uniform distribution. Provides a PDF, CDF, Inverse-CDF, mean, variance
    #
    # @note If initialized with lower and upper parameters in reverse order, it
    #   swaps them. Eg. initializing with lower = 10 and upper = 2 is the same
    #   as lower = 2 and upper = 10, due to the swap during call to new(,)
    #
    # @author Vaibhav Yenamandra
    #
    # @attr_reader [Numeric] lower The lower bound of the uniform distribution.
    #   Defaults to 0.0.
    # @attr_reader [Numeric] upper The upper bound of the uniform distribution.
    #   Defaults to 1.0.
    # @attr_reader support The closed interval [lower, upper], built with
    #   `Domain[]` (defined outside this file)
    class Uniform
      attr_reader :lower, :upper, :support

      # Returns a new `Statistical::Distribution::Uniform` instance
      #
      # @note if given lower > upper, it swaps them internally
      #
      # @param [Numeric] start lower bound of the distribution.
      # @param [Numeric] finish upper bound of the distribution.
      # @return `Statistical::Distribution::Uniform` instance
      def initialize(start = 0.0, finish = 1.0)
        @lower = [start, finish].min
        @upper = [start, finish].max
        @support = Domain[@lower, @upper, :closed]
      end

      # Returns value of probability density function at a point
      #
      # @note For a degenerate instance (lower == upper) the density at the
      #   single support point is the result of dividing 1.0 by zero.
      #
      # @param [Numeric] x A real valued point
      # @return [Float] 1 / (upper - lower) if x is within [lower, upper],
      #   0.0 otherwise
      def pdf(x)
        # NOTE(review): relies on `@support <=> x` being 0 exactly when x lies
        # inside the support -- Domain is defined elsewhere; confirm there.
        case @support <=> x
        when 0 then 1.0 / (@upper - @lower)
        else 0.0
        end
      end

      # Returns value of cumulative distribution function at a point
      #
      # @param [Numeric] x A real valued point
      # @return [Float] (x - lower) / (upper - lower) inside the support; the
      #   values outside the support are 1.0 when `@support <=> x` is 1 and
      #   0.0 otherwise
      def cdf(x)
        # NOTE(review): presumably `@support <=> x` is 1 for points right of
        # the support and -1 for points left of it -- verify against Domain.
        case @support <=> x
        when 0
          # A zero-width support holds all of the mass at one point; the
          # general formula would evaluate 0 / 0 (NaN) there.
          return 1.0 if @upper == @lower

          (x - @lower).fdiv(@upper - @lower)
        when 1 then 1.0
        else 0.0
        end
      end

      # Returns value of inverse CDF for a given probability
      #
      # @see #p_value
      #
      # @param [Numeric] p a value within [0, 1]
      # @return [Numeric] Inverse CDF for valid p
      # @raise [RangeError] if p > 1 or p < 0
      def quantile(p)
        raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1

        @lower + p * (@upper - @lower)
      end

      # Returns the mean, i.e. the midpoint of the support.
      #
      # @return [Float] Mean of the distribution
      def mean
        0.5 * (@upper + @lower)
      end

      # Returns the variance, (upper - lower)^2 / 12.
      #
      # @return [Float] Variance of the distribution
      def variance
        ((@upper - @lower)**2) / 12.0
      end

      # Compares two distribution instances and returns a boolean outcome
      # Available publicly as #==
      #
      # @private
      #
      # @param other A distribution object (preferred)
      # @return [Boolean] true if-and-only-if two instances are of the same
      #   class and have the same parameters.
      def eql?(other)
        other.is_a?(self.class) &&
          @lower == other.lower &&
          @upper == other.upper
      end

      alias == eql?
      alias p_value quantile

      private :eql?
    end
  end
end
@@ -0,0 +1,133 @@
1
module Statistical
  # Module to collect all abstractions of distributions
  module Distribution
    # This class abstracts the discrete uniform distribution over a given set
    # of elements
    #
    # @author Vaibhav Yenamandra
    # @attr_reader [Integer] count Number of elements in the support
    # @attr_reader [Array] support The sorted set of valid values a random
    #   variate from the distribution can take. Must have at least 1 value
    # @attr_reader lower The first (smallest) element of the support
    # @attr_reader upper The last (largest) element of the support
    class UniformDiscrete
      attr_reader :count, :support, :lower, :upper

      # Returns a model for the discrete uniform distribution on all elements
      # present in the given set of elements `elems`
      #
      # @note The constructor sorts an Array of elements given to it, as this
      #   is a key assumption of the discrete uniform distribution. This set
      #   must also be homogeneous
      #
      # @param [Array, Range, Integer] elems The elements over which the
      #   distribution exists. A lone Integer is treated as a single-element
      #   support.
      # @raise [ArgumentError] if elems isn't one of Array, Range or Integer,
      #   or if it yields no elements at all
      def initialize(elems)
        # `Integer` replaces the former `Fixnum, Bignum` pair: both were
        # subclasses of Integer and the constants no longer exist on current
        # Rubies, where referencing them raises NameError.
        @support =
          case elems
          when Integer then [elems]
          when Array then elems.sort
          when Range then elems.to_a
          else
            raise ArgumentError,
                  "Expected Integer, Array or Range, found #{elems.class}"
          end

        # Without at least one element there are no bounds to compare against.
        if @support.empty?
          raise ArgumentError, 'Support must have at least 1 value'
        end

        @count = @support.length
        @lower = @support.first
        @upper = @support.last
      end

      # Returns value of probability mass function at a point on the real
      # line
      #
      # @param [Integer] k Point at which pdf is desired
      # @return [Float] 0 if k doesn't belong to the elements over which the
      #   current instance is distributed. 1/n otherwise where n is number
      #   of elements
      def pdf(k)
        @support.include?(k) ? 1.0 / @count : 0.0
      end

      # Returns value of cumulative distribution function at a point on the
      # real line. Uses a binary search on the sorted support internally.
      #
      # @note This suffers from some floating point comparison issues. Errors
      #   start appearing when dealing with precision > 1E-18
      #
      # @param [Numeric] k Point at which cdf value is desired
      # @return [Float] 0 if k is on the left of the support,
      #   1 if k is at or beyond the right end of the support, and
      #   (number of support elements <= k) / count otherwise
      def cdf(k)
        return 0.0 if k < @lower
        return 1.0 if k >= @upper

        # The index of the first element strictly greater than k equals the
        # number of elements <= k. Such an element always exists here because
        # k < @upper, so find-minimum mode never returns nil.
        @support.bsearch_index { |elem| elem > k }.fdiv(@count)
      end

      # Returns value of inverse CDF for a given probability.
      #
      # @see #p_value
      #
      # @param [Numeric] p a value within [0, 1]
      # @return [Numeric] Returns inverse CDF for valid p
      # @raise [RangeError] if p > 1 or p < 0
      def quantile(p)
        raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1
        return @lower if p.zero?
        return @upper if (p - 1).zero?

        @support[(p * count).ceil - 1]
      end

      # Returns the mean value for the calling instance. Calculated mean, and
      # not inferred from simulations
      #
      # @note Delegates to `Array#mean`, which is not defined in this file.
      #
      # @return [Float] Mean of the distribution
      def mean
        @support.mean
      end

      # Returns the variance for the calling instance.
      #
      # @note Delegates to `Array#variance`, which is not defined in this
      #   file; presumably it yields the population variance -- verify there.
      #
      # @return [Float] Variance of the distribution
      def variance
        @support.variance
      end

      # Compares two distribution instances and returns a boolean
      # Available publicly as #==
      #
      # @private
      #
      # @param other A distribution object (preferred)
      # @return [Boolean] true if-and-only-if two instances are of the same
      #   class and have the same parameters.
      def eql?(other)
        other.is_a?(self.class) &&
          @support == other.support
      end

      alias == eql?
      alias p_value quantile

      private :eql?
    end
  end
end
@@ -0,0 +1,99 @@
1
+ require 'statistical/helpers'
2
+
3
module Statistical
  module Distribution
    # Two-parameter Weibull distribution, described by a scale and a shape
    # parameter. Provides a PDF, CDF, Inverse-CDF, mean and variance.
    #
    # @note The constructor performs no validation: both parameters are only
    #   coerced with `to_f`. Non-positive values are accepted as-is and lead
    #   to NaN/Infinity results from the formulas below.
    #
    # @author Vaibhav Yenamandra
    # @attr_reader [Float] scale The distribution's scale parameter
    # @attr_reader [Float] shape The distribution's shape parameter
    # @attr_reader support The right-open interval [0, Infinity), built with
    #   `Domain[]` (defined outside this file)
    class Weibull
      attr_reader :scale, :shape, :support

      # Returns a new `Statistical::Distribution::Weibull` instance
      #
      # @param [Numeric] scale The distribution's scale parameter
      # @param [Numeric] shape The distribution's shape parameter
      # @return `Statistical::Distribution::Weibull` instance
      def initialize(scale = 1, shape = 1)
        @scale = scale.to_f
        @shape = shape.to_f
        @support = Domain[0.0, Float::INFINITY, :right_open]
      end

      # Returns value of probability density function at a point, using the
      # closed form (shape / scale) * (x / scale)^(shape - 1) *
      # exp(-(x / scale)^shape).
      #
      # @param [Numeric] x A real valued point
      # @return [Float] the density at x inside the support, 0.0 otherwise
      def pdf(x)
        # The closed form is evaluated only for in-support points: raising a
        # negative base to a fractional power yields a Complex in Ruby, which
        # Math.exp refuses with a TypeError.
        # NOTE(review): relies on `@support <=> x` being 0 exactly when x lies
        # inside the support -- Domain is defined elsewhere; confirm there.
        case @support <=> x
        when 0
          ratio = x / @scale
          (@shape / @scale) * (ratio**(@shape - 1)) * Math.exp(-(ratio**@shape))
        else
          0.0
        end
      end

      # Returns value of cumulative distribution function at a point, using
      # the closed form 1 - exp(-(x / scale)^shape).
      #
      # @param [Numeric] x A real valued point
      # @return [Float] the cumulative probability at x inside the support;
      #   outside the support, 1.0 when `@support <=> x` is 1 and 0.0
      #   otherwise
      def cdf(x)
        # Evaluated lazily for the same reason as in #pdf.
        # NOTE(review): presumably `@support <=> x` is 1 for points right of
        # the support and -1 for points left of it -- verify against Domain.
        case @support <=> x
        when 0 then 1 - Math.exp(-((x / @scale)**@shape))
        when 1 then 1.0
        else 0.0
        end
      end

      # Returns value of inverse CDF for a given probability, using the closed
      # form scale * (-ln(1 - p))^(1 / shape).
      #
      # @see #p_value
      #
      # @param [Numeric] p a value within [0, 1]
      # @return [Float] Inverse CDF for valid p
      # @raise [RangeError] if p > 1 or p < 0
      def quantile(p)
        raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1

        @scale * ((-Math.log(1 - p))**(1 / @shape))
      end

      # Returns the mean, scale * gamma(1 + 1 / shape).
      #
      # @return [Float] Mean of the distribution
      def mean
        @scale * Math.gamma(1 + 1 / @shape)
      end

      # Returns the variance, scale^2 * gamma(1 + 2 / shape) - mean^2.
      #
      # @return [Float] Variance of the distribution
      def variance
        m = mean
        (@scale * @scale) * Math.gamma(1 + 2 / @shape) - (m * m)
      end

      # Compares two distribution instances and returns a boolean outcome
      # Available publicly as #==
      #
      # @private
      #
      # @param other A distribution object (preferred)
      # @return [Boolean] true if-and-only-if two instances are of the same
      #   class and have the same parameters.
      def eql?(other)
        other.is_a?(self.class) &&
          other.shape == @shape &&
          other.scale == @scale
      end

      alias == eql?
      alias p_value quantile

      private :eql?
    end
  end
end