statistical 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rubocop.yml +111 -0
- data/.travis.yml +7 -0
- data/CONTRIBUTING.md +73 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +37 -0
- data/Rakefile +43 -0
- data/bin/console +11 -0
- data/bin/distribution +53 -0
- data/bin/setup +8 -0
- data/data/template/distribution.erb +84 -0
- data/data/template/rng.erb +53 -0
- data/data/template/spec.erb +142 -0
- data/lib/core_extensions.rb +35 -0
- data/lib/statistical.rb +7 -0
- data/lib/statistical/distribution.rb +36 -0
- data/lib/statistical/distribution/bernoulli.rb +29 -0
- data/lib/statistical/distribution/exponential.rb +85 -0
- data/lib/statistical/distribution/laplace.rb +101 -0
- data/lib/statistical/distribution/two_point.rb +144 -0
- data/lib/statistical/distribution/uniform.rb +98 -0
- data/lib/statistical/distribution/uniform_discrete.rb +133 -0
- data/lib/statistical/distribution/weibull.rb +99 -0
- data/lib/statistical/helpers.rb +132 -0
- data/lib/statistical/rng.rb +37 -0
- data/lib/statistical/rng/bernoulli.rb +29 -0
- data/lib/statistical/rng/exponential.rb +56 -0
- data/lib/statistical/rng/laplace.rb +57 -0
- data/lib/statistical/rng/two_point.rb +70 -0
- data/lib/statistical/rng/uniform.rb +62 -0
- data/lib/statistical/rng/uniform_discrete.rb +78 -0
- data/lib/statistical/rng/weibull.rb +58 -0
- data/lib/statistical/version.rb +3 -0
- data/statistical.gemspec +28 -0
- metadata +165 -0
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'statistical/helpers'
|
2
|
+
|
3
|
+
module Statistical
|
4
|
+
module Distribution
|
5
|
+
# Two-point distribution over a pair of states labelled failure and success.
# The states must be comparable, with failure < success under whatever
# comparison the state objects implement. With the default arguments
# (p = 0.5, states 0 and 1) it behaves as a Bernoulli distribution.
#
# @note The states used to represent success & failure must be Numeric.
#   Using it on generic state labels can cause strange outcomes!
#
# @note state_failure < state_success, for the sake of sanity.
#
# @author Vaibhav Yenamandra
# @attr_reader [Float] p probability of the success state
# @attr_reader [Float] q probability of the failure state
# @attr_reader [Hash] states Hash with keys :failure, :success to hold
#   their respective state objects (defaults to 0, 1 respectively)
# @attr_reader [Array] support the two state objects in ascending order
class TwoPoint
  # `p` and `q` are terse, but they are the conventional names for the
  # success and failure probabilities.
  attr_reader :p, :q, :states, :support

  # Returns a new instance of the TwoPoint distribution
  #
  # @note The states used to represent success & failure must be Numeric.
  #   Using it on generic state labels can cause strange outcomes!
  #
  # @note state_failure < state_success, required to have a sane CDF.
  #
  # @param [Float] prob_success The probability of success
  # @param [Numeric] state_failure An object to describe the 0-state of
  #   failure
  # @param [Numeric] state_success An object to describe the 1-state of
  #   success
  # @raise [ArgumentError] if the two states are equal, if the failure state
  #   is greater than the success state, if the sum of the states is not
  #   Numeric, or if prob_success lies outside [0, 1]
  def initialize(prob_success = 0.5, state_failure = 0, state_success = 1)
    if state_failure == state_success
      raise ArgumentError,
            'Success & failure must be two distinct states'
    end

    if state_failure > state_success
      raise ArgumentError,
            'Failure state must be smaller that the success state!'
    end

    # Adding the states is a cheap way to check that both are numeric.
    unless (state_failure + state_success).is_a?(Numeric)
      raise ArgumentError,
            "States must be Numeric! Found #{state_failure.class} and #{state_success.class}"
    end

    if prob_success > 1 || prob_success < 0
      raise ArgumentError,
            "Probabilty of success must be within [0, 1]. Found #{prob_success}"
    end

    @p = prob_success
    @q = 1 - prob_success
    @states = {
      failure: state_failure,
      success: state_success
    }
    @support = @states.values.sort
  end

  # Returns the probability mass at a given state of the random variate X.
  # Essentially: "what's P(X=x)?"
  #
  # @param x [Numeric] The state that the random variable takes
  # @return [Numeric] p if x equals the success state, q if x equals the
  #   failure state, and 0 for any other value
  def pdf(x)
    return @p if @states[:success] == x
    return @q if @states[:failure] == x
    0
  end

  # Returns the value of the cumulative distribution function at a point,
  # i.e. P(X <= x).
  #
  # @param x [Numeric] The point at which the CDF is evaluated
  # @return [Numeric] 0 if x is below the failure state, 1 if x is at or
  #   above the success state, and q anywhere in between
  def cdf(x)
    return 0 if x < @states[:failure]
    # The upper bound must be tested before the in-between case: at
    # x == success all of the probability mass has been accumulated.
    return 1 if x >= @states[:success]
    @q
  end

  # Returns value of inverse CDF for a given probability
  #
  # @see #p_value
  #
  # @param [Numeric] p a value within [0, 1]
  # @return The failure state if p <= q, the success state otherwise
  # @raise [RangeError] if p > 1 or p < 0
  def quantile(p)
    raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1
    p <= @q ? @states[:failure] : @states[:success]
  end

  # Returns the expected mean value for the calling instance.
  #
  # @return Mean of the distribution: p * success + q * failure
  def mean
    @p * @states[:success] + @q * @states[:failure]
  end

  # Returns the expected value of variance for the calling instance,
  # computed as E[X^2] - (E[X])^2.
  #
  # @return Variance of the distribution
  def variance
    @p * (@states[:success]**2) + @q * (@states[:failure]**2) -
      (mean**2)
  end

  # Compares two distribution instances and returns a boolean outcome
  # Available publicly as #==
  #
  # @note This also compares the states over which the distribution exists
  #   in addition to the other parameters
  #
  # @private
  #
  # @param other A distribution object (preferred)
  # @return [Boolean] true if-and-only-if two instances are of the same
  #   class and have the same parameters.
  def eql?(other)
    other.is_a?(self.class) &&
      @p == other.p &&
      @states == other.states
  end

  alias :== :eql?
  alias :p_value :quantile

  # The aliases above are taken first, so #== stays public while #eql? itself
  # is hidden.
  private :eql?
end
|
143
|
+
end
|
144
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'statistical/helpers'
|
2
|
+
|
3
|
+
module Statistical
|
4
|
+
module Distribution
|
5
|
+
# Continuous uniform distribution between a lower and an upper bound.
# Provides a PDF, CDF, inverse CDF (quantile), mean and variance.
#
# @note The bounds may be given in either order: the smaller value always
#   becomes `lower` and the larger one `upper`. Initializing with
#   lower = 10 and upper = 2 is the same as lower = 2 and upper = 10.
#
# @author Vaibhav Yenamandra
#
# @attr_reader [Numeric] lower The lower bound of the uniform distribution.
#   Defaults to 0.0.
# @attr_reader [Numeric] upper The upper bound of the uniform distribution.
#   Defaults to 1.0.
# @attr_reader support The `Domain` built from the two bounds with the
#   `:closed` flag
class Uniform
  attr_reader :lower, :upper, :support

  # Returns a new `Statistical::Distribution::Uniform` instance
  #
  # @note if given start > finish, the two are swapped internally
  #
  # @param [Numeric] start one bound of the distribution.
  # @param [Numeric] finish the other bound of the distribution.
  # @return `Statistical::Distribution::Uniform` instance
  def initialize(start = 0.0, finish = 1.0)
    bounds = [start, finish]
    @lower = bounds.min
    @upper = bounds.max
    @support = Domain[@lower, @upper, :closed]
  end

  # Returns value of probability density function at a point
  #
  # The result is selected by the value of `@support <=> x`: 0 picks the
  # density, 1 and -1 both pick 0.0.
  # NOTE(review): presumably `Domain#<=>` yields 0 when x lies inside the
  # support -- confirm against the Domain helper.
  #
  # @param [Numeric] x A real valued point
  # @return [Float] 1 / (upper - lower) when the comparison yields 0,
  #   0.0 otherwise
  def pdf(x)
    density = 1.0 / (@upper - @lower)
    outcomes = [density, 0.0, 0.0]
    outcomes[@support <=> x]
  end

  # Returns value of cumulative density function at a point
  #
  # The result is selected by the value of `@support <=> x`: 0 picks the
  # linear ramp, 1 picks 1.0 and -1 picks 0.0.
  # NOTE(review): presumably 1 means x lies to the right of the support and
  # -1 to its left -- confirm against the Domain helper.
  #
  # @param [Numeric] x A real valued point
  # @return [Float] (x - lower) / (upper - lower) when the comparison
  #   yields 0, otherwise 1.0 or 0.0 as described above
  def cdf(x)
    fraction = (x - @lower).fdiv(@upper - @lower)
    [fraction, 1.0, 0.0][@support <=> x]
  end

  # Returns value of inverse CDF for a given probability
  #
  # @see #p_value
  #
  # @param [Numeric] p a value within [0, 1]
  # @return [Numeric] lower + p * (upper - lower)
  # @raise [RangeError] if p > 1 or p < 0
  def quantile(p)
    out_of_range = p < 0 || p > 1
    raise RangeError, "`p` must be in [0, 1], found: #{p}" if out_of_range
    width = @upper - @lower
    @lower + p * width
  end

  # Returns the mean of the distribution, the midpoint of the two bounds.
  #
  # @return [Float] Mean of the distribution
  def mean
    0.5 * (@upper + @lower)
  end

  # Returns the variance of the distribution, (upper - lower)^2 / 12.
  #
  # @return [Float] Variance of the distribution
  def variance
    width = @upper - @lower
    (width**2) / 12.0
  end

  # Compares two distribution instances and returns a boolean outcome
  # Available publicly as #==
  #
  # @private
  #
  # @param other A distribution object (preferred)
  # @return [Boolean] true if-and-only-if two instances are of the same
  #   class and have the same parameters.
  def eql?(other)
    return false unless other.is_a?(self.class)
    @lower == other.lower && @upper == other.upper
  end

  alias :== :eql?
  alias :p_value :quantile

  private :eql?
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
module Statistical
|
2
|
+
# Module to collect all abstractions of distributions
|
3
|
+
module Distribution
|
4
|
+
# This class abstracts the discrete uniform distribution over a given set
# of elements
#
# @author Vaibhav Yenamandra
# @attr_reader [Array] support The sorted set of values a random variate
#   from the distribution can take. Must have at least 1 value
# @attr_reader [Integer] count Number of elements in the support
# @attr_reader lower The first (smallest) element of the support
# @attr_reader upper The last (largest) element of the support
class UniformDiscrete
  attr_reader :count, :support, :lower, :upper

  # Returns a model for the discrete uniform distribution on all elements
  # present in the given set of elements `elems`
  #
  # @note The constructor sorts the array of elements given to it, as this
  #   is a key assumption of the discrete uniform distribution. This set
  #   must also be homogeneous. An empty collection leaves `lower` and
  #   `upper` as nil.
  #
  # @param [Integer, Array, Range] elems The elements over which the
  #   distribution exists. A lone Integer is treated as a one-element set.
  # @raise [ArgumentError] if elems isn't one of Integer, Array or Range
  def initialize(elems)
    @support =
      case elems
      # Integer covers what used to be Fixnum and Bignum; those constants
      # no longer exist on current Rubies.
      when Integer then [elems]
      when Array then elems.sort
      when Range then elems.to_a
      else
        # Message text kept unchanged for callers that match on it.
        raise ArgumentError,
              "Expected Fixnum, Bignum, Array or Range, found #{elems.class}"
      end
    @count = @support.length
    @lower = @support[0]
    @upper = @support[-1]
  end

  # Returns the probability mass at a point on the real line
  #
  # @param [Numeric] k Point at which pdf is desired
  # @return [Float] 0.0 if k doesn't belong to the elements over which the
  #   current instance is distributed. 1/n otherwise where n is number
  #   of elements
  def pdf(k)
    return 1.0 / @count if @support.include?(k)
    0.0
  end

  # Returns value of cumulative density function at a point on the real line
  # Uses a binary search on the sorted support array internally.
  #
  # @note The original author reports floating point comparison issues
  #   when dealing with precision > 1E-18
  #
  # @param [Numeric] k Point at which cdf value is desired
  # @return [Float] 0.0 if k is on the left of the support,
  #   1.0 if k is at or beyond the largest element, and otherwise the
  #   fraction of elements that are <= k
  def cdf(k)
    return 0.0 if k < @lower
    return 1.0 if k >= @upper

    # The index of the first element greater than k equals the number of
    # elements <= k. The guard above guarantees such an element exists, so
    # the find-minimum search never returns nil here.
    @support.bsearch_index { |elem| elem > k }.fdiv(@count)
  end

  # Returns value of inverse CDF for a given probability.
  #
  # @see #p_value
  #
  # @param [Numeric] p a value within [0, 1]
  # @return [Numeric] Returns inverse CDF for valid p
  # @raise [RangeError] if p > 1 or p < 0
  def quantile(p)
    raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1
    return @lower if p.zero?
    return @upper if (p - 1).zero?
    @support[(p * count).ceil - 1]
  end

  # Returns the mean value for the calling instance. Calculated mean, and
  # not inferred from simulations
  #
  # NOTE(review): Array#mean is not part of Ruby core; presumably it is
  # supplied by this gem's core extensions -- confirm they are loaded.
  #
  # @return [Float] Mean of the distribution
  def mean
    @support.mean
  end

  # Returns the expected value of population variance for the calling
  # instance.
  #
  # NOTE(review): Array#variance is not part of Ruby core; presumably it is
  # supplied by this gem's core extensions -- confirm they are loaded.
  #
  # @return [Float] Variance of the distribution
  def variance
    @support.variance
  end

  # Compares two distribution instances and returns a boolean
  # Available publicly as #==
  #
  # @private
  #
  # @param other A distribution object (preferred)
  # @return [Boolean] true if-and-only-if two instances are of the same
  #   class and have the same parameters.
  def eql?(other)
    other.is_a?(self.class) &&
      @support == other.support
  end

  alias :== :eql?
  alias :p_value :quantile

  private :eql?
end
|
132
|
+
end
|
133
|
+
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
require 'statistical/helpers'
|
2
|
+
|
3
|
+
module Statistical
|
4
|
+
module Distribution
|
5
|
+
# Two-parameter Weibull distribution described by a scale and a shape
# parameter. Provides a PDF, CDF, inverse CDF (quantile), mean and variance.
#
# @note The constructor converts both parameters with `to_f` and performs
#   no validation on them.
#
# @author Vaibhav Yenamandra
# @attr_reader [Float] scale The distribution's scale parameter
# @attr_reader [Float] shape The distribution's shape parameter
# @attr_reader support The `Domain` built from 0.0 and Float::INFINITY with
#   the `:right_open` flag
class Weibull
  attr_reader :scale, :shape, :support

  # Returns a new `Statistical::Distribution::Weibull` instance
  #
  # @param [Numeric] scale The distribution's scale parameter
  # @param [Numeric] shape The distribution's shape parameter
  # @return `Statistical::Distribution::Weibull` instance
  def initialize(scale = 1, shape = 1)
    @scale = scale.to_f
    @shape = shape.to_f
    @support = Domain[0.0, Float::INFINITY, :right_open]
  end

  # Returns value of probability density function at a point, using the
  # closed form (shape / scale) * (x / scale)^(shape - 1) *
  # exp(-(x / scale)^shape).
  #
  # The closed form is only evaluated when `@support <=> x` is 0. For a
  # negative x and a non-integer shape the power is a Complex number that
  # `Math.exp` rejects, so it must not be computed for such points.
  # NOTE(review): presumably `Domain#<=>` yields 0 when x lies inside the
  # support -- confirm against the Domain helper.
  #
  # @param [Numeric] x A real valued point
  # @return [Float] the density when the comparison yields 0, 0.0 otherwise
  def pdf(x)
    return 0.0 unless (@support <=> x).zero?

    ratio = x / @scale
    (@shape / @scale) * (ratio**(@shape - 1)) * Math.exp(-(ratio**@shape))
  end

  # Returns value of cumulative density function at a point, using the
  # closed form 1 - exp(-(x / scale)^shape).
  #
  # As with #pdf, the closed form is only evaluated when `@support <=> x`
  # is 0. A comparison result of 1 gives 1.0 and anything else gives 0.0.
  # NOTE(review): presumably 1 means x lies to the right of the support and
  # -1 to its left -- confirm against the Domain helper.
  #
  # @param [Numeric] x A real valued point
  # @return [Float] the cumulative probability as described above
  def cdf(x)
    case @support <=> x
    when 0 then 1 - Math.exp(-((x / @scale)**@shape))
    when 1 then 1.0
    else 0.0
    end
  end

  # Returns value of inverse CDF for a given probability, computed as
  # scale * (-ln(1 - p))^(1 / shape).
  #
  # @see #p_value
  #
  # @param [Numeric] p a value within [0, 1]
  # @return [Float] Inverse CDF for valid p
  # @raise [RangeError] if p > 1 or p < 0
  def quantile(p)
    raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1
    @scale * ((-Math.log(1 - p))**(1 / @shape))
  end

  # Returns the expected value of mean for the calling instance, computed
  # as scale * gamma(1 + 1 / shape).
  #
  # @return [Float] Mean of the distribution
  def mean
    @scale * Math.gamma(1 + 1 / @shape)
  end

  # Returns the expected value of variance for the calling instance,
  # computed as scale^2 * gamma(1 + 2 / shape) - mean^2.
  #
  # @return [Float] Variance of the distribution
  def variance
    m = mean
    (@scale * @scale) * Math.gamma(1 + 2 / @shape) - (m * m)
  end

  # Compares two distribution instances and returns a boolean outcome
  # Available publicly as #==
  #
  # @private
  #
  # @param other A distribution object (preferred)
  # @return [Boolean] true if-and-only-if two instances are of the same
  #   class and have the same parameters.
  def eql?(other)
    other.is_a?(self.class) &&
      other.shape == @shape &&
      other.scale == @scale
  end

  alias :== :eql?
  alias :p_value :quantile

  private :eql?
end
|
98
|
+
end
|
99
|
+
end
|