statistical 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rubocop.yml +111 -0
- data/.travis.yml +7 -0
- data/CONTRIBUTING.md +73 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +37 -0
- data/Rakefile +43 -0
- data/bin/console +11 -0
- data/bin/distribution +53 -0
- data/bin/setup +8 -0
- data/data/template/distribution.erb +84 -0
- data/data/template/rng.erb +53 -0
- data/data/template/spec.erb +142 -0
- data/lib/core_extensions.rb +35 -0
- data/lib/statistical.rb +7 -0
- data/lib/statistical/distribution.rb +36 -0
- data/lib/statistical/distribution/bernoulli.rb +29 -0
- data/lib/statistical/distribution/exponential.rb +85 -0
- data/lib/statistical/distribution/laplace.rb +101 -0
- data/lib/statistical/distribution/two_point.rb +144 -0
- data/lib/statistical/distribution/uniform.rb +98 -0
- data/lib/statistical/distribution/uniform_discrete.rb +133 -0
- data/lib/statistical/distribution/weibull.rb +99 -0
- data/lib/statistical/helpers.rb +132 -0
- data/lib/statistical/rng.rb +37 -0
- data/lib/statistical/rng/bernoulli.rb +29 -0
- data/lib/statistical/rng/exponential.rb +56 -0
- data/lib/statistical/rng/laplace.rb +57 -0
- data/lib/statistical/rng/two_point.rb +70 -0
- data/lib/statistical/rng/uniform.rb +62 -0
- data/lib/statistical/rng/uniform_discrete.rb +78 -0
- data/lib/statistical/rng/weibull.rb +58 -0
- data/lib/statistical/version.rb +3 -0
- data/statistical.gemspec +28 -0
- metadata +165 -0
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'statistical/helpers'
|
2
|
+
|
3
|
+
module Statistical
|
4
|
+
module Distribution
|
5
|
+
# Two-Point distribution implementation that uses generic labels for states
|
6
|
+
# that its random variable can take. The assumptions made would be that
|
7
|
+
# the states are comparable and failure < success in whatever scheme of
|
8
|
+
# comparison that the state objects implement. This defaults to behaving as
|
9
|
+
# the bernoulli distribution
|
10
|
+
#
|
11
|
+
# @note The states used to represent success & failure must be Numeric.
|
12
|
+
# Using it on generic state labels can cause strange outcomes!
|
13
|
+
#
|
14
|
+
# @note state_failure < state_success, for the sake of sanity.
|
15
|
+
#
|
16
|
+
# @author Vaibhav Yenamandra
|
17
|
+
# @attr_reader [Float] p probability of the success state
|
18
|
+
# @attr_reader [Float] q probability of the failure state
|
19
|
+
# @attr_reader [Hash] states Hash with keys :failure, :success to hold
|
20
|
+
# their respective state objects(defaults to 0, 1 respectively)
|
21
|
+
class TwoPoint
  # `p` / `q` follow the textbook names for the success / failure
  # probabilities; `support` is the sorted pair of state values.
  attr_reader :p, :q, :states, :support

  # Returns a new instance of the TwoPoint distribution.
  #
  # @note Both states must be Numeric, and state_failure must be strictly
  #   smaller than state_success so that the CDF is well defined.
  #
  # @param [Float] prob_success The probability of success, within [0, 1]
  # @param [Numeric] state_failure Value describing the 0-state (failure)
  # @param [Numeric] state_success Value describing the 1-state (success)
  # @raise [ArgumentError] if a state is not Numeric, if the states are equal
  #   or out of order, or if prob_success is outside [0, 1]
  def initialize(prob_success = 0.5, state_failure = 0, state_success = 1)
    # Check the types first: comparing non-numeric states below would either
    # blow up with NoMethodError or report a misleading ordering error.
    unless state_failure.is_a?(Numeric) && state_success.is_a?(Numeric)
      raise ArgumentError,
            "States must be Numeric! Found #{state_failure.class} and #{state_success.class}"
    end

    if state_failure == state_success
      raise ArgumentError,
            'Success & failure must be two distinct states'
    end

    if state_failure > state_success
      raise ArgumentError,
            'Failure state must be smaller that the success state!'
    end

    if prob_success > 1 || prob_success < 0
      raise ArgumentError,
            "Probabilty of success must be within [0, 1]. Found #{prob_success}"
    end

    @p = prob_success
    @q = 1 - prob_success
    @states = {
      failure: state_failure,
      success: state_success
    }
    @support = @states.values.sort
  end

  # Returns the probability mass at a given state of the random variate X,
  # i.e. P(X = x).
  #
  # @param x [Numeric] The state whose probability is wanted
  # @return [Numeric] p for the success state, q for the failure state and
  #   0 for any other value
  def pdf(x)
    return @p if @states[:success] == x
    return @q if @states[:failure] == x

    0
  end

  # Returns the value of the cumulative distribution function, P(X <= x).
  #
  # @param x [Numeric] The point at which the CDF is evaluated
  # @return [Numeric] 0 left of the failure state, q from the failure state
  #   up to (but excluding) the success state, 1 from the success state on
  def cdf(x)
    return 0 if x < @states[:failure]
    # The success state must be tested before the in-between case: at
    # x == success all of the probability mass has been accumulated.
    return 1 if x >= @states[:success]

    @q
  end

  # Returns value of inverse CDF for a given probability
  #
  # @see #p_value
  #
  # @param [Numeric] p a value within [0, 1]
  # @return [Numeric] the failure state if p <= q, the success state otherwise
  # @raise [RangeError] if p > 1 or p < 0
  def quantile(p)
    raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1

    p <= @q ? @states[:failure] : @states[:success]
  end

  # Returns the expected mean value for the calling instance.
  #
  # @return [Numeric] Mean of the distribution
  def mean
    @p * @states[:success] + @q * @states[:failure]
  end

  # Returns the expected value of variance for the calling instance,
  # computed as E[X^2] - (E[X])^2.
  #
  # @return [Numeric] Variance of the distribution
  def variance
    @p * (@states[:success]**2) + @q * (@states[:failure]**2) -
      (mean**2)
  end

  # Compares two distribution instances and returns a boolean outcome
  # Available publicly as #==
  #
  # @note This also compares the states over which the distribution exists
  #   in addition to the other parameters
  #
  # @private
  #
  # @param other A distribution object (preferred)
  # @return [Boolean] true if-and-only-if two instances are of the same
  #   class and have the same parameters.
  def eql?(other)
    other.is_a?(self.class) &&
      @p == other.p &&
      @states == other.states
  end

  # The aliases are created before `eql?` is made private, so `==` and
  # `p_value` stay public.
  alias :== :eql?
  alias :p_value :quantile

  private :eql?
end
|
143
|
+
end
|
144
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'statistical/helpers'
|
2
|
+
|
3
|
+
module Statistical
|
4
|
+
module Distribution
|
5
|
+
# An abstraction of the common statistical properties of the uniform
|
6
|
+
# distribution. Provides a PDF, CDF, Inverse-CDF, mean, variance
|
7
|
+
#
|
8
|
+
# @note If initialized with lower and upper parameters in reverse order, it
|
9
|
+
# swaps them. Eg. initializing with lower = 10 and upper = 2 is the same
|
10
|
+
# as lower = 2 and upper = 10, due to the swap during call to new(,)
|
11
|
+
#
|
12
|
+
# @author Vaibhav Yenamandra
|
13
|
+
#
|
14
|
+
# @attr_reader [Numeric] lower The lower bound of the uniform distribution.
|
15
|
+
# Defaults to 0.0.
|
16
|
+
# @attr_reader [Numeric] upper The upper bound of the uniform distribution.
|
17
|
+
# Defaults to 1.0.
|
18
|
+
class Uniform
  attr_reader :lower, :upper, :support

  # Builds a continuous uniform distribution over the closed interval
  # spanned by the two given bounds.
  #
  # @note The bounds may be given in either order; the smaller one becomes
  #   `lower` and the larger one `upper`.
  #
  # @param [Numeric] start one bound of the distribution.
  # @param [Numeric] finish the other bound of the distribution.
  # @return `Statistical::Distribution::Uniform` instance
  def initialize(start = 0.0, finish = 1.0)
    bounds = [start, finish]
    @lower = bounds.min
    @upper = bounds.max
    @support = Domain[@lower, @upper, :closed]
  end

  # Evaluates the probability density function at a point.
  #
  # @param [Numeric] x A real valued point
  # @return [Float] 1 / (upper - lower) when the support comparison yields
  #   0, and 0.0 otherwise
  def pdf(x)
    density = 1.0 / (@upper - @lower)
    # The result of `@support <=> x` is used directly as an array index.
    # NOTE(review): presumably 0 means x lies inside the support and 1 / -1
    # mean it lies outside -- confirm against Domain#<=>.
    lookup = [density, 0.0, 0.0]
    lookup[@support <=> x]
  end

  # Evaluates the cumulative distribution function at a point.
  #
  # @param [Numeric] x A real valued point
  # @return [Float] (x - lower) / (upper - lower) when the support
  #   comparison yields 0, 1.0 when it yields 1 and 0.0 when it yields -1
  def cdf(x)
    fraction = (x - @lower).fdiv(@upper - @lower)
    # Same index-by-comparison lookup as in #pdf.
    lookup = [fraction, 1.0, 0.0]
    lookup[@support <=> x]
  end

  # Evaluates the inverse CDF for a given probability.
  #
  # @see #p_value
  #
  # @param [Numeric] p a value within [0, 1]
  # @return [Numeric] the point lower + p * (upper - lower)
  # @raise [RangeError] if p > 1 or p < 0
  def quantile(p)
    raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1

    width = @upper - @lower
    @lower + p * width
  end

  # Mean of the distribution: the midpoint of the two bounds.
  #
  # @return [Float] Mean of the distribution
  def mean
    total = @upper + @lower
    0.5 * total
  end

  # Variance of the distribution: (upper - lower)^2 / 12.
  #
  # @return [Float] Variance of the distribution
  def variance
    width = @upper - @lower
    (width**2) / 12.0
  end

  # Tells whether another object describes the same distribution.
  # Available publicly as #==
  #
  # @private
  #
  # @param other A distribution object (preferred)
  # @return [Boolean] true if-and-only-if two instances are of the same
  #   class and have the same parameters.
  def eql?(other)
    return false unless other.is_a?(self.class)

    @lower == other.lower && @upper == other.upper
  end

  # Aliased while `eql?` is still public so that `==` remains public.
  alias :== :eql?
  alias :p_value :quantile

  private :eql?
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
module Statistical
|
2
|
+
# Module to collect all abstractions of distributions
|
3
|
+
module Distribution
|
4
|
+
# This class abstracts the discrete uniform distribution over a given set
|
5
|
+
# of elements
|
6
|
+
#
|
7
|
+
# @author Vaibhav Yenamandra
|
8
|
+
# @attr_reader [Array, Numeric] support The support set of valid values a
|
9
|
+
# random variate from the distribution can take. Must have at least 1 value
|
10
|
+
class UniformDiscrete
  attr_reader :count, :support, :lower, :upper

  # Returns a model for the discrete uniform distribution on all elements
  # present in the given set of elements `elems`
  #
  # @note An Array is sorted on construction (duplicates are kept as-is);
  #   a Range is expanded with #to_a; a single Integer yields a one-element
  #   support. The elements must be mutually comparable.
  #
  # @param [Integer, Array, Range] elems The elements over which the
  #   distribution exists
  # @raise [ArgumentError] if elems isn't one of Integer, Array or Range,
  #   or if the resulting support is empty
  def initialize(elems)
    # `Integer` covers what used to be Fixnum and Bignum; those constants
    # were removed in Ruby 3.2 and referencing them raises NameError.
    @support =
      case elems
      when Integer then [elems]
      when Array then elems.sort
      when Range then elems.to_a
      else
        raise ArgumentError,
              "Expected Integer, Array or Range, found #{elems.class}"
      end

    # An empty support would leave lower/upper as nil and make every other
    # method fail later with an unrelated error.
    if @support.empty?
      raise ArgumentError, 'Support must contain at least one element'
    end

    @count = @support.length
    @lower = @support.first
    @upper = @support.last
  end

  # Returns value of probability mass function at a point on the real line
  #
  # @param [Numeric] k Point at which pdf is desired
  # @return [Float] 0 if k doesn't belong to the elements over which the
  #   current instance is distributed. 1/n otherwise where n is number
  #   of elements
  def pdf(k)
    return 1.0 / @count if @support.include?(k)

    0.0
  end

  # Returns value of cumulative distribution function at a point on the
  # real line. Uses a binary search on the sorted support array.
  #
  # @param [Numeric] k Point at which cdf value is desired
  # @return [Float] 0 if k is on the left of the support,
  #   1 if k is at or beyond the largest element, and otherwise the
  #   fraction of support elements that are <= k
  def cdf(k)
    return 0.0 if k < @lower
    return 1.0 if k >= @upper

    # Find-minimum mode locates the first element strictly greater than k;
    # its index equals the number of elements <= k. A match always exists
    # here because k < @upper.
    @support.bsearch_index { |value| value > k }.fdiv(@count)
  end

  # Returns value of inverse CDF for a given probability.
  #
  # @note `(p * count).ceil` is computed in floating point, so products
  #   that land marginally above an integer may select the next element.
  #
  # @see #p_value
  #
  # @param [Numeric] p a value within [0, 1]
  # @return [Numeric] Returns inverse CDF for valid p
  # @raise [RangeError] if p > 1 or p < 0
  def quantile(p)
    raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1
    return @lower if p.zero?
    return @upper if (p - 1).zero?

    @support[(p * count).ceil - 1]
  end

  # Returns the mean value for the calling instance. Calculated mean, and
  # not inferred from simulations
  #
  # NOTE(review): relies on Array#mean, which is not part of Ruby core --
  # presumably supplied by the gem's core extensions; confirm it is loaded.
  #
  # @return [Float] Mean of the distribution
  def mean
    @support.mean
  end

  # Returns the expected value of population variance for the calling
  # instance.
  #
  # NOTE(review): relies on Array#variance, which is not part of Ruby
  # core -- presumably supplied by the gem's core extensions.
  #
  # @return [Float] Variance of the distribution
  def variance
    @support.variance
  end

  # Compares two distribution instances and returns a boolean
  # Available publicly as #==
  #
  # @private
  #
  # @param other A distribution object (preferred)
  # @return [Boolean] true if-and-only-if two instances are of the same
  #   class and have the same parameters.
  def eql?(other)
    other.is_a?(self.class) &&
      @support == other.support
  end

  # The aliases are created before `eql?` is made private, so `==` and
  # `p_value` stay public.
  alias :== :eql?
  alias :p_value :quantile

  private :eql?
end
|
132
|
+
end
|
133
|
+
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
require 'statistical/helpers'
|
2
|
+
|
3
|
+
module Statistical
|
4
|
+
module Distribution
|
5
|
+
# An abstraction of the Weibull distribution, parameterized by a scale and a
# shape. Provides a PDF, CDF, inverse CDF, mean and variance.
|
6
|
+
#
|
7
|
+
# @note scale and shape are converted with #to_f on construction; they are
#   not checked for being positive.
|
8
|
+
#
|
9
|
+
# @author Vaibhav Yenamandra
|
10
|
+
# @attr_reader [Float] scale The distribution's scale parameter
|
11
|
+
# @attr_reader [Float] shape The distribution's shape parameter
|
12
|
+
class Weibull
  attr_reader :scale, :shape, :support

  # Returns a new `Statistical::Distribution::Weibull` instance
  #
  # @param [Numeric] scale The distribution's scale parameter (stored as
  #   a Float)
  # @param [Numeric] shape The distribution's shape parameter (stored as
  #   a Float)
  # @return `Statistical::Distribution::Weibull` instance
  def initialize(scale = 1, shape = 1)
    @scale = scale.to_f
    @shape = shape.to_f
    @support = Domain[0.0, Float::INFINITY, :right_open]
  end

  # Returns value of probability density function at a point:
  # (shape / scale) * (x / scale)^(shape - 1) * exp(-(x / scale)^shape)
  #
  # @param [Numeric] x A real valued point
  # @return [Float] the density when the support comparison yields 0,
  #   0.0 otherwise
  def pdf(x)
    # Evaluate the formula only for points inside the support. For a
    # negative x and a fractional shape, Float#** yields a Complex that
    # Math.exp cannot accept, so computing it eagerly would raise.
    # NOTE(review): assumes `@support <=> x` is 0 exactly when x lies in
    # the support -- confirm against Domain#<=>.
    return 0.0 unless (@support <=> x).zero?

    ratio = x / @scale
    (@shape / @scale) * (ratio**(@shape - 1)) * Math.exp(-(ratio**@shape))
  end

  # Returns value of cumulative distribution function at a point:
  # 1 - exp(-(x / scale)^shape)
  #
  # @param [Numeric] x A real valued point
  # @return [Float] the CDF value when the support comparison yields 0,
  #   1.0 when it yields 1 and 0.0 otherwise
  def cdf(x)
    # Same lazy evaluation as in #pdf; the 1 => 1.0 / otherwise => 0.0
    # mapping mirrors the lookup table used previously.
    case @support <=> x
    when 0 then 1 - Math.exp(-((x / @scale)**@shape))
    when 1 then 1.0
    else 0.0
    end
  end

  # Returns value of inverse CDF for a given probability:
  # scale * (-ln(1 - p))^(1 / shape)
  #
  # @see #p_value
  #
  # @param [Numeric] p a value within [0, 1]
  # @return [Float] Inverse CDF for valid p
  # @raise [RangeError] if p > 1 or p < 0
  def quantile(p)
    raise RangeError, "`p` must be in [0, 1], found: #{p}" if p < 0 || p > 1

    @scale * ((-Math.log(1 - p))**(1 / @shape))
  end

  # Returns the expected value of mean for the calling instance:
  # scale * gamma(1 + 1 / shape)
  #
  # @return [Float] Mean of the distribution
  def mean
    @scale * Math.gamma(1 + 1 / @shape)
  end

  # Returns the expected value of variance for the calling instance:
  # scale^2 * gamma(1 + 2 / shape) - mean^2
  #
  # @return [Float] Variance of the distribution
  def variance
    m = mean
    (@scale * @scale) * Math.gamma(1 + 2 / @shape) - (m * m)
  end

  # Compares two distribution instances and returns a boolean outcome
  # Available publicly as #==
  #
  # @private
  #
  # @param other A distribution object (preferred)
  # @return [Boolean] true if-and-only-if two instances are of the same
  #   class and have the same parameters.
  def eql?(other)
    other.is_a?(self.class) &&
      other.shape == @shape &&
      other.scale == @scale
  end

  # The aliases are created before `eql?` is made private, so `==` and
  # `p_value` stay public.
  alias :== :eql?
  alias :p_value :quantile

  private :eql?
end
|
98
|
+
end
|
99
|
+
end
|