ml_ratiosolve 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +32 -0
- data/Rakefile +8 -0
- data/bin/ml_ratiosolve +18 -0
- data/lib/ml_ratiosolve/error_bootstrapping.rb +133 -0
- data/lib/ml_ratiosolve/ml_ratiosolve.rb +563 -0
- data/lib/ml_ratiosolve/version.rb +3 -0
- data/lib/ml_ratiosolve.rb +62 -0
- data/ml_ratiosolve.gemspec +26 -0
- data/spec/error_bootstrapping_spec.rb +51 -0
- data/spec/ml_ratiosolve_spec.rb +147 -0
- metadata +132 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c76acec151175240892ce4280bbdb360af79d268
|
4
|
+
data.tar.gz: 50e5f40213dec0644ad76c57672c031139f516a7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: df72bc79f9019b6fd6142da383d79492dbff2157fa0ff0db22677b7c4727886f2f1dfb31b15f8df12bde9d290b542266ebb8f48be64c9335f82f36ca615fe2d7
|
7
|
+
data.tar.gz: 23619b6eea6159c3535bf44f06065b7efad19e20c3b8e6a67e6ed393e8f55ead1f1ed627e7ec719e62aaabbc3cfe07a614d24d9406f3a7f2ed7b498fbd7c1801
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Colin J. Fuller
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# MlRatiosolve
|
2
|
+
|
3
|
+
A tool for parameter estimations for (not-quite-)ratio problems.
|
4
|
+
|
5
|
+
In scientific data, frequently there is uncontrolled experiment-to-experiment variability such that quantitative comparison between experiments is only meaningful when data are expressed as a ratio to an internal control that is part of each experiment. However, even if the values of the control and other treatments would be Gaussian distributed, this ratio is not, making accurate estimates of means and errors difficult.
|
6
|
+
|
7
|
+
MLRatiosolve uses the fact that this problem contains more information than just the ratios to obtain better estimates of distribution parameters than is possible for a ratio problem.
|
8
|
+
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Dependencies:
|
13
|
+
|
14
|
+
MLRatioSolve requires the [NMatrix linear algebra library](https://github.com/SciRuby/nmatrix), which will be installed automatically. NMatrix, however, requires that you first install ATLAS. If you don't have this installed already, see the instructions at https://github.com/SciRuby/nmatrix/wiki/Installation
|
15
|
+
|
16
|
+
`$ gem install ml_ratiosolve`
|
17
|
+
|
18
|
+
Or, if you have ruby installed globally on your computer:
|
19
|
+
|
20
|
+
`$ sudo gem install ml_ratiosolve`
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
MLRatiosolve takes a CSV file of data as input. Run `ml_ratiosolve --help` from the command line for further information and additional options.
|
25
|
+
|
26
|
+
## Contributing
|
27
|
+
|
28
|
+
1. Fork it
|
29
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
30
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
31
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
32
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/bin/ml_ratiosolve
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'ml_ratiosolve'
|
4
|
+
|
5
|
+
require 'trollop'
|
6
|
+
|
7
|
+
# Command-line options for the ml_ratiosolve executable; every option's help
# text and default is passed straight to Trollop.
options = Trollop.options do
  opt :file, "Input file, containing CSV-formatted data with columns corresponding to independent experiments and rows as treatments.", :type => :string
  opt :n_starts, "Number of distinct starting guesses to try for parameter estimation", :type => :integer, :default => 1000
  opt :n_iter, "Maximum number of iterations to run per starting guess", :type => :integer, :default => 1500
  opt :n_bootstrap, "Number of samples to calculate for parametric bootstrapping of the confidence intervals", :type => :integer, :default => 1000
  opt :tol, "If the absolute change in log likelihood of an estimate is less than this, iteration will terminate.", :type => :float, :default => 1.0e-6
  opt :ci_level, "Float in the range 0-1 specifiying the level of confidence interval to calculate. E.g. 0.95 will calculate a 95% confidence interval", :type => :float, :default => 0.95
  opt :skip, "If some experiments are missing datapoints, specifiy which ones here. These should be specified as (0-indexed) treatment,experiment pairs, with a comma separating treatment and experiment and a colon separating successive pairs. E.g. to skip the first and second treatments in the first experiment: '0,0:1,0'", :type => :string, :default => ""
  opt :norm_index, "Normalize so that the mean value of the specified treatment is set to 1.", :type => :integer, :default => 0
end

# Hand the parsed options hash to the library entry point.
MLRatioSolveBin.go(options)
|
@@ -0,0 +1,133 @@
|
|
1
|
+
#--
|
2
|
+
# error_bootstrapping.rb
|
3
|
+
# Copyright (c) 2013 Colin J. Fuller
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the Software), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
# SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
#
|
25
|
+
# Methods for using parametric bootstrapping to estimate confidence intervals
|
26
|
+
# for the ML ratio estimation.
|
27
|
+
#
|
28
|
+
# @author Colin J. Fuller
|
29
|
+
#
|
30
|
+
module ErrorBootstrapping
|
31
|
+
class << self
|
32
|
+
|
33
|
+
#
|
34
|
+
# Generate a random normal variate using the Box-Muller transform
|
35
|
+
# @param mu [Numeric] The desired mean
|
36
|
+
# @param s2 [Numeric] The desired variance
|
37
|
+
#
|
38
|
+
# @return [Float] A random variate from the normal distribution with
|
39
|
+
# supplied parameters
|
40
|
+
def randnorm(mu, s2)
  # Box-Muller needs a uniform draw in (0, 1]. Kernel#rand returns a value in
  # [0, 1), and a draw of exactly 0.0 would make Math.log return -Infinity and
  # the variate Infinity or NaN. Reflecting the draw (1.0 - rand) excludes 0.
  u0 = 1.0 - rand
  u1 = rand
  # A single standard-normal variate is enough: the cosine and sine branches of
  # the transform are identically distributed.
  z = Math.sqrt(-2.0 * Math.log(u0)) * Math.cos(2 * Math::PI * u1)
  # Scale by the standard deviation (s2 is a variance) and shift by the mean.
  z * Math.sqrt(s2) + mu
end
|
45
|
+
|
46
|
+
|
47
|
+
#
|
48
|
+
# Generate a set of simulated data consisting of random numbers drawn from
|
49
|
+
# the distributions with the supplied parameters
|
50
|
+
# @param parameters [Hash] A hash containing the mean, variance, and scale
|
51
|
+
# parameters formatted like the output from
|
52
|
+
# MLRatioSolve::do_iters_with_start
|
53
|
+
# @param x [NMatrix] The original experimental data
|
54
|
+
#
|
55
|
+
# @return [NMatrix] A matrix of simulated data with the same dimensions as
|
56
|
+
# x. Any skipped data points (as returned by MLRatioSolve::skip_indices)
|
57
|
+
# will be set to 0 here.
|
58
|
+
def gen_data(parameters, x)
  means, variances, scales = parameters.values_at(:mu, :sig2, :gamma)
  n_treatments = x.shape[0]
  n_experiments = x.shape[1]

  # Entries that are skipped keep the zero they are initialised with.
  simulated = N.zeros_like(x)

  (0...n_treatments).each do |row|
    (0...n_experiments).each do |col|
      unless MLRatioSolve.skip_indices.include?([row, col])
        scale = scales[col]
        # Each point is drawn with mean mu/gamma and variance sig2/gamma**2.
        simulated[row, col] = randnorm(means[row] / scale, variances[row] / scale**2)
      end
    end
  end

  simulated
end
|
76
|
+
|
77
|
+
|
78
|
+
#
|
79
|
+
# Re-estimate distribution parameters by generating simulated data a number
|
80
|
+
# of times and performing the iterative estimation in MLRatioSolve
|
81
|
+
# @param n_gen [Numeric] number of datasets to simulate
|
82
|
+
# @param parameters [Hash] A hash containing the estimated parameters (:mu, :sig2, :gamma) from which the simulated data are generated.
|
83
|
+
# @param x [NMatrix] The original experimental data
|
84
|
+
# @param n_iter [Numeric] max number of iterations per estimate
|
85
|
+
# @param tol=nil [Numeric] if non-nil, the iterations will terminate early
|
86
|
+
# if the absolute change in the likelihood between two successive
|
87
|
+
# iterations is less than this
|
88
|
+
#
|
89
|
+
# @return [Array] An array containing n_gen hashes, each of which is the
|
90
|
+
# result of the estimation run on one simulated dataset.
|
91
|
+
def estimate_with_gen_data(n_gen, parameters, x, n_iter, tol=nil)
  # One simulated dataset and one re-estimation per slot; every run starts
  # from the gamma of the supplied parameters.
  Array.new(n_gen) do
    simulated = gen_data(parameters, x)
    MLRatioSolve.do_iters_with_start(n_iter, simulated, parameters[:gamma], tol)
  end
end
|
99
|
+
|
100
|
+
|
101
|
+
#
|
102
|
+
# Calculate a bootstrapped confidence interval from the output of
|
103
|
+
# #estimate_with_gen_data.
|
104
|
+
# @param results [Array] An array of hashes as output from
|
105
|
+
# #estimate_with_gen_data
|
106
|
+
# @param level [Float] A number between 0 and 1 that is the level of the
|
107
|
+
# confidence interval. For instance, a value of 0.95 will lead to
|
108
|
+
# calculation of the bounds on the central 95% of values.
|
109
|
+
#
|
110
|
+
# @return [Array] an array of two NMatrix objects, the lower bound on the
|
111
|
+
# CI and the upper bound on the CI
|
112
|
+
def bootstrap_ci(results, level)
  # Stack the per-simulation mean estimates: one row per simulation, one
  # column per treatment.
  means = N[*results.map { |e| e[:mu].to_a }]
  size_sim = means.shape[0]
  size_i = means.shape[1]
  ci_lower = N.new([size_i], 0.0)
  ci_upper = N.zeros_like(ci_lower)
  # Probability mass left out on each side of the central interval.
  tail = (1.0 - level) / 2.0

  size_i.times do |i|
    # Non-finite estimates are discarded before taking percentiles.
    samples = means[0...size_sim, i].to_a.flatten.select { |e| e.finite? }.sort

    if samples.empty?
      # Nothing usable for this treatment: report NaN instead of indexing
      # into an empty array (which would yield nil).
      ci_lower[i] = Float::NAN
      ci_upper[i] = Float::NAN
      next
    end

    # Clamp both indices into the valid range. Without this the upper index
    # equals samples.length when level is 1.0, and the lower index can do the
    # same for very small levels.
    last = samples.length - 1
    lower_ci_index = [[(tail * samples.length).ceil, 0].max, last].min
    upper_ci_index = [[((1.0 - tail) * samples.length).floor, 0].max, last].min
    ci_lower[i] = samples[lower_ci_index]
    ci_upper[i] = samples[upper_ci_index]
  end

  [ci_lower, ci_upper]
end
|
131
|
+
|
132
|
+
end
|
133
|
+
end
|
@@ -0,0 +1,563 @@
|
|
1
|
+
#--
|
2
|
+
# ml_ratiosolve.rb
|
3
|
+
# Copyright (c) 2013 Colin J. Fuller
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the Software), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
# SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'csv'
|
25
|
+
|
26
|
+
require 'nmatrix'
|
27
|
+
#
|
28
|
+
# Functions for ML estimates of distribution parameters for ratios of Gaussian
|
29
|
+
# measurements.
|
30
|
+
#
|
31
|
+
# This module aims to deal with the case where one needs to average
|
32
|
+
# measurements that are made relative to an internal control, such that each
|
33
|
+
# measurement to be averaged is the ratio of two Gaussian-distributed
|
34
|
+
# variables, which is not itself Gaussian-distributed.
|
35
|
+
#
|
36
|
+
# Each independent experiment is assumed to consist of I treatments, and there
|
37
|
+
# are N total experiments. Each measurement x_{i,n} is assumed to be a Gaussian
|
38
|
+
# random variable with mean mu_i/gamma_n and variance s2_i/gamma_n**2. Each mu and
|
39
|
+
# s2 parameter depends only on the treatment and not the experiment number,
|
40
|
+
# whereas the scale factors gamma depend only on the experiment and not the
|
41
|
+
# treatment.
|
42
|
+
#
|
43
|
+
# This is a more general case of the ratio-to-internal-control problem, and the
|
44
|
+
# latter is simply the case where the gamma_n are each set to the value of the
|
45
|
+
# internal control for experiment n.
|
46
|
+
#
|
47
|
+
# @author Colin J. Fuller
|
48
|
+
#
|
49
|
+
module MLRatioSolve
|
50
|
+
class << self
|
51
|
+
|
52
|
+
#
|
53
|
+
# The normal probability distribution.
|
54
|
+
# @param x [Numeric] The point at which to calculate the probability density
|
55
|
+
# @param m [Numeric] The mean of the distribution
|
56
|
+
# @param s2 [Numeric] The variance of the distribution
|
57
|
+
#
|
58
|
+
# @return [Float] The probability density at the specified point
|
59
|
+
def normpdf(x, m, s2)
  # Gaussian density: normalisation constant times the exponential of the
  # negated, variance-scaled squared deviation.
  normalization = 1.0 / Math.sqrt(2 * Math::PI * s2)
  exponent = -((x - m)**2) / (2.0 * s2)
  normalization * Math.exp(exponent)
end
|
62
|
+
|
63
|
+
#
|
64
|
+
# Sets up the array of indices to skip from a string.
|
65
|
+
# @param skip_str [String] A string containing treatment, experiment index
|
66
|
+
# pairs. Pairs should have a comma internally and a colon between
|
67
|
+
# successive pairs. E.g. '1,2:0,1'
|
68
|
+
#
|
69
|
+
# @return [void]
|
70
|
+
def set_skip_indices(skip_str)
  # Colon-separated pairs, comma-separated integers within each pair. An
  # empty string splits into no pairs and therefore stores an empty list.
  @skip_indices = skip_str.split(":").map { |pair| pair.split(',').map(&:to_i) }
end
|
79
|
+
|
80
|
+
#
|
81
|
+
# Gets and returns the list of (treatment, experiment) pairs to skip due to
|
82
|
+
# missing data
|
83
|
+
#
|
84
|
+
# @return [Array] An array of two element arrays containing treatment index,
|
85
|
+
# experiment index ordered pairs
|
86
|
+
def skip_indices
  # Default to "nothing skipped" when set_skip_indices has not been called, so
  # that callers doing skip_indices.include?(...) never see nil.
  @skip_indices ||= []
end
|
89
|
+
|
90
|
+
#
|
91
|
+
# The log likelihood function for the datapoints given a set of distribution
|
92
|
+
# parameters.
|
93
|
+
# @param gamma [NMatrix] The per-experiment scale factors (column vector
|
94
|
+
# with N elements)
|
95
|
+
# @param x [NMatrix] The data points, an IxN matrix.
|
96
|
+
# @param mu [NMatrix] The mean parameters for each treatment (column
|
97
|
+
# vector with I elements)
|
98
|
+
# @param sig2 [NMatrix] The variance parameters for each treatment (column
|
99
|
+
# vector with I elements)
|
100
|
+
#
|
101
|
+
# @return [Float] The log likelihood of the data given the parameters
|
102
|
+
def log_l_fct(gamma, x, mu, sig2)
  log_two_pi = Math.log(2.0 * Math::PI)
  total = 0.0
  mu.size.times do |ii|
    gamma.size.times do |nn|
      next if skip_indices.include?([ii, nn])
      scale = gamma[nn]
      mean = mu[ii] / scale
      variance = sig2[ii] / scale**2
      # Log of the Gaussian density, evaluated directly. Taking Math.log of the
      # density itself underflows to -Infinity once the density rounds to 0.0
      # (points far from the mean, or very small variances).
      total += -0.5 * (log_two_pi + Math.log(variance)) - (x[ii, nn] - mean)**2 / (2.0 * variance)
    end
  end
  total
end
|
114
|
+
|
115
|
+
|
116
|
+
#
|
117
|
+
# Given the per-experiment scale factors, calculate the ML estimate of the
|
118
|
+
# mean parameters
|
119
|
+
# @param gamma [NMatrix] The per-experiment scale factors (column vector
|
120
|
+
# with N elements)
|
121
|
+
# @param x [NMatrix] The data points, an IxN matrix.
|
122
|
+
#
|
123
|
+
# @return [NMatrix] The ML estimates of the mean parameters (column vector
|
124
|
+
# with I elements)
|
125
|
+
def calculate_mu_i(gamma, x)
  n_experiments = gamma.size
  n_treatments = x.shape[0]

  # With no missing data the estimate is one matrix-vector product.
  return x.dot(gamma) / n_experiments if skip_indices.empty?

  # Otherwise average the rescaled points per treatment over the experiments
  # that are actually present.
  mu = NMatrix.zeros([n_treatments, 1], stype: :dense)
  (0...n_treatments).each do |row|
    used = 0
    (0...n_experiments).each do |col|
      unless skip_indices.include?([row, col])
        mu[row] += gamma[col] * x[row, col]
        used += 1
      end
    end
    mu[row] /= used
  end

  mu
end
|
145
|
+
|
146
|
+
|
147
|
+
#
|
148
|
+
# Given the per-experiment scale factors, calculate the ML estimate of the
|
149
|
+
# variance parameters
|
150
|
+
# @param gamma [NMatrix] The per-experiment scale factors (column vector
|
151
|
+
# with N elements)
|
152
|
+
# @param x [NMatrix] The data points, an IxN matrix.
|
153
|
+
# @param mu [NMatrix] The precalculated ML estimates of the mean
|
154
|
+
# parameters for the provided gamma (column vector with I elements)
|
155
|
+
#
|
156
|
+
# @return [NMatrix] The ML estimates of the variance parameters (column
|
157
|
+
# vector with I elements)
|
158
|
+
def calculate_sig2_i(gamma, x, mu)
  n_experiments = gamma.size
  n_treatments = mu.size
  # Lower bound applied to every variance estimate.
  variance_floor = Float::EPSILON**2
  sig2 = NMatrix.zeros([n_treatments, 1], stype: :dense)

  (0...n_treatments).each do |row|
    used = 0
    (0...n_experiments).each do |col|
      next if skip_indices.include?([row, col])
      # Squared deviation of the rescaled point from the treatment mean.
      sig2[row] += (gamma[col] * x[row, col] - mu[row])**2
      used += 1
    end
    sig2[row] /= used
    sig2[row] = variance_floor if sig2[row] < variance_floor
  end

  sig2
end
|
178
|
+
|
179
|
+
#
|
180
|
+
# Given the mean and variance parameters, calculate the ML estimate of a
|
181
|
+
# single experimental scale factor.
|
182
|
+
#
|
183
|
+
# @param nn [Integer] the index of the scale factor to calculate.
|
184
|
+
# @see #calculate_gamma_n
|
185
|
+
#
|
186
|
+
# @return [Numeric] The ML estimate for the scale factor.
|
187
|
+
#
|
188
|
+
def calculate_single_gamma(nn, x, mu, sig2)
  xm_over_s2 = 0.0
  x2_over_s2 = 0.0
  # Number of treatments that actually contribute to experiment nn. The
  # quadratic below must use this count rather than the total number of
  # treatments, matching how the mean and variance estimates count only
  # non-skipped points.
  n_used = 0
  x.shape[0].times do |ii|
    next if skip_indices.include?([ii, nn])
    xm_over_s2 += x[ii, nn] * mu[ii] / sig2[ii]
    x2_over_s2 += x[ii, nn]**2 / sig2[ii]
    n_used += 1
  end
  # Positive root of gamma**2 * x2_over_s2 - gamma * xm_over_s2 - n_used = 0.
  (xm_over_s2 + Math.sqrt(xm_over_s2**2 + 4 * n_used * x2_over_s2)) / (2 * x2_over_s2)
end
|
199
|
+
|
200
|
+
#
|
201
|
+
# Given the mean and variance parameters, calculate the ML estimate of the
|
202
|
+
# experimental scale factors
|
203
|
+
# @param x [NMatrix] The data points, an IxN matrix.
|
204
|
+
# @param mu [NMatrix] The mean parameters (column vector with I elements)
|
205
|
+
# @param sig2 [NMatrix] The variance parameters (column vector with I
|
206
|
+
# elements)
|
207
|
+
#
|
208
|
+
# @return [NMatrix] The ML estimates of the per-experiment scale factors (
|
209
|
+
# column vector with N elements)
|
210
|
+
def calculate_gamma_n(x, mu, sig2)
  n_experiments = x.shape[1]
  gamma = NMatrix.zeros([n_experiments, 1], stype: :dense)
  n_experiments.times do |col|
    gamma[col] = calculate_single_gamma(col, x, mu, sig2)
  end
  # Dividing by the first entry expresses every scale factor in the units of
  # the first experiment.
  gamma / gamma[0]
end
|
220
|
+
|
221
|
+
|
222
|
+
#
|
223
|
+
# Nicely prints the supplied distribution parameters
|
224
|
+
# @param mu [NMatrix] the mean parameters (column vector with I elements)
|
225
|
+
# @param sig2 [NMatrix] the variance parameters (column vector with I
|
226
|
+
# elements)
|
227
|
+
# @param gamma [NMatrix] the per-experiment scale factors (column vector
|
228
|
+
# with N elements)
|
229
|
+
# @param x [NMatrix] the experimental data (IxN matrix)
|
230
|
+
# @param iternum [Integer] the index of the current iteration
|
231
|
+
#
|
232
|
+
# @return [void]
|
233
|
+
def print_parameters(mu, sig2, gamma, x, iternum)
  puts "=" * 10
  puts "At iteration number #{iternum}:"
  puts "mu = #{mu}"
  # Variances are reported as standard deviations.
  puts "sig = #{sig2.map { |variance| Math.sqrt(variance) }}"
  puts "gamma = #{gamma}"
  puts "Log likelihood = #{log_l_fct(gamma, x, mu, sig2)}"
  nil
end
|
242
|
+
|
243
|
+
|
244
|
+
#
|
245
|
+
# Does one iteration of estimation, taking a starting gamma, calculating mu
|
246
|
+
# and s2, and then recalculating gamma from these
|
247
|
+
# @param gamma_in [NMatrix] the per-experiment scale factors (column
|
248
|
+
# vector with N elements)
|
249
|
+
# @param x [NMatrix] the experimental data (IxN matrix)
|
250
|
+
# @param iternum [Integer] the index of the current iteration
|
251
|
+
#
|
252
|
+
# @return [NMatrix] the new estimates of the per-experiment scale factors (
|
253
|
+
# column vector with N elements)
|
254
|
+
def do_single_iteration(gamma_in, x, iternum)
  # Means and variances for the incoming scale factors, then new scale
  # factors from those. iternum is only needed by the optional debug print
  # (print_parameters), which is left disabled.
  means = calculate_mu_i(gamma_in, x)
  variances = calculate_sig2_i(gamma_in, x, means)
  calculate_gamma_n(x, means, variances)
end
|
261
|
+
|
262
|
+
#
|
263
|
+
# Do a series of iterations of estimation from a supplied starting estimate.
|
264
|
+
# @param n_iter [Numeric] The maximum number of iterations to perform.
|
265
|
+
# @param x [NMatrix] the experimental data (IxN matrix)
|
266
|
+
# @param gamma_start [NMatrix] the initial guess for the gamma
|
267
|
+
# experimental scale factors (column vector with N elements)
|
268
|
+
# @param tol=nil [Numeric] if non-nil, the iterations will terminate early
|
269
|
+
# if the absolute change in the likelihood between two successive
|
270
|
+
# iterations is less than this
|
271
|
+
#
|
272
|
+
# @return [Hash] the results of the iteration, containing keys for mu, sig2,
|
273
|
+
# gamma, and l, which are the mean, variance, scale factors, and
|
274
|
+
# likelihood, respectively, after iteration completed
|
275
|
+
def do_iters_with_start(n_iter, x, gamma_start, tol=nil, debug=false)
  # NOTE: debug is accepted for interface compatibility but is not used.
  gamma = gamma_start
  last_l = -1.0 * Float::MAX

  n_iter.times do |it|
    candidate = do_single_iteration(gamma, x, it)
    mu = calculate_mu_i(candidate, x)
    l = log_l_fct(candidate, x, mu, calculate_sig2_i(candidate, x, mu))
    # Stop, keeping the previously accepted gamma, when the likelihood change
    # drops below tol or the candidate contains a non-finite scale factor.
    break if (tol && (l - last_l).abs < tol) || candidate.to_a.flatten.any? { |e| !e.finite? }
    gamma = candidate
    last_l = l
  end

  # Evaluate the final parameters once and reuse them, instead of recomputing
  # the means and variances for each entry of the result hash.
  mu = calculate_mu_i(gamma, x)
  sig2 = calculate_sig2_i(gamma, x, mu)
  { mu: mu, sig2: sig2, gamma: gamma, l: log_l_fct(gamma, x, mu, sig2) }
end
|
291
|
+
|
292
|
+
#
|
293
|
+
# Estimates the mean given that one treatment is fixed at ~ zero variance.
|
294
|
+
#
|
295
|
+
# @param x [NMatrix] the experimental data (IxN matrix)
|
296
|
+
# @param i_zero [Integer] the index of the zero variance treatment
|
297
|
+
# @param mi [Numeric] the mean for the zero variance treatment
|
298
|
+
# @param perm [Array] a permutation array mapping the permuted data
|
299
|
+
# indices to their original indices (non-lapack format, see
|
300
|
+
# #invert_permutation_matrix)
|
301
|
+
#
|
302
|
+
# @return [NMatrix] the estimate of the means (Ix1)
|
303
|
+
#
|
304
|
+
def m_est_zerovar(x,i_zero,mi,perm)
  n_experiments = x.shape[1]
  n_treatments = x.shape[0]
  estimate = N.zeros([n_treatments, 1], dtype: x.dtype, stype: :dense)

  n_treatments.times do |row|
    used = 0
    n_experiments.times do |col|
      # Skip entries are looked up under the column index mapped through perm.
      mapped_col = perm[col]
      missing = skip_indices.include?([row, mapped_col]) || skip_indices.include?([i_zero, mapped_col])
      next if missing
      # Ratio to the reference treatment, scaled by the reference mean.
      estimate[row] += mi * x[row, col] / x[i_zero, col]
      used += 1
    end
    estimate[row] /= used
  end

  estimate
end
|
319
|
+
|
320
|
+
#
|
321
|
+
# Estimates the variances given that one treatment is fixed at ~ zero
|
322
|
+
# variance.
|
323
|
+
#
|
324
|
+
# @param x [NMatrix] the experimental data (IxN matrix)
|
325
|
+
# @param i_zero [Integer] the index of the zero variance treatment
|
326
|
+
# @param m [NMatrix] the mean estimates (as output by #m_est_zerovar)
|
327
|
+
# @param perm [Array] a permutation array mapping the permuted data indices
|
328
|
+
# to their original indices (non-lapack format, see
|
329
|
+
# #invert_permutation_matrix)
|
330
|
+
#
|
331
|
+
# @return [NMatrix] the estimate of the variances (Ix1)
|
332
|
+
#
|
333
|
+
def s2_est_zerovar(x, i_zero, m, perm)
  n_experiments = x.shape[1]
  n_treatments = x.shape[0]
  variances = N.zeros([n_treatments, 1], dtype: x.dtype, stype: :dense)

  n_treatments.times do |row|
    used = 0
    n_experiments.times do |col|
      # Skip entries are looked up under the column index mapped through perm.
      mapped_col = perm[col]
      missing = skip_indices.include?([row, mapped_col]) || skip_indices.include?([i_zero, mapped_col])
      next if missing
      # Squared deviation of the rescaled ratio from the treatment's mean.
      deviation = x[row, col] * m[i_zero] / x[i_zero, col] - m[row]
      variances[row] += deviation**2
      used += 1
    end
    variances[row] /= used
  end

  variances
end
|
348
|
+
|
349
|
+
#
|
350
|
+
# Inverts a permutation so that it maps permuted data back to the original
|
351
|
+
# data.
|
352
|
+
#
|
353
|
+
# @param perm [Array] the permutation to invert. (Not lapack format but
|
354
|
+
# where perm[i] = j indicates that in the final permuted matrix column j is
|
355
|
+
# the original column i.)
|
356
|
+
#
|
357
|
+
# @return [Array] the inverted permutation.
|
358
|
+
#
|
359
|
+
def invert_permutation_matrix(perm)
  # If perm sends index i to j, the inverse sends j back to i. Slots never
  # written keep the initial 0.
  perm.each_with_index.each_with_object(Array.new(perm.size, 0)) do |(target, source), inverse|
    inverse[target] = source
  end
end
|
366
|
+
|
367
|
+
#
|
368
|
+
# Permute the rows of a matrix (using a transpose and permute_columns
|
369
|
+
# method).
|
370
|
+
#
|
371
|
+
# @param m [NMatrix] the matrix to permute
|
372
|
+
# @param perm [Array] the permutation (in lapack format)
|
373
|
+
#
|
374
|
+
# @return [NMatrix] the permuted data matrix
|
375
|
+
#
|
376
|
+
def permute_rows(m, perm)
  # Rows become columns, the columns are permuted, then the result is
  # transposed back.
  transposed = m.transpose
  permuted = transposed.permute_columns(perm)
  permuted.transpose
end
|
379
|
+
|
380
|
+
#
|
381
|
+
# Find a permutation of experiments such that the ith treatment is not skipped in the
|
382
|
+
# first experiment after the permutation.
|
383
|
+
#
|
384
|
+
# @param x [NMatrix] the experimental data
|
385
|
+
# @param i [Integer] the index of the treatment to make sure is not skipped
|
386
|
+
# in the first experiment
|
387
|
+
#
|
388
|
+
# @return [Array] an array containing two elements: a permutation array in
|
389
|
+
# each of lapack and non-lapack formats. (See #invert_permutation_matrix
|
390
|
+
# for non-lapack format.)
|
391
|
+
#
|
392
|
+
def find_permutation_nonskip(x, i)
  n = x.shape[1]
  # First experiment in which treatment i is present. If it is skipped
  # everywhere, fall back to the last experiment (or 0 when there are none).
  first_present = (0...n).find { |nn| !skip_indices.include?([i, nn]) }
  first = first_present || [n - 1, 0].max

  lapack_perm = (0...n).to_a
  full_perm = (0...n).to_a
  lapack_perm[0] = first
  # The full permutation swaps positions 0 and first.
  full_perm[0] = first
  full_perm[first] = 0
  [lapack_perm, full_perm]
end
|
406
|
+
|
407
|
+
#
|
408
|
+
# Test a single low variance solution.
|
409
|
+
#
|
410
|
+
# @param ii [Integer] the index of the low variance treatment.
|
411
|
+
# @see #test_all_low_variance_solutions
|
412
|
+
#
|
413
|
+
def test_single_low_variance_solution(ii, n_iter, x, tol=nil)
  lapack_perm, full_perm = find_permutation_nonskip(x, ii)
  inverse_perm = invert_permutation_matrix(full_perm)

  # The zero-variance estimates are computed on a column-permuted copy of the
  # data; the iteration below runs on the data as supplied.
  permuted = x.permute_columns(lapack_perm)
  means = m_est_zerovar(permuted, ii, permuted[ii, 0], inverse_perm)
  variances = s2_est_zerovar(permuted, ii, means, inverse_perm)
  # Pin the chosen treatment's variance to a very small value.
  variances[ii] = 1.0e-16

  gamma_start = calculate_gamma_n(x, means, variances)
  do_iters_with_start(n_iter, x, gamma_start, tol)
end
|
428
|
+
|
429
|
+
|
430
|
+
#
|
431
|
+
# Test all solutions where the variance of one treatment is ~ zero.
|
432
|
+
#
|
433
|
+
# @param n_iter [Integer] the number of iterations to run starting from the
|
434
|
+
# low variance solution.
|
435
|
+
# @param x [NMatrix] the experimental data (IxN)
|
436
|
+
# @param tol [Numeric] if non-nil, the iterations will terminate early
|
437
|
+
# if the absolute change in the likelihood between two successive
|
438
|
+
# iterations is less than this
|
439
|
+
#
|
440
|
+
# @return [Hash] the maximum likelihood solution from those tested.
|
441
|
+
# (see #do_iters_with_start for format)
|
442
|
+
#
|
443
|
+
def test_all_low_variance_solutions(n_iter, x, tol=nil)
  # Placeholder that any result with a larger likelihood replaces.
  initial = { l: -1.0 * Float::MAX }
  (0...x.shape[0]).inject(initial) do |best, treatment|
    candidate = test_single_low_variance_solution(treatment, n_iter, x, tol)
    # Strict comparison: on a tie the earlier solution is kept.
    candidate[:l] > best[:l] ? candidate : best
  end
end
|
455
|
+
|
456
|
+
#
|
457
|
+
# Successively yields points on an n-dimensional grid.
|
458
|
+
# @param max_depth [Integer] The number of dimensions.
|
459
|
+
# @param n_per [Integer] The number of points on the grid in each dimension.
|
460
|
+
# @param min [Float] The minimum value of gridded points.
|
461
|
+
# @param max [Float] The maximum value of gridded points.
|
462
|
+
# @param curr [Array] The current points in progress of being built.
|
463
|
+
# This should be an array of max_depth elements. Contents will be
|
464
|
+
# overwritten.
|
465
|
+
# @param i [Integer] The current dimension. Supply 0 initially.
|
466
|
+
#
|
467
|
+
# @return [void]
|
468
|
+
def grid_recursive(max_depth, n_per, min, max, curr, i, &b)
  # All coordinates filled in: hand the point to the caller. The same array
  # object is reused for every point, so callers must copy it to keep it.
  if i == max_depth
    yield curr
    return
  end

  # A one-point axis has no spacing; place the single point at min rather
  # than dividing by zero.
  if n_per == 1
    curr[i] = min.to_f
    grid_recursive(max_depth, n_per, min, max, curr, i + 1, &b)
    return nil
  end

  # Force float arithmetic so integer bounds do not truncate the spacing
  # (possibly to a zero step).
  step = (max - min).to_f / (n_per - 1)
  min.to_f.step(max.to_f, step) do |val|
    curr[i] = val
    grid_recursive(max_depth, n_per, min, max, curr, i + 1, &b)
  end
  nil
end
|
480
|
+
|
481
|
+
#
|
482
|
+
# Runs iterations of estimation from starting guesses arrayed on a grid.
|
483
|
+
# @param n_starts [Numeric] Total number of guesses to generate
|
484
|
+
# @param n_gen [Integer] Number of dimensions
|
485
|
+
# @param n_iter [Integer] Maximum number of iterations to run per guess
|
486
|
+
# @param x [NMatrix] The datapoints (IxN matrix)
|
487
|
+
# @param tol=nil [Numeric] if non-nil, the iterations will terminate early
|
488
|
+
# if the absolute change in the likelihood between two successive
|
489
|
+
# iterations is less than this
|
490
|
+
#
|
491
|
+
# @return [Hash] The result with the maximum likelihood among those found (
|
492
|
+
# see #do_iters_with_start for format)
|
493
|
+
def grid_multiple_iters(n_starts, n_gen, n_iter, x, tol=nil)
  # Points per dimension: the largest integer whose n_gen-th power does not
  # exceed n_starts. Flooring the floating-point root directly under-counts
  # exact powers (125**(1.0/3) evaluates to 4.999...), so round first and
  # step back down only if the rounded grid would exceed n_starts.
  n_per = (n_starts**(1.0/n_gen)).round
  n_per -= 1 if n_per**n_gen > n_starts

  # Range of starting values tried along every gridded dimension.
  min_range = 0.01
  max_range = 5.0

  # Sentinel with the lowest possible likelihood; it has no :mu key, which is
  # what suppresses the progress printout until a real result is stored.
  best = {l: -1.0*Float::MAX}
  counter = 0

  grid_recursive(n_gen, n_per, min_range, max_range, Array.new(n_gen, 0.0), 0) do |gamma|
    # The starting guess is a column vector: a leading 1.0 followed by the
    # gridded values.
    start = N.new([gamma.size + 1, 1], [1.0].concat(gamma))
    result = do_iters_with_start(n_iter, x, start, tol)
    best = result if result[:l] > best[:l]
    counter += 1
    if best[:mu]
      puts "Best solution found so far:"
      print_parameters(best[:mu], best[:sig2], best[:gamma], x, counter)
    end
  end

  # Also compare against the low-variance solutions, and keep whichever has
  # the higher likelihood.
  best_lv = test_all_low_variance_solutions(n_iter, x, tol)
  best = best_lv if best_lv[:l] > best[:l]
  best
end
|
518
|
+
|
519
|
+
|
520
|
+
#
|
521
|
+
# Estimates the SEM for each treatment after normalization.
|
522
|
+
# @param result [Hash] The results hash as output by #do_iters_with_start
|
523
|
+
# @param norm_index [Integer] the index of the treatment to which to normalize
|
524
|
+
#
|
525
|
+
# @return [NMatrix] The SEM estimates for each treatment (vector with I components)
|
526
|
+
def ml_sem_estimate(result, norm_index)
  n = result[:gamma].size

  # Number of skip entries whose first element is the normalization treatment.
  norm_skips = skip_indices.count { |entry| entry[0] == norm_index }

  squared_sems = result[:sig2].map.with_index do |sig2_i, idx|
    # Number of skip entries whose first element is this treatment.
    own_skips = skip_indices.count { |entry| entry[0] == idx }

    sem2_i = sig2_i/(n-own_skips-1)
    sem2_norm = result[:sig2][norm_index]/(n-norm_skips-1)
    mu_norm = result[:mu][norm_index]

    # Sum of the two contributions to the squared error of mu[i]/mu[norm]:
    # one from treatment i itself and one from the normalization treatment.
    own_term = sem2_i*1.0/mu_norm**2
    norm_term = sem2_norm*(result[:mu][idx]/mu_norm**2)**2
    own_term + norm_term
  end

  squared_sems.map { |v| Math.sqrt(v) }
end
|
547
|
+
|
548
|
+
|
549
|
+
#
|
550
|
+
# Reads the datapoints from a csv file. Data should be arranged with
|
551
|
+
# columns as independent experiments and rows as treatments.
|
552
|
+
# @param fn [String] The filename from which to read the data.
|
553
|
+
#
|
554
|
+
# @return [NMatrix] an IxN nmatrix containing the experimental data
|
555
|
+
def read_data_from_file(fn)
  # Convert every CSV cell to a float, row by row, then splat the rows into
  # the matrix constructor.
  rows = CSV.read(fn).map do |row|
    row.map { |cell| cell.to_f }
  end
  N[*rows]
end
|
558
|
+
end
|
559
|
+
end
|
560
|
+
|
561
|
+
|
562
|
+
|
563
|
+
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#--
|
2
|
+
# ml_ratiosolve.rb
|
3
|
+
# Copyright (c) 2013 Colin J. Fuller
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the Software), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
# SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require "ml_ratiosolve/version"
|
25
|
+
require 'ml_ratiosolve/ml_ratiosolve'
|
26
|
+
require 'ml_ratiosolve/error_bootstrapping'
|
27
|
+
|
28
|
+
module MLRatioSolveBin

  #
  # Runs the grid-search estimation on the data file named in the options and
  # prints the best solution, normalized to one treatment, followed by the
  # error estimate.
  # @param opts [Hash] options; the keys read are :n_starts, :n_iter,
  #  :n_bootstrap, :tol, :ci_level, :norm_index, :skip and :file
  #
  # @return [void]
  def self.go(opts)
    n_starts = opts[:n_starts]
    n_iter = opts[:n_iter]
    tol = opts[:tol]
    norm_index = opts[:norm_index]
    # Only needed by the disabled bootstrap section at the end of this method.
    n_for_bootstrap = opts[:n_bootstrap]
    ci_level = opts[:ci_level]

    MLRatioSolve.set_skip_indices(opts[:skip])

    x = MLRatioSolve.read_data_from_file(opts[:file])

    # One fewer gamma is fitted than the second dimension of the data matrix.
    n_gammas_to_fit = x.shape[1] - 1

    best = MLRatioSolve.grid_multiple_iters(n_starts, n_gammas_to_fit, n_iter, x, tol)

    # Means and standard deviations are reported relative to the mean of the
    # normalization treatment.
    mu_norm = best[:mu][norm_index]
    scaled_sig = best[:sig2].map { |s2| Math.sqrt(s2)/mu_norm }

    puts "Best solution found: "
    puts "mu: #{best[:mu]/mu_norm}"
    puts "sig: #{scaled_sig.to_s}"
    puts "gamma: #{best[:gamma]}"
    puts "log l: #{best[:l]}"

    puts "Error estimate:"
    puts MLRatioSolve.ml_sem_estimate(best, norm_index)

    # Bootstrapped confidence intervals are currently disabled:
    #sim_results = ErrorBootstrapping.estimate_with_gen_data(n_for_bootstrap, best, x, n_iter, tol)
    #ci_lower, ci_upper = ErrorBootstrapping.bootstrap_ci(sim_results, ci_level)
    # puts "boostrapped #{ci_level*100}% confidence interval: "
    # puts ci_lower.to_a.join(", ")
    # puts ci_upper.to_a.join(", ")
  end

end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'ml_ratiosolve/version'
|
5
|
+
|
6
|
+
# Gem specification for ml_ratiosolve.
Gem::Specification.new do |spec|
  spec.name          = "ml_ratiosolve"
  spec.version       = MlRatiosolve::VERSION
  spec.authors       = ["Colin J. Fuller"]
  spec.email         = ["cjfuller@gmail.com"]
  spec.description   = %q{Methods for using maximum likelihood calculations to estimate parameters of ratios of gaussian variates}
  spec.summary       = %q{Methods for using maximum likelihood calculations to estimate parameters of ratios of gaussian variates}
  spec.homepage      = "https://github.com/cjfuller/ml_ratiosolve"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are picked
  # out of that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency "trollop", "~> 2.0"
  spec.add_dependency "nmatrix", "~> 0.1"
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#--
|
2
|
+
# error_bootstrapping_spec.rb
|
3
|
+
# Copyright (c) 2013 Colin J. Fuller
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the Software), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
# SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'ml_ratiosolve'
|
25
|
+
|
26
|
+
# Specs for the random-number and data-generation helpers of ErrorBootstrapping.
describe ErrorBootstrapping do

  it "should generate random variables with the correct mean" do
    # NOTE(review): the arguments are presumably (mean, standard deviation),
    # so a zero second argument should return the first exactly -- confirm
    # against ErrorBootstrapping.randnorm.
    expect(ErrorBootstrapping.randnorm(10, 0)).to eq 10.0
  end

  it "should generate random variables with reasonable statistics" do
    # Loose bounds on 100 draws, so the example only checks rough agreement
    # with a sample mean of 0 and a sample standard deviation of 1.
    vars = Array.new(100) { ErrorBootstrapping.randnorm(0, 1) }
    expect(N[*vars].mean.to_f.abs).to be < 0.4
    expect((N[*vars].std.to_f - 1).abs).to be < 0.4
  end

  context "data generation" do
    it "should generate a data matrix of the correct shape" do
      MLRatioSolve.set_skip_indices ""
      gamma = N.new([2,1], [1.0, 0.5])
      mu = N.new([2,1], [0.0, 2.0])
      sig2 = N.new([2,1], [1.0, 16.0])
      x = N.new([2,2], [0.0, 0.0, 2.0, 2.0])

      # The generated matrix must have the same shape as the 2x2 input.
      x_gen = ErrorBootstrapping.gen_data({mu: mu, sig2: sig2, gamma: gamma}, x)
      expect(x_gen.shape).to eq [2,2]
    end
  end
end
|
51
|
+
|
@@ -0,0 +1,147 @@
|
|
1
|
+
#--
|
2
|
+
# ml_ratiosolve_spec.rb
|
3
|
+
# Copyright (c) 2013 Colin J. Fuller
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the Software), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
# SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'ml_ratiosolve'
|
25
|
+
|
26
|
+
# Specs for the maximum likelihood estimation routines of MLRatioSolve.
describe MLRatioSolve do

  # Reset the skip indices to the empty specification before every example.
  before(:each) { MLRatioSolve.set_skip_indices "" }

  it "should calculate a normal PDF" do
    expect(MLRatioSolve.normpdf(0, 0, 1)).to be_within(1.0e-6).of(0.3989423)
  end

  it "should set and retrieve skip indices" do
    MLRatioSolve.set_skip_indices("0,1:0,2")
    expect(MLRatioSolve.skip_indices).to eq [[0,1],[0,2]]
  end

  it "should calculate the log likelihood" do
    gamma = N.new([2,1], [1.0, 0.5])
    mu = N.new([2,1], [0.0, 2.0])
    sig2 = N.new([2,1], [1.0, 16.0])
    x = N.new([2,2], [0.0, 0.0, 2.0, 2.0])
    # The expected value is the sum of the logs of four reference densities.
    densities = [0.199471140200716,
                 0.398942280401433,
                 0.0997355701003582,
                 0.0483335146003562]
    expected = N[*densities.map { |d| Math.log(d) }].sum.to_f
    expect(MLRatioSolve.log_l_fct(gamma, x, mu, sig2)).to be_within(1.0e-6).of(expected)
  end

  it "should calculate the ML mean estimate with uniform gamma" do
    x = N.new([1,3], [0,3,6], dtype: :float64)
    g = N.new([3,1], [1,1,1], dtype: :float64)
    estimate = MLRatioSolve.calculate_mu_i(g, x)
    expect(estimate.shape).to eq [1,1]
    expect(estimate[0]).to eq 3.0
  end

  it "should calculate the ML mean estimate with nonuniform gamma" do
    x = N.new([1,2], [1,2], dtype: :float64)
    g = N.new([2,1], [4,1], dtype: :float64)
    estimate = MLRatioSolve.calculate_mu_i(g, x)
    expect(estimate.shape).to eq [1,1]
    expect(estimate[0]).to eq 3.0
  end

  it "should calculate the ML variance estimate" do
    x = N.new([1,3], [0,3,6], dtype: :float64)
    g = N.new([3,1], [1,2,1], dtype: :float64)
    mean = MLRatioSolve.calculate_mu_i(g, x)
    estimate = MLRatioSolve.calculate_sig2_i(g, x, mean)
    expect(estimate.shape).to eq [1,1]
    # The final fraction corrects sample vs. population variance.
    expect(estimate[0]).to eq N[0,6,6].variance.to_f*(2.0/3.0)
  end

  it "should calculate a single gamma for fixed mean and standard deviation parameters" do
    mu = N.new([2,1], [0.0, 2.0])
    sig2 = N.new([2,1], [1.0, 16.0])
    x = N.new([2,2], [0.0, 0.0, 2.0, 2.0])
    expect(MLRatioSolve.calculate_single_gamma(0, x, mu, sig2)).to be_within(1.0e-6).of(3.3722813)
  end

  it "should correctly normalize the set of gammas" do
    mu = N.new([2,1], [0.0, 2.0])
    sig2 = N.new([2,1], [1.0, 16.0])
    x = N.new([2,2], [0.0, 0.0, 2.0, 2.0])
    normalized = MLRatioSolve.calculate_gamma_n(x, mu, sig2)
    g0 = MLRatioSolve.calculate_single_gamma(0, x, mu, sig2)
    g1 = MLRatioSolve.calculate_single_gamma(1, x, mu, sig2)

    # The expected vector is the individual gammas divided by the first one.
    expect(normalized).to eq N[[g0],[g1]]/g0
  end

  it "should perform a single iteration" do
    gamma = N.new([3,1], [1,2,1], dtype: :float64)
    x = N.new([2,3], [0,3,6,1,4,7], dtype: :float64)
    # Chain the individual update steps by hand and compare with the
    # combined call.
    m = MLRatioSolve.calculate_mu_i(gamma, x)
    s2 = MLRatioSolve.calculate_sig2_i(gamma, x, m)
    gamma_new = MLRatioSolve.calculate_gamma_n(x, m, s2)
    expect(MLRatioSolve.do_single_iteration(gamma, x, 0)).to eq gamma_new
  end

  context "grid search" do
    it "should grid the correct number of iterations" do
      count = 0
      # 4 dimensions with 5 points each should yield 5**4 grid points.
      MLRatioSolve.grid_recursive(4, 5, 0.0, 1.0, [0.0, 0.0, 0.0, 0.0], 0) { count += 1 }
      expect(count).to eq 5**4
    end

  end

  context "low variance solution search" do

    it "should calculate the starting means for the low variance case" do
      x = N.new([2,2], [5.0, 1.0, 2.0, 3.0])
      means = MLRatioSolve.m_est_zerovar(x, 0, x[0,0], [0,1])
      expect(means).to eq N.new([2,1], [5.0, 8.5])
    end

    it "should calculate the starting variances for the low variance case" do
      x = N.new([2,2], [5.0, 1.0, 2.0, 3.0])
      means = MLRatioSolve.m_est_zerovar(x, 0, x[0,0], [0,1])
      variances = MLRatioSolve.s2_est_zerovar(x, 0, means, [0,1])
      expect(variances).to eq N.new([2,1], [0.0, 42.25])
    end

    it "should find a permutation such that the 0th entry of a given low variance treatment is not skipped" do
      x = N.new([2,2], [0,1,2,3])
      MLRatioSolve.set_skip_indices "0,0"
      expect(MLRatioSolve.find_permutation_nonskip(x, 0)).to eq [[1,1], [1,0]]
    end

    it "should be able to invert a permutation matrix" do
      expect(MLRatioSolve.invert_permutation_matrix([0,2,3,1])).to eq [0,3,1,2]
    end

    it "should be able to permute rows of a matrix" do
      m = N.new([2,2], [0,1,2,3])
      expect(MLRatioSolve.permute_rows(m, [1,1])).to eq N.new([2,2], [2,3,0,1])
    end

  end
end
|
147
|
+
|
metadata
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ml_ratiosolve
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Colin J. Fuller
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-11-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: trollop
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nmatrix
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.1'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.1'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: Methods for using maximum likelihood calculations to estimate parmeters
|
84
|
+
of ratios of gaussian variates
|
85
|
+
email:
|
86
|
+
- cjfuller@gmail.com
|
87
|
+
executables:
|
88
|
+
- ml_ratiosolve
|
89
|
+
extensions: []
|
90
|
+
extra_rdoc_files: []
|
91
|
+
files:
|
92
|
+
- .gitignore
|
93
|
+
- Gemfile
|
94
|
+
- LICENSE.txt
|
95
|
+
- README.md
|
96
|
+
- Rakefile
|
97
|
+
- bin/ml_ratiosolve
|
98
|
+
- lib/ml_ratiosolve.rb
|
99
|
+
- lib/ml_ratiosolve/error_bootstrapping.rb
|
100
|
+
- lib/ml_ratiosolve/ml_ratiosolve.rb
|
101
|
+
- lib/ml_ratiosolve/version.rb
|
102
|
+
- ml_ratiosolve.gemspec
|
103
|
+
- spec/error_bootstrapping_spec.rb
|
104
|
+
- spec/ml_ratiosolve_spec.rb
|
105
|
+
homepage: https://github.com/cjfuller/ml_ratiosolve
|
106
|
+
licenses:
|
107
|
+
- MIT
|
108
|
+
metadata: {}
|
109
|
+
post_install_message:
|
110
|
+
rdoc_options: []
|
111
|
+
require_paths:
|
112
|
+
- lib
|
113
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - '>='
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0'
|
123
|
+
requirements: []
|
124
|
+
rubyforge_project:
|
125
|
+
rubygems_version: 2.0.3
|
126
|
+
signing_key:
|
127
|
+
specification_version: 4
|
128
|
+
summary: Methods for using maximum likelihood calculations to estimate parmeters of
|
129
|
+
ratios of gaussian variates
|
130
|
+
test_files:
|
131
|
+
- spec/error_bootstrapping_spec.rb
|
132
|
+
- spec/ml_ratiosolve_spec.rb
|