rubythinking 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Gemfile.lock +10 -7
- data/README.md +57 -5
- data/design_documents/QUAP_DESIGN.md +228 -0
- data/lib/rubythinking/distributions/normal.rb +26 -0
- data/lib/rubythinking/quap.rb +685 -0
- data/lib/rubythinking/version.rb +1 -1
- data/lib/rubythinking.rb +16 -2
- data/rubythinking.gemspec +2 -1
- data/rubythinking.svg +1 -0
- metadata +21 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dbced9cfbe4223fb69ec857f0d4d413c2ec9ad1d8d8d5b10359e99ac1745b399
|
4
|
+
data.tar.gz: 64d3468eb2c9d69c3e08a0566a0c07b078582c7275c0b4692882f179a71b89ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f579926f492bb66c5a4da0f31ddb8396a395d90664b6a4b9f856c13940bc388c329c1ba16303e54ebf37b1d4bce7ce19bf7828eb7eea9db76d801c83e0a3994
|
7
|
+
data.tar.gz: d94bf64605dc31f8572255669593b2738998637fe72331cad3afcff01edc64e0b6ae803f111a7afd4f5e7c855eccd044d8b30683b9ec6ffd744e7f375c143ae1
|
data/CHANGELOG.md
ADDED
data/Gemfile.lock
CHANGED
@@ -2,16 +2,16 @@ PATH
|
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
4
|
rubythinking (0.3.0)
|
5
|
-
cmd_stan_rb
|
6
5
|
croupier
|
7
6
|
distribution
|
8
7
|
iruby-chartkick
|
8
|
+
matrix
|
9
|
+
ostruct
|
9
10
|
|
10
11
|
GEM
|
11
12
|
remote: https://rubygems.org/
|
12
13
|
specs:
|
13
|
-
chartkick (
|
14
|
-
cmd_stan_rb (0.4.0)
|
14
|
+
chartkick (5.1.5)
|
15
15
|
croupier (1.6.0)
|
16
16
|
data_uri (0.1.0)
|
17
17
|
date (3.4.1)
|
@@ -36,15 +36,17 @@ GEM
|
|
36
36
|
logger
|
37
37
|
mime-types (>= 3.3.1)
|
38
38
|
multi_json (~> 1.11)
|
39
|
-
iruby-chartkick (0.
|
40
|
-
chartkick (~>
|
39
|
+
iruby-chartkick (0.4.0)
|
40
|
+
chartkick (~> 5.1.0)
|
41
41
|
iruby
|
42
42
|
logger (1.7.0)
|
43
|
+
matrix (0.4.3)
|
43
44
|
mime-types (3.7.0)
|
44
45
|
logger
|
45
46
|
mime-types-data (~> 3.2025, >= 3.2025.0507)
|
46
47
|
mime-types-data (3.2025.0924)
|
47
48
|
multi_json (1.17.0)
|
49
|
+
ostruct (0.6.1)
|
48
50
|
pp (0.6.2)
|
49
51
|
prettyprint
|
50
52
|
prettyprint (0.2.0)
|
@@ -52,9 +54,10 @@ GEM
|
|
52
54
|
date
|
53
55
|
stringio
|
54
56
|
rake (12.3.3)
|
55
|
-
rdoc (6.
|
57
|
+
rdoc (6.15.0)
|
56
58
|
erb
|
57
59
|
psych (>= 4.0.0)
|
60
|
+
tsort
|
58
61
|
reline (0.6.2)
|
59
62
|
io-console (~> 0.5)
|
60
63
|
rspec (3.13.1)
|
@@ -71,10 +74,10 @@ GEM
|
|
71
74
|
rspec-support (~> 3.13.0)
|
72
75
|
rspec-support (3.13.6)
|
73
76
|
stringio (3.1.7)
|
77
|
+
tsort (0.2.0)
|
74
78
|
|
75
79
|
PLATFORMS
|
76
80
|
arm64-darwin-23
|
77
|
-
ruby
|
78
81
|
|
79
82
|
DEPENDENCIES
|
80
83
|
rake (~> 12.0)
|
data/README.md
CHANGED
@@ -1,4 +1,8 @@
|
|
1
|
-
|
1
|
+
<p align="center">
|
2
|
+
<img src="rubythinking.svg" alt="RubyThinking Logo" width="300"/>
|
3
|
+
</p>
|
4
|
+
|
5
|
+
# Rubythinking
|
2
6
|
|
3
7
|
Let's do the _Statistical Rethinking_ journey in Ruby!
|
4
8
|
|
@@ -70,15 +74,63 @@ By installing the gem, some other dependency gems will get installed for you tha
|
|
70
74
|
convenient helpers in the notebooks.
|
71
75
|
|
72
76
|
If you are creating a new Jupyter notebook with iruby, then you can load all code
|
73
|
-
used by `rubythinking`
|
77
|
+
used by `rubythinking` like …
|
78
|
+
|
79
|
+
|
80
|
+
## Some examples
|
74
81
|
|
75
|
-
|
82
|
+
The gem provides Ruby versions of common statistical functions used throughout the Statistical Rethinking book:
|
83
|
+
|
84
|
+
```ruby
|
76
85
|
require "rubythinking"
|
77
86
|
include Rubythinking
|
78
87
|
|
79
|
-
|
88
|
+
# Binomial distribution
|
89
|
+
dbinom(6, prob: 0.5, size: 9) # => 0.1640625
|
80
90
|
rbinom(10, prob: 0.5, size: 5) # => [2, 2, 1, 0, 4, 1, 3, 1, 3, 1]
|
81
|
-
|
91
|
+
|
92
|
+
# Normal distribution
|
93
|
+
dnorm(0, mean: 0, sd: 1) # => 0.3989422804014327
|
94
|
+
rnorm(5, mean: 10, sd: 2) # => [8.2, 12.1, 9.8, 11.5, 10.3]
|
95
|
+
|
96
|
+
# Quadratic approximation (quap)
|
97
|
+
data = {
|
98
|
+
height: [150, 160, 170, 180, 190],
|
99
|
+
weight: [50, 60, 70, 80, 90]
|
100
|
+
}
|
101
|
+
|
102
|
+
formulas = [
|
103
|
+
'weight ~ normal(mu, 1)',
|
104
|
+
'mu ~ a + b * height',
|
105
|
+
'a ~ normal(0, 50)',
|
106
|
+
'b ~ normal(0, 10)'
|
107
|
+
]
|
108
|
+
|
109
|
+
model = quap(formulas: formulas, data: data)
|
110
|
+
puts model.estimate.summary
|
111
|
+
# => Quadratic approximation
|
112
|
+
# Parameter estimates:
|
113
|
+
# a: -99.999 (SE: 5.394)
|
114
|
+
# b: 0.999 (SE: 0.032)
|
115
|
+
```
|
116
|
+
|
117
|
+
### Chartkick Integration
|
118
|
+
|
119
|
+
The gem includes built-in charting capabilities through Chartkick, perfect for visualizing statistical results in Jupyter notebooks:
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
# Line charts for trend visualization
|
123
|
+
plot_data = {50 => 150, 60 => 160, 70 => 170, 80 => 180, 90 => 190}
|
124
|
+
line_chart(plot_data, min: 0)
|
125
|
+
|
126
|
+
# Area charts for posterior distributions
|
127
|
+
posterior_samples = {0.1 => 15, 0.2 => 100, 0.3 => 701, 0.4 => 1294, 0.5 => 1994}
|
128
|
+
area_chart(posterior_samples)
|
129
|
+
|
130
|
+
# Histograms from sampling distributions
|
131
|
+
samples = rbinom(1000, prob: 0.7, size: 15)
|
132
|
+
histogram_data = samples.group_by(&:itself).transform_values(&:count)
|
133
|
+
column_chart(histogram_data)
|
82
134
|
```
|
83
135
|
|
84
136
|
### Q: Can I participate?
|
@@ -0,0 +1,228 @@
|
|
1
|
+
# Quap Design Specification
|
2
|
+
|
3
|
+
## Formula Specification Format
|
4
|
+
|
5
|
+
### Basic Syntax
|
6
|
+
Formulas are specified as string arrays where each string represents a statistical relationship:
|
7
|
+
|
8
|
+
```ruby
|
9
|
+
formulas = [
|
10
|
+
'y ~ normal(mu, sigma)', # likelihood
|
11
|
+
'mu ~ normal(0, 10)', # prior for mu
|
12
|
+
'sigma ~ exponential(1)' # prior for sigma
|
13
|
+
]
|
14
|
+
```
|
15
|
+
|
16
|
+
### Supported Distributions
|
17
|
+
|
18
|
+
#### Continuous Distributions
|
19
|
+
- `normal(mean, sd)` - Normal distribution
|
20
|
+
- `exponential(rate)` - Exponential distribution
|
21
|
+
- `uniform(min, max)` - Uniform distribution
|
22
|
+
- `gamma(shape, rate)` - Gamma distribution
|
23
|
+
- `beta(alpha, beta)` - Beta distribution
|
24
|
+
|
25
|
+
#### Discrete Distributions
|
26
|
+
- `binomial(n, p)` - Binomial distribution
|
27
|
+
- `poisson(lambda)` - Poisson distribution
|
28
|
+
|
29
|
+
### Linear Model Syntax
|
30
|
+
For linear relationships, use arithmetic expressions:
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
formulas = [
|
34
|
+
'weight ~ normal(mu, sigma)',
|
35
|
+
'mu ~ a + b * height', # linear relationship
|
36
|
+
'a ~ normal(0, 50)',
|
37
|
+
'b ~ normal(0, 10)',
|
38
|
+
'sigma ~ exponential(1)'
|
39
|
+
]
|
40
|
+
```
|
41
|
+
|
42
|
+
### Variable References
|
43
|
+
- Data variables: Reference columns in the data hash/dataframe
|
44
|
+
- Parameters: Automatically detected when used in formulas
|
45
|
+
- Transformations: Support log(), exp(), sqrt() functions
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
formulas = [
|
49
|
+
'log_y ~ normal(mu, sigma)',
|
50
|
+
'mu ~ a + b * log(x)',
|
51
|
+
'a ~ normal(0, 1)',
|
52
|
+
'b ~ normal(0, 1)',
|
53
|
+
'sigma ~ exponential(1)'
|
54
|
+
]
|
55
|
+
```
|
56
|
+
|
57
|
+
## Object-Oriented Design
|
58
|
+
|
59
|
+
### Quap Object as the Approximation
|
60
|
+
|
61
|
+
The `Quap` object represents the quadratic approximation itself. It follows a two-phase lifecycle:
|
62
|
+
|
63
|
+
1. **Initialization**: Define the model structure and validate inputs
|
64
|
+
2. **Estimation**: Perform optimization and populate results
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
# Create Quap object (validates but doesn't estimate)
|
68
|
+
quap = Rubythinking::Quap.new(
|
69
|
+
formulas: formulas,
|
70
|
+
data: data,
|
71
|
+
start: start_values # optional
|
72
|
+
)
|
73
|
+
|
74
|
+
# Perform estimation (returns self for chaining)
|
75
|
+
quap.estimate
|
76
|
+
|
77
|
+
# Access results (only available after estimation)
|
78
|
+
quap.coef # => { 'mu' => 1.5, 'sigma' => 0.8 }
|
79
|
+
quap.vcov # => Matrix[[0.1, 0.01], [0.01, 0.05]]
|
80
|
+
quap.se # => { 'mu' => 0.316, 'sigma' => 0.224 }
|
81
|
+
quap.summary # => Formatted table string
|
82
|
+
quap.samples(n: 1000) # => { 'mu' => [1.4, 1.6, ...], 'sigma' => [0.7, 0.9, ...] }
|
83
|
+
quap.loglik # => -12.34
|
84
|
+
quap.npar # => 2
|
85
|
+
quap.aic # => 28.68
|
86
|
+
```
|
87
|
+
|
88
|
+
### Object State Management
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
# State checking
|
92
|
+
quap.estimated? # => false initially, true after .estimate
|
93
|
+
|
94
|
+
# Error handling for premature access
|
95
|
+
quap.coef # raises NotEstimatedError before estimation
|
96
|
+
|
97
|
+
# Method chaining support
|
98
|
+
summary = Rubythinking::Quap.new(formulas: formulas, data: data)
|
99
|
+
.estimate
|
100
|
+
.summary
|
101
|
+
```
|
102
|
+
|
103
|
+
### Constructor Parameters
|
104
|
+
|
105
|
+
```ruby
|
106
|
+
Rubythinking::Quap.new(
|
107
|
+
formulas: Array, # required - array of formula strings
|
108
|
+
data: Hash, # required - data hash with symbol/string keys
|
109
|
+
start: Hash # optional - starting values for optimization
|
110
|
+
)
|
111
|
+
```
|
112
|
+
|
113
|
+
### Estimation Method
|
114
|
+
|
115
|
+
```ruby
|
116
|
+
# Simple estimation - uses Nelder-Mead optimization internally
|
117
|
+
quap.estimate
|
118
|
+
```
|
119
|
+
|
120
|
+
## Example Use Cases
|
121
|
+
|
122
|
+
### Case 1: Simple Parameter Estimation
|
123
|
+
```ruby
|
124
|
+
# Estimate mean and standard deviation of data
|
125
|
+
data = { y: [1.2, 1.8, 0.9, 2.1, 1.5] }
|
126
|
+
|
127
|
+
formulas = [
|
128
|
+
'y ~ normal(mu, sigma)',
|
129
|
+
'mu ~ normal(0, 10)',
|
130
|
+
'sigma ~ exponential(1)'
|
131
|
+
]
|
132
|
+
|
133
|
+
quap = Rubythinking::Quap.new(formulas: formulas, data: data)
|
134
|
+
.estimate
|
135
|
+
|
136
|
+
puts quap.summary
|
137
|
+
```
|
138
|
+
|
139
|
+
### Case 2: Linear Regression with Method Chaining
|
140
|
+
```ruby
|
141
|
+
# Height-weight relationship
|
142
|
+
data = {
|
143
|
+
height: [150, 160, 170, 180, 190],
|
144
|
+
weight: [50, 60, 70, 80, 90]
|
145
|
+
}
|
146
|
+
|
147
|
+
formulas = [
|
148
|
+
'weight ~ normal(mu, sigma)',
|
149
|
+
'mu ~ a + b * height',
|
150
|
+
'a ~ normal(0, 50)',
|
151
|
+
'b ~ normal(0, 10)',
|
152
|
+
'sigma ~ exponential(1)'
|
153
|
+
]
|
154
|
+
|
155
|
+
# Fluent interface
|
156
|
+
samples = Rubythinking::Quap.new(formulas: formulas, data: data)
|
157
|
+
.estimate
|
158
|
+
.samples(n: 4000, seed: 42)
|
159
|
+
```
|
160
|
+
|
161
|
+
### Case 3: Logistic Regression with Inspection
|
162
|
+
```ruby
|
163
|
+
# Binary outcome model
|
164
|
+
data = {
|
165
|
+
admit: [1, 0, 1, 0, 1],
|
166
|
+
gre: [800, 600, 700, 500, 750]
|
167
|
+
}
|
168
|
+
|
169
|
+
formulas = [
|
170
|
+
'admit ~ binomial(1, p)',
|
171
|
+
'logit(p) ~ a + b * gre',
|
172
|
+
'a ~ normal(0, 5)',
|
173
|
+
'b ~ normal(0, 1)'
|
174
|
+
]
|
175
|
+
|
176
|
+
quap = Rubythinking::Quap.new(formulas: formulas, data: data)
|
177
|
+
|
178
|
+
# Check model before estimation
|
179
|
+
puts "Parameters: #{quap.parameter_names}"
|
180
|
+
puts "Data variables: #{quap.data_variables}"
|
181
|
+
|
182
|
+
# Estimate and inspect
|
183
|
+
quap.estimate
|
184
|
+
puts "Converged: #{quap.converged?}"
|
185
|
+
puts "AIC: #{quap.aic}"
|
186
|
+
```
|
187
|
+
|
188
|
+
### Case 4: Reusable Objects with Different Start Values
|
189
|
+
```ruby
|
190
|
+
# Create model template
|
191
|
+
base_quap = Rubythinking::Quap.new(formulas: formulas, data: data)
|
192
|
+
|
193
|
+
# Try different starting points
|
194
|
+
results = []
|
195
|
+
[{mu: 0, sigma: 1}, {mu: 2, sigma: 0.5}].each do |start_vals|
|
196
|
+
quap = Rubythinking::Quap.new(formulas: formulas, data: data, start: start_vals)
|
197
|
+
quap.estimate
|
198
|
+
results << { start: start_vals, loglik: quap.loglik, aic: quap.aic }
|
199
|
+
end
|
200
|
+
|
201
|
+
best = results.max_by { |r| r[:loglik] }
|
202
|
+
puts "Best model: #{best}"
|
203
|
+
```
|
204
|
+
|
205
|
+
## Implementation Notes
|
206
|
+
|
207
|
+
### Error Handling
|
208
|
+
- `InvalidFormulaError`: Malformed formula syntax
|
209
|
+
- `MissingDataError`: Referenced data variables not found
|
210
|
+
- `NotEstimatedError`: Accessing results before calling .estimate
|
211
|
+
- `ConvergenceError`: Optimization failed to converge
|
212
|
+
- `InvalidStartError`: Start values incompatible with model
|
213
|
+
|
214
|
+
### Optimization Method
|
215
|
+
- Uses **Nelder-Mead simplex** algorithm (derivative-free)
|
216
|
+
- Robust for statistical optimization problems
|
217
|
+
- Implemented in pure Ruby inside this gem (Ruby's standard library does not ship an optimizer)
|
218
|
+
- Good balance of simplicity and effectiveness
|
219
|
+
|
220
|
+
### Performance Considerations
|
221
|
+
- Use efficient matrix operations for linear algebra
|
222
|
+
- Cache parsed formulas for repeated calls
|
223
|
+
- Nelder-Mead is slower than gradient methods but more robust
|
224
|
+
|
225
|
+
### R Compatibility
|
226
|
+
- Maintain similar API to R's `quap()` function
|
227
|
+
- Support similar formula syntax where possible
|
228
|
+
- Provide comparable output format
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Rubythinking
  module Distributions
    # Helpers for the normal (Gaussian) distribution: random draws and density.
    class Normal
      # Generate a single normal random variable using the Box-Muller transform.
      #
      # @param mean [Numeric] mean of the distribution
      # @param std [Numeric] standard deviation of the distribution
      # @return [Float] one random draw
      def self.random(mean = 0, std = 1)
        u1 = rand
        # rand returns values in [0, 1); log(0) is -Infinity, so redraw on an
        # exact zero. The random stream is unchanged in every other case.
        u1 = rand while u1.zero?
        u2 = rand
        z0 = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math::PI * u2)
        mean + std * z0
      end

      # Generate multiple normal random variables.
      #
      # @param n [Integer] number of draws
      # @param mean [Numeric] mean of the distribution
      # @param std [Numeric] standard deviation of the distribution
      # @return [Array<Float>] n random draws
      def self.samples(n, mean = 0, std = 1)
        Array.new(n) { random(mean, std) }
      end

      # Probability density function.
      #
      # @param x [Numeric] point at which to evaluate the density
      # @param mean [Numeric] mean of the distribution
      # @param std [Numeric] standard deviation of the distribution
      # @return [Float] density at x
      def self.density(x, mean = 0, std = 1)
        coefficient = 1.0 / (std * Math.sqrt(2 * Math::PI))
        # Force float division: with Integer arguments (x - mean) / std would
        # truncate and give a wrong density.
        z = (x - mean).to_f / std
        coefficient * Math.exp(-0.5 * z**2)
      end
    end
  end
end
|
@@ -0,0 +1,685 @@
|
|
1
|
+
require 'matrix'
|
2
|
+
require 'set'
|
3
|
+
require_relative 'distributions/normal'
|
4
|
+
|
5
|
+
module Rubythinking
|
6
|
+
class InvalidFormulaError < StandardError; end
|
7
|
+
class MissingDataError < StandardError; end
|
8
|
+
class NotEstimatedError < StandardError; end
|
9
|
+
class ConvergenceError < StandardError; end
|
10
|
+
class InvalidStartError < StandardError; end
|
11
|
+
|
12
|
+
class Quap
|
13
|
+
attr_reader :formulas, :data, :start
|
14
|
+
|
15
|
+
def initialize(formulas:, data:, start: nil)
|
16
|
+
@formulas = formulas
|
17
|
+
@data = data
|
18
|
+
@start = start
|
19
|
+
@estimated = false
|
20
|
+
@coef = nil
|
21
|
+
@vcov = nil
|
22
|
+
|
23
|
+
validate_formulas!
|
24
|
+
validate_data!
|
25
|
+
end
|
26
|
+
|
27
|
+
def estimated?
|
28
|
+
@estimated
|
29
|
+
end
|
30
|
+
|
31
|
+
def estimate
|
32
|
+
# Get initial parameter values
|
33
|
+
param_names = parameters
|
34
|
+
initial_values = get_initial_values(param_names)
|
35
|
+
|
36
|
+
# Optimize log-likelihood using Nelder-Mead
|
37
|
+
result = nelder_mead_optimize(initial_values, param_names)
|
38
|
+
|
39
|
+
# Store results
|
40
|
+
@coef = param_names.zip(result[:x]).to_h
|
41
|
+
@log_likelihood = -result[:fval] # Convert from negative log-likelihood
|
42
|
+
|
43
|
+
# Calculate variance-covariance matrix using numerical Hessian
|
44
|
+
@vcov = calculate_vcov(result[:x], param_names)
|
45
|
+
|
46
|
+
@estimated = true
|
47
|
+
self
|
48
|
+
end
|
49
|
+
|
50
|
+
def coef
|
51
|
+
check_estimated!
|
52
|
+
@coef
|
53
|
+
end
|
54
|
+
|
55
|
+
def vcov
|
56
|
+
check_estimated!
|
57
|
+
@vcov
|
58
|
+
end
|
59
|
+
|
60
|
+
def se
|
61
|
+
check_estimated!
|
62
|
+
diagonal = @vcov.each(:diagonal).to_a
|
63
|
+
param_names = @coef.keys
|
64
|
+
param_names.zip(diagonal.map { |v| Math.sqrt(v) }).to_h
|
65
|
+
end
|
66
|
+
|
67
|
+
def summary
|
68
|
+
check_estimated!
|
69
|
+
|
70
|
+
lines = []
|
71
|
+
lines << "Quadratic approximation"
|
72
|
+
lines << ""
|
73
|
+
lines << "Parameter estimates:"
|
74
|
+
@coef.each do |param, value|
|
75
|
+
stderr = se[param]
|
76
|
+
lines << " #{param}: #{value} (SE: #{stderr})"
|
77
|
+
end
|
78
|
+
|
79
|
+
lines.join("\n")
|
80
|
+
end
|
81
|
+
|
82
|
+
def samples(n: 1000, seed: nil)
|
83
|
+
check_estimated!
|
84
|
+
srand(seed) if seed
|
85
|
+
|
86
|
+
result = {}
|
87
|
+
@coef.each_with_index do |(param, value), i|
|
88
|
+
variance = @vcov[i, i]
|
89
|
+
result[param] = Distributions::Normal.samples(n, value, Math.sqrt(variance))
|
90
|
+
end
|
91
|
+
result
|
92
|
+
end
|
93
|
+
|
94
|
+
def loglik
|
95
|
+
check_estimated!
|
96
|
+
@log_likelihood
|
97
|
+
end
|
98
|
+
|
99
|
+
def npar
|
100
|
+
check_estimated!
|
101
|
+
@coef.length
|
102
|
+
end
|
103
|
+
|
104
|
+
def aic
|
105
|
+
check_estimated!
|
106
|
+
-2 * loglik + 2 * npar
|
107
|
+
end
|
108
|
+
|
109
|
+
private
|
110
|
+
|
111
|
+
# Validates every entry of @formulas.
#
# A formula must be a String containing '~'. When the right-hand side starts
# with a call such as `name(...)`, the name must be a distribution or function
# known to is_function_or_distribution?. Previously only the literal text
# 'invalid_distribution' was rejected, so any other unknown distribution was
# accepted.
#
# @raise [InvalidFormulaError] on a malformed formula or unknown distribution
def validate_formulas!
  @formulas.each do |formula|
    unless formula.is_a?(String) && formula.include?('~')
      raise InvalidFormulaError, "Invalid formula: #{formula}"
    end

    # Name of the function called directly after '~', if any
    called = formula[/~\s*(\w+)\s*\(/, 1]
    next if called.nil? || is_function_or_distribution?(called)

    raise InvalidFormulaError, "Unknown distribution in formula: #{formula}"
  end
end
|
123
|
+
|
124
|
+
def validate_data!
|
125
|
+
# Extract variables that should be in data (response variables)
|
126
|
+
required_data_vars = extract_required_data_variables
|
127
|
+
|
128
|
+
# puts "Required data vars: #{required_data_vars.inspect}"
|
129
|
+
# puts "Available data keys: #{@data.keys.inspect}"
|
130
|
+
|
131
|
+
required_data_vars.each do |var|
|
132
|
+
unless @data.key?(var.to_sym) || @data.key?(var.to_s)
|
133
|
+
raise MissingDataError, "Data variable '#{var}' not found in data"
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
# Lists the names (as strings) of variables that are present in @data.
#
# Candidates are the data keys plus every name found on the left of '~' in a
# formula; only candidates that resolve to a key of @data (symbol or string)
# are kept.
#
# @return [Array<String>]
def extract_data_variables
  left_hand_names = @formulas.map { |formula| formula[/(\w+)\s*~/, 1] }.compact
  candidates = @data.keys.map(&:to_s) | left_hand_names

  candidates.select { |name| @data.key?(name.to_sym) || @data.key?(name.to_s) }
end
|
157
|
+
|
158
|
+
# Tells whether +var+ appears as a whole word on the right-hand side of any
# formula (anywhere after '~').
#
# The name is escaped and wrapped in word boundaries, so 'n' no longer matches
# inside 'normal' and regex metacharacters in the name are taken literally.
#
# @param var [String, Symbol] variable name
# @return [Boolean]
def is_parameter?(var)
  pattern = /~.*\b#{Regexp.escape(var.to_s)}\b/
  @formulas.any? { |formula| formula.match?(pattern) }
end
|
161
|
+
|
162
|
+
# Guard used by result accessors: fails unless the @estimated flag is set.
#
# @raise [NotEstimatedError] when @estimated is falsy
def check_estimated!
  raise NotEstimatedError, "Must call .estimate before accessing results" unless @estimated
end
|
167
|
+
|
168
|
+
# Sorted names of the free parameters referenced by the formulas (memoized in
# @parameters).
#
# Identifiers are collected from the arguments of distribution formulas and
# from the right-hand side of all other formulas. Function/distribution names,
# data variables and names defined by a non-distribution formula ("derived"
# parameters) are excluded.
#
# @return [Array<String>]
def parameters
  @parameters ||=
    begin
      distribution_call = /~\s*(\w+)\s*\(/

      # Names on the left of a non-distribution formula are computed, not estimated
      derived = Set.new
      @formulas.each do |formula|
        next if formula.match(distribution_call)

        left = formula.match(/(\w+)\s*~/)
        derived.add(left[1]) if left
      end

      found = Set.new
      @formulas.each do |formula|
        source =
          if formula.match(distribution_call)
            formula.match(/~\s*\w+\s*\(([^)]+)\)/) # distribution arguments
          else
            formula.match(/~\s*(.+)/) # whole right-hand side
          end
        next unless source

        source[1].scan(/\b([a-zA-Z_]\w*)\b/).flatten.each do |name|
          next if is_function_or_distribution?(name) || is_data_variable?(name) || derived.include?(name)

          found.add(name)
        end
      end

      found.to_a.sort
    end
end
|
213
|
+
|
214
|
+
# Tells whether +word+ (case-insensitive) names a distribution or math
# function recognised by the formula syntax.
#
# @param word [String]
# @return [Boolean]
def is_function_or_distribution?(word)
  case word.downcase
  when 'normal', 'exponential', 'uniform', 'gamma', 'beta', 'binomial', 'poisson', 'log', 'exp', 'sqrt'
    true
  else
    false
  end
end
|
218
|
+
|
219
|
+
# Tells whether @data has a key for +word+, either as a symbol or as a string.
#
# @param word [String, Symbol]
# @return [Boolean]
def is_data_variable?(word)
  [word.to_sym, word.to_s].any? { |key| @data.key?(key) }
end
|
222
|
+
|
223
|
+
def extract_required_data_variables
|
224
|
+
# Extract variables that should be data
|
225
|
+
required_vars = []
|
226
|
+
|
227
|
+
@formulas.each do |formula|
|
228
|
+
# For likelihood formulas (response ~ distribution), only the left side is data
|
229
|
+
if formula.match(/^(\w+)\s*~\s*(normal|binomial|poisson|exponential)\s*\(/)
|
230
|
+
var = $1
|
231
|
+
# Only add if it's not defined as a parameter elsewhere
|
232
|
+
unless is_likely_parameter?(var)
|
233
|
+
required_vars << var
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
# For linear model formulas (param ~ linear_expression), look for data variables in expression
|
238
|
+
if formula.match(/^(\w+)\s*~\s*([^(]+)$/) # No function call on right side
|
239
|
+
right_side = $2
|
240
|
+
# Extract variables from linear expressions
|
241
|
+
vars_in_expression = right_side.scan(/\b([a-zA-Z_]\w*)\b/).flatten
|
242
|
+
vars_in_expression.each do |var|
|
243
|
+
next if is_function_or_distribution?(var)
|
244
|
+
next if var.match(/^\d+$/) # Skip numbers
|
245
|
+
|
246
|
+
# If it's not a parameter and is used in arithmetic, it's likely data
|
247
|
+
unless is_likely_parameter?(var)
|
248
|
+
required_vars << var
|
249
|
+
end
|
250
|
+
end
|
251
|
+
end
|
252
|
+
end
|
253
|
+
|
254
|
+
required_vars.uniq
|
255
|
+
end
|
256
|
+
|
257
|
+
# Heuristic: +var+ is treated as a parameter when some formula gives it a
# prior, i.e. `var ~ dist(...)` whose arguments contain at least one number
# and at most one identifier.
#
# @param var [String] variable name
# @return [Boolean]
def is_likely_parameter?(var)
  prior_pattern = /^#{Regexp.escape(var)}\s*~\s*(normal|exponential|uniform|gamma|beta)\s*\(([^)]+)\)/

  @formulas.any? do |formula|
    found = formula.match(prior_pattern)
    next false unless found

    arguments = found[2]
    identifier_count = arguments.scan(/[a-zA-Z_]\w*/).length
    numeral_count = arguments.scan(/\d+/).length
    # Mostly constants => prior (parameter definition)
    numeral_count > 0 && identifier_count <= 1
  end
end
|
274
|
+
|
275
|
+
# Mean of the response variable's data, used as a default start value.
#
# The response variable is the first left-hand-side name (in formula order)
# for which is_parameter? is false. Falls back to 1.5 when there is no such
# name, when its data is not an Array, or when the Array is empty. The empty
# case previously produced NaN (0.0 / 0).
#
# @return [Float]
def calculate_sample_mean
  response_var = @formulas
    .map { |formula| formula[/(\w+)\s*~/, 1] }
    .compact
    .find { |name| !is_parameter?(name) }
  return 1.5 unless response_var # fallback

  values = @data[response_var.to_sym] || @data[response_var.to_s]
  return 1.5 unless values.is_a?(Array) && !values.empty? # fallback

  values.sum.to_f / values.length
end
|
296
|
+
|
297
|
+
# Calculate log-likelihood for current parameter values
|
298
|
+
# Total log-likelihood: the sum of every formula's contribution for the given
# parameter values.
#
# @param param_values [Array<Numeric>] values, in the same order as param_names
# @param param_names [Array<String>] parameter names
# @return [Float]
def log_likelihood(param_values, param_names)
  named_values = param_names.zip(param_values).to_h

  @formulas.inject(0.0) do |running_total, formula|
    running_total + calculate_formula_log_likelihood(formula, named_values)
  end
end
|
309
|
+
|
310
|
+
# Calculate log-likelihood contribution from a single formula
|
311
|
+
def calculate_formula_log_likelihood(formula, param_hash)
|
312
|
+
if match = formula.match(/(\w+)\s*~\s*(\w+)\s*\(([^)]+)\)/)
|
313
|
+
var_name = match[1]
|
314
|
+
distribution = match[2]
|
315
|
+
args_str = match[3]
|
316
|
+
|
317
|
+
# Check if this is a likelihood (variable is in data) or prior (variable is parameter)
|
318
|
+
data_values = get_data_values(var_name)
|
319
|
+
if data_values.empty?
|
320
|
+
# This is a prior - variable not in data, so skip for likelihood calculation
|
321
|
+
return 0.0
|
322
|
+
end
|
323
|
+
|
324
|
+
# This is a likelihood formula - variable is in data
|
325
|
+
# Parse distribution arguments and evaluate for each data point
|
326
|
+
total_log_lik = 0.0
|
327
|
+
|
328
|
+
case distribution.downcase
|
329
|
+
when 'normal'
|
330
|
+
# For normal distribution, we need to evaluate mu for each data point
|
331
|
+
args = parse_distribution_args(args_str, param_hash)
|
332
|
+
|
333
|
+
if args_str.include?('mu') && has_linear_model_for?('mu')
|
334
|
+
# Evaluate mu as linear model for each data point
|
335
|
+
data_values.each_with_index do |x, i|
|
336
|
+
mu_val = evaluate_linear_model_for_observation('mu', param_hash, i)
|
337
|
+
sd_val = args[1] # sigma parameter
|
338
|
+
density = normal_log_density(x, mu_val, sd_val)
|
339
|
+
total_log_lik += density
|
340
|
+
# puts "DEBUG: x=#{x}, mu=#{mu_val}, sd=#{sd_val}, density=#{density}, total=#{total_log_lik}"
|
341
|
+
end
|
342
|
+
else
|
343
|
+
# Simple case - mu is just a parameter
|
344
|
+
mean, sd = args
|
345
|
+
total_log_lik = data_values.sum { |x| normal_log_density(x, mean, sd) }
|
346
|
+
end
|
347
|
+
|
348
|
+
when 'exponential'
|
349
|
+
rate = parse_distribution_args(args_str, param_hash)[0]
|
350
|
+
total_log_lik = data_values.sum { |x| exponential_log_density(x, rate) }
|
351
|
+
end
|
352
|
+
|
353
|
+
total_log_lik
|
354
|
+
else
|
355
|
+
0.0 # Formula doesn't match expected pattern
|
356
|
+
end
|
357
|
+
end
|
358
|
+
|
359
|
+
# Check if a parameter has a linear model defined
|
360
|
+
# Tells whether some formula defines +param+ through a linear expression,
# i.e. `param ~ <expression>` with no parenthesis on the right-hand side.
#
# Uses the same pattern as evaluate_linear_model_for_observation. The previous
# pattern only checked that the first character after '~' was not '(', so a
# prior such as `mu ~ normal(0, 10)` was wrongly reported as a linear model.
#
# @param param [String] parameter name
# @return [Boolean]
def has_linear_model_for?(param)
  pattern = /^#{Regexp.escape(param)}\s*~\s*([^(]+)$/
  @formulas.any? { |formula| formula.match?(pattern) }
end
|
365
|
+
|
366
|
+
# Evaluate linear model for a specific observation
|
367
|
+
# Value of +param+ for one observation, computed from its linear-model formula.
#
# When no formula of the form `param ~ <expression without parentheses>`
# exists, the value from param_hash is returned instead (0.0 if absent).
#
# @param param [String] name of the modelled parameter
# @param param_hash [Hash{String=>Numeric}] current parameter values
# @param obs_index [Integer] index of the observation in the data arrays
# @return [Numeric]
def evaluate_linear_model_for_observation(param, param_hash, obs_index)
  name = Regexp.escape(param)
  definition = @formulas.find { |formula| formula.match(/^#{name}\s*~\s*([^(]+)$/) }
  right_hand = definition && definition.match(/^#{name}\s*~\s*(.+)$/)
  return param_hash[param] || 0.0 unless right_hand

  evaluate_linear_expression(right_hand[1].strip, param_hash, obs_index)
end
|
383
|
+
|
384
|
+
# Evaluate linear expression for a specific observation
|
385
|
+
# Evaluates an additive expression such as "a + b * height" for one
# observation. The expression is split on '+' and '-'; each remaining term is
# handed to evaluate_term and added with the sign that precedes it.
#
# @param expr [String] right-hand side of a linear-model formula
# @param param_hash [Hash{String=>Numeric}] current parameter values
# @param obs_index [Integer] index of the observation in the data arrays
# @return [Float]
def evaluate_linear_expression(expr, param_hash, obs_index)
  sign = 1

  expr.split(/([+-])/).map(&:strip).inject(0.0) do |total, token|
    case token
    when '+'
      sign = 1
      total
    when '-'
      sign = -1
      total
    when ''
      total
    else
      total + sign * evaluate_term(token, param_hash, obs_index)
    end
  end
end
|
411
|
+
|
412
|
+
# Evaluate a single term (might be parameter, number, or multiplication)
|
413
|
+
# Evaluates one term of a linear expression for a single observation.
#
# A term is either a product ("b * height") or a single parameter, number or
# data variable. A bare data variable ("height") previously fell through to
# 0.0; it now yields that variable's value for the observation.
#
# Inside a product, unknown factors and out-of-range data lookups are skipped
# (treated as 1). A single unknown term evaluates to 0.0.
#
# @param term [String] the term, without surrounding '+'/'-'
# @param param_hash [Hash{String=>Numeric}] current parameter values
# @param obs_index [Integer] index of the observation in the data arrays
# @return [Numeric]
def evaluate_term(term, param_hash, obs_index)
  if term.include?('*')
    term.split('*').map(&:strip).inject(1.0) do |product, factor|
      if param_hash.key?(factor)
        product * param_hash[factor]
      elsif @data.key?(factor.to_sym) || @data.key?(factor.to_s)
        column = @data[factor.to_sym] || @data[factor.to_s]
        in_range = column.is_a?(Array) && obs_index < column.length
        in_range ? product * column[obs_index] : product
      elsif factor.match(/^\d+(\.\d+)?$/)
        product * factor.to_f
      else
        product
      end
    end
  elsif param_hash.key?(term)
    param_hash[term]
  elsif term.match(/^\d+(\.\d+)?$/)
    term.to_f
  elsif @data.key?(term.to_sym) || @data.key?(term.to_s)
    # Bare data variable, e.g. the "height" in "a + height"
    column = @data[term.to_sym] || @data[term.to_s]
    in_range = column.is_a?(Array) && obs_index < column.length
    in_range ? column[obs_index].to_f : 0.0
  else
    0.0
  end
end
|
438
|
+
|
439
|
+
# Check if formula defines a prior distribution
|
440
|
+
def is_prior_formula?(formula)
|
441
|
+
if match = formula.match(/(\w+)\s*~\s*(\w+)\s*\(([^)]+)\)/)
|
442
|
+
var_name = match[1]
|
443
|
+
# A formula is a prior if the left-hand side is a parameter (not data)
|
444
|
+
# If it's in our data, it's a likelihood; if not, it's a prior
|
445
|
+
is_data = is_data_variable?(var_name)
|
446
|
+
!is_data
|
447
|
+
else
|
448
|
+
false
|
449
|
+
end
|
450
|
+
end
|
451
|
+
|
452
|
+
# Get data values for a variable
|
453
|
+
# Data column for +var_name+, looked up by symbol and then by string key.
#
# @param var_name [String, Symbol]
# @return [Array] the column, or [] when it is missing or not an Array
def get_data_values(var_name)
  column = @data[var_name.to_sym] || @data[var_name.to_s]
  column.is_a?(Array) ? column : []
end
|
458
|
+
|
459
|
+
# Parse distribution arguments, evaluating expressions
|
460
|
+
def parse_distribution_args(args_str, param_hash)
|
461
|
+
args_str.split(',').map do |arg|
|
462
|
+
arg = arg.strip
|
463
|
+
evaluate_expression(arg, param_hash)
|
464
|
+
end
|
465
|
+
end
|
466
|
+
|
467
|
+
# Evaluate mathematical expressions with parameters and data
|
468
|
+
# Evaluates a distribution argument to a number.
#
# Supported forms, checked in this order: a parameter name, a sum of
# sub-expressions ('+'), a product of factors ('*'), and a numeric literal.
# Anything else evaluates to 1.0.
#
# Changes from the previous version:
# * data variables in a product are found under symbol OR string keys, as in
#   the rest of the class (string keys used to be ignored);
# * numeric literals may carry a leading minus sign ("-1.5" used to fall
#   through to the 1.0 fallback).
#
# A data variable inside a product is replaced by the mean of its column.
#
# @param expr [String] the argument text, already stripped
# @param param_hash [Hash{String=>Numeric}] current parameter values
# @return [Numeric]
def evaluate_expression(expr, param_hash)
  # Simple parameter reference
  return param_hash[expr] if param_hash.key?(expr)

  # Sums like "a + 2": evaluate each term recursively
  if expr.include?('+')
    terms = expr.split('+').map(&:strip)
    return terms.sum { |term| evaluate_expression(term, param_hash) }
  end

  number = /\A-?\d+(\.\d+)?\z/

  if expr.include?('*')
    product = expr.split('*').map(&:strip).inject(1.0) do |acc, factor|
      if param_hash.key?(factor)
        acc * param_hash[factor]
      elsif @data.key?(factor.to_sym) || @data.key?(factor.to_s)
        column = @data[factor.to_sym] || @data[factor.to_s]
        # No observation index is available here, so use the column mean
        column.is_a?(Array) ? acc * (column.sum.to_f / column.length) : acc
      elsif factor.match?(number)
        acc * factor.to_f
      else
        acc
      end
    end
    return product
  end

  return expr.to_f if expr.match?(number)

  # Default fallback
  1.0
end
|
506
|
+
|
507
|
+
# Normal distribution log density
|
508
|
+
# Log density of the normal distribution at +x+.
#
# The standardized distance is computed with float division; with all-Integer
# arguments the previous `(x - mean) / sd` truncated and gave a wrong result.
#
# @param x [Numeric] observed value
# @param mean [Numeric] mean of the distribution
# @param sd [Numeric] standard deviation
# @return [Float] log density, or -Infinity when sd <= 0
def normal_log_density(x, mean, sd)
  return -Float::INFINITY if sd <= 0

  z = (x - mean).to_f / sd
  -0.5 * Math.log(2 * Math::PI) - Math.log(sd) - 0.5 * z**2
end
|
512
|
+
|
513
|
+
# Exponential distribution log density
|
514
|
+
# Log-density of an exponential distribution evaluated at +x+.
#
# @param x [Numeric] point at which the density is evaluated
# @param rate [Numeric] rate parameter
# @return [Numeric] log(rate) - rate * x, or -Float::INFINITY when
#   +rate+ <= 0 or +x+ < 0
def exponential_log_density(x, rate)
  outside_support = rate <= 0 || x < 0

  if outside_support
    -Float::INFINITY
  else
    Math.log(rate) - rate * x
  end
end
|
518
|
+
|
519
|
+
# Get initial parameter values
|
520
|
+
# Build the starting vector for the optimiser.
#
# Without @start every parameter gets default_start_value(name). With
# @start, each name is looked up first under its Symbol form, then under
# its String form, and only then falls back to default_start_value.
#
# @param param_names [Array] parameter names, in optimiser order
# @return [Array] one starting value per name, in the same order
def get_initial_values(param_names)
  return param_names.map { |name| default_start_value(name) } unless @start

  param_names.map do |name|
    @start[name.to_sym] || @start[name.to_s] || default_start_value(name)
  end
end
|
529
|
+
|
530
|
+
# Get default starting value for a parameter
|
531
|
+
# Default starting value for a parameter that has no user-supplied start.
#
# 'mu' starts at calculate_sample_mean, 'sigma' at 1.0 and every other
# name (including 'a' and 'b') at 0.0. The name is normalised with to_s so
# Symbol names behave like their String counterparts, matching the
# Symbol/String tolerance of the @start lookup in get_initial_values.
#
# @param param_name [String, Symbol] parameter name
# @return [Numeric] starting value for the optimiser
def default_start_value(param_name)
  case param_name.to_s
  when 'mu' then calculate_sample_mean
  when 'sigma' then 1.0
  else 0.0
  end
end
|
545
|
+
|
546
|
+
# Nelder-Mead optimization algorithm
|
547
|
+
# Minimise the negative log-likelihood with the Nelder-Mead simplex method.
#
# The objective is -log_likelihood(point, param_names). The initial simplex
# is the start point plus one copy per dimension, perturbed by 5% of that
# coordinate (or by 0.00025 when the coordinate is zero). Iteration stops
# as soon as the spread between the worst and best objective value drops
# below +tolerance+, or after +max_iter+ iterations.
#
# Objective values are cached per vertex and refreshed only for vertices
# that move, so one iteration costs one to two evaluations unless a shrink
# happens.
#
# @param initial_values [Array<Numeric>] starting point, one entry per parameter
# @param param_names [Array] forwarded unchanged to log_likelihood
# @param max_iter [Integer] maximum number of iterations
# @param tolerance [Float] threshold on (worst - best) objective value
# @return [Hash] { x: best vertex, fval: objective at x, iterations: count }
def nelder_mead_optimize(initial_values, param_names, max_iter: 1000, tolerance: 1e-6)
  n = initial_values.length
  objective = ->(point) { -log_likelihood(point, param_names) }

  # Initial simplex: the start point plus n perturbed copies.
  simplex = [initial_values.dup]
  n.times do |i|
    point = initial_values.dup
    point[i] += point[i] != 0 ? point[i] * 0.05 : 0.00025
    simplex << point
  end

  alpha = 1.0 # reflection
  gamma = 2.0 # expansion
  rho = 0.5   # contraction
  sigma = 0.5 # shrink

  values = simplex.map { |point| objective.call(point) }

  max_iter.times do |iter|
    # Rank the vertices by objective value; the simplex itself stays unsorted.
    order = (0...values.length).sort_by { |i| values[i] }
    best_idx = order[0]
    worst_idx = order[-1]
    second_worst_idx = order[-2]

    if (values[worst_idx] - values[best_idx]).abs < tolerance
      return { x: simplex[best_idx], fval: values[best_idx], iterations: iter }
    end

    worst = simplex[worst_idx]

    # Centroid of every vertex except the worst one (n vertices).
    centroid = Array.new(n) do |j|
      order[0...-1].inject(0.0) { |acc, idx| acc + simplex[idx][j] } / n
    end

    reflected = Array.new(n) { |j| centroid[j] + alpha * (centroid[j] - worst[j]) }
    reflected_val = objective.call(reflected)

    if reflected_val < values[best_idx]
      # Reflection beat the best vertex: try going further the same way.
      expanded = Array.new(n) { |j| centroid[j] + gamma * (reflected[j] - centroid[j]) }
      expanded_val = objective.call(expanded)

      if expanded_val < reflected_val
        simplex[worst_idx] = expanded
        values[worst_idx] = expanded_val
      else
        simplex[worst_idx] = reflected
        values[worst_idx] = reflected_val
      end
    elsif reflected_val < values[second_worst_idx]
      simplex[worst_idx] = reflected
      values[worst_idx] = reflected_val
    else
      # Outside contraction when the reflection at least beat the worst
      # vertex, inside contraction otherwise.
      toward = reflected_val < values[worst_idx] ? reflected : worst
      contracted = Array.new(n) { |j| centroid[j] + rho * (toward[j] - centroid[j]) }
      contracted_val = objective.call(contracted)

      if contracted_val < [reflected_val, values[worst_idx]].min
        simplex[worst_idx] = contracted
        values[worst_idx] = contracted_val
      else
        # Shrink every vertex except the best towards the best. Skipping by
        # best_idx (not by array position 0) matters because the simplex is
        # not stored in sorted order.
        best = simplex[best_idx]
        simplex.each_index do |i|
          next if i == best_idx

          simplex[i] = Array.new(n) { |j| best[j] + sigma * (simplex[i][j] - best[j]) }
          values[i] = objective.call(simplex[i])
        end
      end
    end
  end

  # Iteration budget exhausted: report the best vertex found so far.
  best_idx = values.each_index.min_by { |i| values[i] }
  { x: simplex[best_idx], fval: values[best_idx], iterations: max_iter }
end
|
636
|
+
|
637
|
+
# Calculate variance-covariance matrix using numerical Hessian
|
638
|
+
# Approximate the variance-covariance matrix as the inverse of a numerical
# Hessian of the negative log-likelihood at +optimal_params+.
#
# Second derivatives use central finite differences with a fixed step of
# 1e-5. Only the upper triangle is evaluated and then mirrored, so the
# Hessian is exactly symmetric and each mixed derivative is computed once.
#
# @param optimal_params [Array<Numeric>] point at which to evaluate the Hessian
# @param param_names [Array] forwarded unchanged to log_likelihood
# @return [Matrix] inverse Hessian; if the Hessian is singular
#   (Matrix::ErrNotRegular) a diagonal matrix with 0.1 on the diagonal
def calculate_vcov(optimal_params, param_names)
  n = optimal_params.length
  h = 1e-5 # Step size for numerical differentiation

  # Negative log-likelihood at optimal_params shifted by [index, delta] pairs.
  objective_at = lambda do |*shifts|
    point = optimal_params.dup
    shifts.each { |index, delta| point[index] += delta }
    -log_likelihood(point, param_names)
  end

  # The centre value is shared by every diagonal entry, so evaluate it once.
  f_center = objective_at.call

  rows = Array.new(n) { Array.new(n, 0.0) }
  n.times do |i|
    # Diagonal: plain second derivative.
    f_plus = objective_at.call([i, h])
    f_minus = objective_at.call([i, -h])
    rows[i][i] = (f_plus - 2 * f_center + f_minus) / (h * h)

    # Off-diagonal: mixed second derivative, mirrored across the diagonal.
    ((i + 1)...n).each do |j|
      f_pp = objective_at.call([i, h], [j, h])
      f_pm = objective_at.call([i, h], [j, -h])
      f_mp = objective_at.call([i, -h], [j, h])
      f_mm = objective_at.call([i, -h], [j, -h])

      mixed = (f_pp - f_pm - f_mp + f_mm) / (4 * h * h)
      rows[i][j] = mixed
      rows[j][i] = mixed
    end
  end

  begin
    Matrix.rows(rows).inverse
  rescue Matrix::ErrNotRegular
    # Singular Hessian: fall back to a fixed diagonal approximation.
    Matrix.diagonal(*Array.new(n, 0.1))
  end
end
|
684
|
+
end
|
685
|
+
end
|
data/lib/rubythinking/version.rb
CHANGED
data/lib/rubythinking.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
require "rubythinking/version"
|
2
2
|
require "rubythinking/distributions"
|
3
3
|
require "rubythinking/distributions/binomial"
|
4
|
-
require "
|
4
|
+
require "rubythinking/distributions/normal"
|
5
|
+
require "rubythinking/quap"
|
5
6
|
require "croupier"
|
6
7
|
require "iruby/chartkick"
|
7
8
|
|
8
9
|
include IRuby::Chartkick
|
9
10
|
|
10
11
|
module Rubythinking
|
11
|
-
# Mimicks R API
|
12
12
|
# R-style dbinom: delegates to Rubythinking::Distributions::Binomial.density,
# passing +prob+ as the :success keyword.
def dbinom(value, size:, prob:)
  binomial = Rubythinking::Distributions::Binomial
  binomial.density(value: value, success: prob, size: size)
end
|
@@ -16,4 +16,18 @@ module Rubythinking
|
|
16
16
|
# R-style rbinom: delegates to Rubythinking::Distributions::Binomial.samples,
# passing +prob+ as the :success keyword.
def rbinom(n, size:, prob:)
  binomial = Rubythinking::Distributions::Binomial
  binomial.samples(n, success: prob, size: size)
end
|
19
|
+
|
20
|
+
# R-style dnorm: delegates to Rubythinking::Distributions::Normal.density,
# passing value, mean and sd positionally. mean defaults to 0, sd to 1.
def dnorm(value, mean: 0, sd: 1)
  normal = Rubythinking::Distributions::Normal
  normal.density(value, mean, sd)
end
|
23
|
+
|
24
|
+
# R-style rnorm: delegates to Rubythinking::Distributions::Normal.samples,
# passing n, mean and sd positionally. mean defaults to 0, sd to 1.
def rnorm(n, mean: 0, sd: 1)
  normal = Rubythinking::Distributions::Normal
  normal.samples(n, mean, sd)
end
|
27
|
+
|
28
|
+
# Build a Rubythinking::Quap from the given formulas and data.
# +start+ defaults to nil and is forwarded as-is.
def quap(formulas:, data:, start: nil)
  options = { formulas: formulas, data: data, start: start }
  Rubythinking::Quap.new(**options)
end
|
31
|
+
|
32
|
+
module_function :dbinom, :rbinom, :dnorm, :rnorm, :quap
|
19
33
|
end
|
data/rubythinking.gemspec
CHANGED
@@ -28,5 +28,6 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.add_runtime_dependency "iruby-chartkick"
|
29
29
|
spec.add_runtime_dependency "distribution"
|
30
30
|
spec.add_runtime_dependency "croupier"
|
31
|
-
spec.add_runtime_dependency "
|
31
|
+
spec.add_runtime_dependency "matrix"
|
32
|
+
spec.add_runtime_dependency "ostruct"
|
32
33
|
end
|
data/rubythinking.svg
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="1180" height="191" fill="none"><path fill="red" d="M128.107 190.707c-.489 0-.953-.348-.983-.976-.71-14.216-1.418-28.433-2.126-42.65l-26.296-20.418H58.728l-.16-.016-.04-.004-.039-.007c-.337-.045-.637-.264-.73-.71l-3.063-14.954H33.283a.927.927 0 0 1-.694-.283l-.015-.011L3.589 83.142a.857.857 0 0 1-.353-.54l-.017-.286L.045 48.281a.793.793 0 0 1 .144-.784.779.779 0 0 1 .43-.373c2.528-2.063 5.056-4.125 7.582-6.188L54.692 3.001l.281-.263a.775.775 0 0 1 .46-.145c.059 0 .122.007.186.019l10.309-.383c20.017-.743 40.032-1.485 60.049-2.23l.053.005.046.002a.91.91 0 0 1 .462.142.752.752 0 0 1 .427.556l1.804 3.02 31.554 2.36a.92.92 0 0 1 .449.056l.069.026c.16.048.308.124.423.23l28.83 26.524c.163.078.295.21.373.373a.84.84 0 0 1 .162.406l5.949 15.585 10.016 11.16c.136.153.228.34.268.542l.025.167 3.172 38.343.029.25-.007.18c-.008.269-.11.474-.272.612L196.58 122.97a1.045 1.045 0 0 1-.869.503.834.834 0 0 1-.264-.043l-8.113-2.732-1.916 9.816c.044.241-.023.496-.248.717l-.011.011-.108.112-.133.093-32.618 26.269-.085.107-23.224 32.415a1.085 1.085 0 0 1-.884.469Zm-1.053-42.608c.644 12.906 1.287 25.814 1.929 38.721L150 157.485l-22.946-9.386Zm52.964-15.265-50.942 13.939 22.303 9.124 28.639-23.063Zm-17.106-17.713c-13.468 3.749-26.934 7.5-40.4 11.25l4.09 18.41 36.31-29.66Zm4.242-.891a488382.158 488382.158 0 0 1-36.892 30.138c17.408-4.762 34.814-9.526 52.221-14.289l-15.329-15.849Zm-65.209 12.433 22.453 17.433-3.874-17.433h-18.579Zm68.877-11.525 12.947 13.385 1.653-8.469-14.6-4.916Zm-114.014-3.829 2.736 13.348h53.255l-55.991-13.348Zm43.748-29.36L58.02 109.536c1.806.429 3.612.86 5.418 1.291l56.144 13.385-19.026-42.265Zm1.999-.42c6.352 14.108 12.702 28.219 19.054 42.328l36.748-40.56-55.802-1.769Zm57.437 2.947-35.595 39.287 41.66-11.601-6.065-27.686Zm46.902 17.056-36.41 11.378 24.779 8.344 11.631-19.722Zm-26.398-15.59-11.432 25.311 37.41-11.69-25.978-13.62Zm12.261-32.02L161.656 82.71l5.915 27.004 2.561-5.672 8.896-19.695a.932.932 0 0 1 
.146-.49l13.583-29.935ZM10.347 86.798l23.336 22.168h17.852L10.348 86.798ZM98.86 43.355 46.64 52.377l.09.527 9.342 54.426 11.164-16.691c10.54-15.763 21.082-31.523 31.623-47.284Zm-95.45 5.911 13.885 15.772 36.637 41.62-9.296-54.16-41.226-3.232Zm96.575-3.991c-3.573 5.343-7.148 10.686-10.72 16.03C79.19 76.363 69.12 91.42 59.048 106.48l40.935-26.55V45.275ZM2.31 51.044l2.86 30.69.554.298 45.35 24.41c-3.94-4.478-7.883-8.957-11.826-13.435L2.309 51.044Zm202.746 12.2-23.296 20.94c8.726 4.624 17.451 9.248 26.174 13.873-.958-11.605-1.918-23.209-2.878-34.812ZM101.989 43.864v32.296c0 .647.216 2.115.15 3.35l2.654.085 52.34 1.658-7.019-4.758-48.125-32.631Zm94.003 7.765-3.171 7.024c-3.27 7.237-6.538 14.474-9.806 21.71l21.456-19.285-8.479-9.45Zm-8.234-16.647-38.975 2.958.393 1.358.09.314A573614.503 573614.503 0 0 0 160.88 79.87c8.96-14.963 17.92-29.925 26.878-44.889Zm-62.125-4.949L102.925 42.08l8.504 5.765 45.624 30.935-31.42-48.748Zm63.782 6.088-24.403 40.754 3.757-3.479c8.591-7.95 17.181-15.902 25.77-23.854l-5.124-13.42Zm-60.932-5.365.82 1.273L157.7 76.084l-11.049-38.3a104333 104333 0 0 1-18.168-7.03ZM53.955 6.177a18390.53 18390.53 0 0 1-5.615 4.581L3.59 47.275l41.097 3.222 9.268-44.32Zm2.194-.715L46.77 50.33c17.31-2.992 34.622-5.982 51.935-8.973L56.15 5.462Zm4.224.95 40.739 34.361L123.5 28.895l-11.76-4.188L60.373 6.412ZM160.905 8.79l-11.849 27.122 38.182-2.896c-8.777-8.076-17.554-16.151-26.333-24.226Zm-34.261-4.713-2.499 22.86c2.868 1.109 5.736 2.218 8.604 3.329 4.122 1.594 8.245 3.189 12.367 4.785l.014.004c-6.163-10.325-12.324-20.652-18.486-30.978Zm3.378 1.748 17.324 29.028 11.731-26.855c-9.685-.725-19.37-1.45-29.055-2.173Zm-5.162-3.777a750168.819 750168.819 0 0 0-64.082 2.379c20.48 7.265 40.96 14.532 61.439 21.8l2.643-24.18ZM263 131.548V37.53h29.157c9.939 0 17.723 2.516 23.352 7.548 5.717 5.032 8.575 11.873 8.575 20.525 0 4.502-1.011 8.475-3.034 11.918-1.935 3.442-4.53 6.135-7.784 8.077-3.167 1.942-6.553 3.046-10.159 3.31l-.528-1.588c6.509.265 11.346 1.765 14.513 4.502 3.254 
2.648 5.145 6.974 5.673 12.977l2.375 26.749h-13.853l-2.111-24.101c-.264-3.089-.924-5.561-1.979-7.415-1.055-1.854-2.683-3.178-4.881-3.973-2.199-.794-5.234-1.191-9.104-1.191h-16.359v36.68H263Zm13.853-49.658h15.04c5.805 0 10.247-1.368 13.325-4.105 3.078-2.736 4.618-6.576 4.618-11.52 0-5.032-1.54-8.872-4.618-11.52-3.078-2.737-7.52-4.106-13.325-4.106h-15.04V81.89ZM357.543 133.137c-6.772 0-12.181-2.295-16.227-6.886-4.046-4.679-6.069-11.211-6.069-19.598V61.1h13.325v42.11c0 6.356 1.099 10.991 3.298 13.904 2.199 2.913 5.541 4.37 10.027 4.37 4.837 0 8.707-1.633 11.61-4.9 2.99-3.266 4.486-7.901 4.486-13.904V61.1h13.325v70.448h-12.534l-.132-18.406 1.847.794c-1.231 6.091-3.826 10.814-7.784 14.169-3.958 3.355-9.015 5.032-15.172 5.032ZM439.025 133.137c-4.925 0-9.235-1.104-12.929-3.311-3.694-2.295-6.553-5.34-8.576-9.137l-.395 10.859h-12.402V37.53h13.325v33.502c1.759-3.09 4.442-5.782 8.048-8.078 3.694-2.295 8.004-3.443 12.929-3.443 5.981 0 11.17 1.501 15.568 4.503 4.398 2.913 7.784 7.15 10.159 12.712 2.375 5.473 3.562 12.006 3.562 19.598 0 7.592-1.187 14.169-3.562 19.731-2.375 5.473-5.761 9.711-10.159 12.712-4.398 2.913-9.587 4.37-15.568 4.37Zm-2.243-12.315c5.541 0 9.851-2.163 12.93-6.489 3.078-4.414 4.617-10.417 4.617-18.009 0-7.68-1.539-13.683-4.617-18.009-3.079-4.326-7.301-6.489-12.666-6.489-3.958 0-7.344.972-10.158 2.914-2.815 1.942-5.014 4.723-6.597 8.342-1.495 3.62-2.243 8.034-2.243 13.242 0 5.032.748 9.402 2.243 13.11 1.583 3.619 3.738 6.444 6.465 8.475 2.814 1.942 6.156 2.913 10.026 2.913ZM480.928 151.411v-11.256h8.575c2.463 0 4.266-.397 5.41-1.192 1.231-.706 2.154-1.898 2.77-3.575l2.375-6.224h-4.486l-25.595-68.063h14.381l19.922 55.483L522.882 61.1h14.249l-27.97 78.393c-1.407 4.238-3.606 7.283-6.596 9.137-2.903 1.854-6.817 2.781-11.742 2.781h-9.895ZM553.263 131.548v-17.48h17.942v17.48h-17.942Zm0-50.187V63.88h17.942v17.48h-17.942ZM591.031 131.548v-17.48h17.943v17.48h-17.943Zm0-50.187V63.88h17.943v17.48h-17.943ZM652.9 
131.548V50.639h-27.442V37.53h68.604v13.11H666.62v80.909H652.9ZM702.322 131.548V37.53h13.325V76.46l-1.319-.53c.879-3.796 2.375-6.885 4.486-9.269 2.111-2.472 4.661-4.282 7.652-5.43 2.99-1.147 6.288-1.72 9.895-1.72 4.925 0 9.059 1.147 12.401 3.442 3.43 2.207 5.981 5.341 7.652 9.402 1.759 3.973 2.639 8.563 2.639 13.772v45.42h-13.325V89.571c0-6.356-1.144-10.991-3.431-13.904-2.198-3.002-5.497-4.503-9.894-4.503-5.014 0-9.06 1.59-12.138 4.767-3.079 3.179-4.618 7.857-4.618 14.037v41.58h-13.325ZM797.393 131.548V61.1h13.457v70.448h-13.457Zm-27.442 0v-11.256h64.383v11.256h-64.383Zm1.32-59.192V61.101h38.392v11.255h-38.392Zm25.331-21.452V37h13.72v13.904h-13.72ZM840.351 131.548V61.1h12.27l.396 18.407-1.452-1.06c.88-4.325 2.419-7.856 4.618-10.593 2.287-2.825 5.057-4.9 8.312-6.224 3.254-1.412 6.728-2.119 10.422-2.119 5.277 0 9.631 1.192 13.061 3.576 3.431 2.295 6.025 5.473 7.784 9.534 1.759 3.973 2.639 8.475 2.639 13.507v45.42h-13.325V89.571c0-6.091-1.187-10.682-3.562-13.772-2.287-3.09-5.761-4.635-10.423-4.635-3.254 0-6.201.707-8.839 2.12-2.551 1.412-4.618 3.53-6.201 6.355-1.583 2.737-2.375 6.136-2.375 10.197v41.712h-13.325ZM910.223 131.548V37.53h13.325v59.059L955.212 61.1h17.019l-27.442 29.927 28.629 40.521h-15.699l-21.769-31.516-12.402 13.11v18.406h-13.325ZM1007.01 131.548V61.1h13.46v70.448h-13.46Zm-27.442 0v-11.256h64.382v11.256h-64.382Zm1.319-59.192V61.101h38.393v11.255h-38.393Zm25.333-21.452V37h13.72v13.904h-13.72ZM1049.97 131.548V61.1h12.27l.39 18.407-1.45-1.06c.88-4.325 2.42-7.856 4.62-10.593 2.29-2.825 5.06-4.9 8.31-6.224 3.26-1.412 6.73-2.119 10.42-2.119 5.28 0 9.63 1.192 13.06 3.576 3.43 2.295 6.03 5.473 7.79 9.534 1.76 3.973 2.64 8.475 2.64 13.507v45.42h-13.33V89.571c0-6.091-1.19-10.682-3.56-13.772-2.29-3.09-5.76-4.635-10.42-4.635-3.26 0-6.2.707-8.84 2.12-2.55 1.412-4.62 3.53-6.2 6.355-1.59 2.737-2.38 6.136-2.38 10.197v41.712h-13.32ZM1148.47 153c-5.28 0-9.98-.839-14.12-2.516-4.13-1.677-7.56-4.061-10.29-7.151-2.64-3.089-4.48-6.665-5.54-10.726l13.98-.927c.97 2.914 2.69 
5.209 5.15 6.886 2.46 1.678 6.07 2.516 10.82 2.516 5.89 0 10.38-1.236 13.46-3.708 3.16-2.471 4.74-6.135 4.74-10.99v-10.329c-1.67 3.619-4.3 6.488-7.91 8.607-3.52 2.119-7.61 3.178-12.27 3.178-5.81 0-10.99-1.456-15.57-4.37-4.57-2.913-8.13-6.886-10.68-11.917-2.56-5.121-3.83-10.991-3.83-17.612 0-6.886 1.23-12.89 3.69-18.01 2.55-5.12 6.07-9.136 10.56-12.05 4.48-2.913 9.58-4.37 15.3-4.37 4.93 0 9.24 1.148 12.93 3.443 3.78 2.207 6.51 5.253 8.18 9.137l.13-10.99h12.8v64.356c0 5.914-1.28 10.902-3.83 14.963-2.55 4.149-6.2 7.283-10.95 9.402-4.66 2.119-10.24 3.178-16.75 3.178Zm-.13-37.078c5.45 0 9.85-1.942 13.19-5.826 3.34-3.884 5.06-9.314 5.14-16.288.09-4.767-.65-8.784-2.24-12.05-1.49-3.355-3.65-5.915-6.46-7.68-2.73-1.766-5.94-2.649-9.63-2.649-5.72 0-10.16 1.986-13.33 5.96-3.08 3.883-4.62 9.357-4.62 16.42 0 6.973 1.59 12.403 4.75 16.287 3.26 3.884 7.66 5.826 13.2 5.826Z"/></svg>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rubythinking
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robin Stammer
|
@@ -52,7 +52,21 @@ dependencies:
|
|
52
52
|
- !ruby/object:Gem::Version
|
53
53
|
version: '0'
|
54
54
|
- !ruby/object:Gem::Dependency
|
55
|
-
name:
|
55
|
+
name: matrix
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
type: :runtime
|
62
|
+
prerelease: false
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
- !ruby/object:Gem::Dependency
|
69
|
+
name: ostruct
|
56
70
|
requirement: !ruby/object:Gem::Requirement
|
57
71
|
requirements:
|
58
72
|
- - ">="
|
@@ -80,6 +94,7 @@ files:
|
|
80
94
|
- ".ipynb_checkpoints/2M2-checkpoint.ipynb"
|
81
95
|
- ".ipynb_checkpoints/2M3-checkpoint.ipynb"
|
82
96
|
- ".ruby-version"
|
97
|
+
- CHANGELOG.md
|
83
98
|
- CODE_OF_CONDUCT.md
|
84
99
|
- Gemfile
|
85
100
|
- Gemfile.lock
|
@@ -88,9 +103,12 @@ files:
|
|
88
103
|
- Rakefile
|
89
104
|
- bin/console
|
90
105
|
- bin/setup
|
106
|
+
- design_documents/QUAP_DESIGN.md
|
91
107
|
- lib/rubythinking.rb
|
92
108
|
- lib/rubythinking/distributions.rb
|
93
109
|
- lib/rubythinking/distributions/binomial.rb
|
110
|
+
- lib/rubythinking/distributions/normal.rb
|
111
|
+
- lib/rubythinking/quap.rb
|
94
112
|
- lib/rubythinking/version.rb
|
95
113
|
- local_reinstall.sh
|
96
114
|
- r/0_all.R
|
@@ -98,6 +116,7 @@ files:
|
|
98
116
|
- r/2_7.R
|
99
117
|
- r/4.R
|
100
118
|
- rubythinking.gemspec
|
119
|
+
- rubythinking.svg
|
101
120
|
homepage: https://github.com/rstammer/rubythinking
|
102
121
|
licenses:
|
103
122
|
- MIT
|