mcup-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcup/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .data_generator import DataGenerator as DataGenerator
+ from .deming import DemingRegressor as DemingRegressor
+ from .weighted import WeightedRegressor as WeightedRegressor
+ from .xy_weighted import XYWeightedRegressor as XYWeightedRegressor
mcup/_analytical.py ADDED
@@ -0,0 +1,101 @@
+ from __future__ import annotations
+
+ from typing import Callable
+
+ import numpy as np
+ from numdifftools import Jacobian
+ from scipy.optimize import minimize
+
+
+ def ols_solve(
+     func: Callable,
+     X: np.ndarray,
+     y: np.ndarray,
+     p0: np.ndarray,
+     optimizer: str,
+ ) -> tuple[np.ndarray, np.ndarray]:
+     def cost(params: np.ndarray) -> float:
+         r = np.array([y[i] - func(X[i], params) for i in range(len(y))])
+         return float(r @ r)
+
+     result = minimize(cost, p0, method=optimizer)
+     params = result.x
+     n, p = len(y), len(params)
+
+     J = Jacobian(lambda q: np.array([func(X[i], q) for i in range(len(X))]))(params)
+     residuals = np.array([y[i] - func(X[i], params) for i in range(len(y))])
+     sigma2 = float(np.sum(residuals**2)) / max(n - p, 1)
+
+     try:
+         cov = sigma2 * np.linalg.inv(J.T @ J)
+     except np.linalg.LinAlgError:
+         cov = np.full((len(params), len(params)), np.nan)
+
+     return params, cov
+
+
+ def analytical_solve(
+     func: Callable,
+     X: np.ndarray,
+     y: np.ndarray,
+     weights: np.ndarray,
+     p0: np.ndarray,
+     optimizer: str,
+ ) -> tuple[np.ndarray, np.ndarray]:
+     W = np.diag(weights)
+
+     def cost(params: np.ndarray) -> float:
+         r = np.array([y[i] - func(X[i], params) for i in range(len(y))])
+         return float(r @ W @ r)
+
+     result = minimize(cost, p0, method=optimizer)
+     params = result.x
+
+     J = Jacobian(lambda p: np.array([func(X[i], p) for i in range(len(X))]))(params)
+     cov = np.linalg.inv(J.T @ W @ J)
+     return params, cov
+
+
+ def deming_analytical_solve(
+     func: Callable,
+     X_obs: np.ndarray,
+     y_obs: np.ndarray,
+     x_err: np.ndarray,
+     y_err: np.ndarray,
+     p0: np.ndarray,
+     optimizer: str,
+ ) -> tuple[np.ndarray, np.ndarray]:
+     n_beta = len(p0)
+     n = len(y_obs)
+     x_var: np.ndarray = x_err**2  # type: ignore[assignment]
+     y_var: np.ndarray = y_err**2  # type: ignore[assignment]
+     # theta packs the model parameters followed by the latent true x values.
+     theta0 = np.concatenate([p0, X_obs.ravel()])
+
+     def cost(theta: np.ndarray) -> float:
+         beta = theta[:n_beta]
+         eta = theta[n_beta:].reshape(X_obs.shape)
+         x_term: float = float(np.sum((X_obs - eta) ** 2 / x_var))
+         y_term: float = float(
+             np.sum((y_obs - np.array([func(eta[i], beta) for i in range(n)])) ** 2 / y_var)
+         )
+         return x_term + y_term
+
+     def residuals(theta: np.ndarray) -> np.ndarray:
+         beta = theta[:n_beta]
+         eta = theta[n_beta:].reshape(X_obs.shape)
+         r_x = (X_obs - eta) / x_err
+         r_y = (y_obs - np.array([func(eta[i], beta) for i in range(n)])) / y_err
+         return np.concatenate([r_x.ravel(), r_y.ravel()])  # type: ignore[no-any-return]
+
+     result = minimize(cost, theta0, method=optimizer)
+     theta = result.x
+     beta = theta[:n_beta]
+
+     J = Jacobian(residuals)(theta)
+     try:
+         full_cov = np.linalg.inv(J.T @ J)
+     except np.linalg.LinAlgError:
+         full_cov = np.full((len(theta0), len(theta0)), np.nan)
+
+     # Report only the parameter block; the latent-x entries are nuisance terms.
+     cov = full_cov[:n_beta, :n_beta]
+     return beta, cov
mcup/_mc.py ADDED
@@ -0,0 +1,77 @@
+ from __future__ import annotations
+
+ from typing import Callable, Optional
+
+ import numpy as np
+ from scipy.optimize import minimize
+
+ from ._utils import welford_finalize, welford_update
+
+
+ def mc_solve(
+     cost_fn_builder: Callable,
+     X: np.ndarray,
+     y: np.ndarray,
+     x_err: Optional[np.ndarray],
+     y_err: np.ndarray,
+     p0: np.ndarray,
+     n_iter: int,
+     rtol: Optional[float],
+     atol: Optional[float],
+     optimizer: str,
+     extract_params: Optional[Callable] = None,
+     p0_fn: Optional[Callable] = None,
+ ) -> tuple[np.ndarray, np.ndarray, int]:
+     def _default_extract(theta: np.ndarray) -> np.ndarray:
+         return theta
+
+     def _default_p0_fn(x_s: np.ndarray, y_s: np.ndarray) -> np.ndarray:
+         return p0
+
+     _extract: Callable = extract_params if extract_params is not None else _default_extract
+     _p0_fn: Callable = p0_fn if p0_fn is not None else _default_p0_fn
+
+     n_tracked = len(_extract(p0))
+     n, mean, cov_agg = 0, np.zeros(n_tracked), np.zeros((n_tracked, n_tracked))
+     current_est = _extract(p0).copy()
+
+     def _step(x_s: np.ndarray, y_s: np.ndarray) -> None:
+         # Refit on one resampled dataset and fold the result into the
+         # running Welford mean/covariance.
+         nonlocal n, mean, cov_agg, current_est
+         cost = cost_fn_builder(x_s, y_s, current_est)
+         result = minimize(cost, _p0_fn(x_s, y_s), method=optimizer)
+         if result.success:
+             tracked = _extract(result.x)
+             n, mean, cov_agg = welford_update(n, mean, cov_agg, tracked)
+             current_est = mean.copy()
+
+     x_noise_shape = X.shape
+     y_noise_shape = y.shape
+
+     if rtol is not None and atol is not None:
+         # Convergence mode: iterate until both the mean and the std stabilise.
+         mean_prev = np.full(n_tracked, np.inf)
+         std_prev = np.full(n_tracked, np.inf)
+         max_iter = n_iter if n_iter is not None else 100_000
+         for _ in range(max_iter):
+             if x_err is not None:
+                 x_s = X + np.random.normal(0, 1, x_noise_shape) * x_err
+             else:
+                 x_s = X.copy()
+             y_s = y + np.random.normal(0, 1, y_noise_shape) * y_err
+             _step(x_s, y_s)
+             if n > 1:
+                 std = np.sqrt(np.diag(welford_finalize(n, cov_agg)))
+                 if np.allclose(mean, mean_prev, rtol=rtol, atol=atol) and np.allclose(
+                     std, std_prev, rtol=rtol, atol=atol
+                 ):
+                     break
+                 mean_prev, std_prev = mean.copy(), std.copy()
+     else:
+         for _ in range(n_iter):
+             if x_err is not None:
+                 x_s = X + np.random.normal(0, 1, x_noise_shape) * x_err
+             else:
+                 x_s = X.copy()
+             y_s = y + np.random.normal(0, 1, y_noise_shape) * y_err
+             _step(x_s, y_s)
+
+     return mean, welford_finalize(n, cov_agg), n
mcup/_utils.py ADDED
@@ -0,0 +1,37 @@
+ from __future__ import annotations
+
+ import contextlib
+ from typing import Generator, Optional
+
+ import numpy as np
+
+
+ @contextlib.contextmanager
+ def local_numpy_seed(seed: Optional[int]) -> Generator[None, None, None]:
+     # Temporarily seed NumPy's global RNG; restore the previous state on exit.
+     if seed is not None:
+         state = np.random.get_state()
+         np.random.seed(seed)
+     try:
+         yield
+     finally:
+         if seed is not None:
+             np.random.set_state(state)
+
+
+ def welford_update(
+     n: int,
+     mean: np.ndarray,
+     cov_agg: np.ndarray,
+     x: np.ndarray,
+ ) -> tuple[int, np.ndarray, np.ndarray]:
+     # One step of Welford's online algorithm; cov_agg accumulates
+     # the summed outer products of deviations from the running mean.
+     n = n + 1
+     delta = x - mean
+     mean = mean + delta / n
+     cov_agg = cov_agg + np.outer(delta, x - mean)
+     return n, mean, cov_agg
+
+
+ def welford_finalize(n: int, cov_agg: np.ndarray) -> np.ndarray:
+     # Unbiased sample covariance; undefined below two samples.
+     if n < 2:
+         return np.full_like(cov_agg, np.nan)  # type: ignore[no-any-return]
+     return cov_agg / (n - 1)  # type: ignore[no-any-return]
mcup/base.py ADDED
@@ -0,0 +1,69 @@
+ from __future__ import annotations
+
+ from typing import Any, Callable, Dict, Optional, Tuple, Union
+
+ import numpy as np
+
+
+ class BaseRegressor:
+     def __init__(
+         self,
+         func: Callable,
+         method: str = "mc",
+         n_iter: int = 10_000,
+         rtol: Optional[float] = None,
+         atol: Optional[float] = None,
+         optimizer: str = "Nelder-Mead",
+     ) -> None:
+         self.func = func
+         self.method = method
+         self.n_iter = n_iter
+         self.rtol = rtol
+         self.atol = atol
+         self.optimizer = optimizer
+
+     def get_params(self, deep: bool = True) -> Dict[str, Any]:
+         return {
+             "func": self.func,
+             "method": self.method,
+             "n_iter": self.n_iter,
+             "rtol": self.rtol,
+             "atol": self.atol,
+             "optimizer": self.optimizer,
+         }
+
+     def set_params(self, **params: Any) -> BaseRegressor:
+         for k, v in params.items():
+             setattr(self, k, v)
+         return self
+
+     def _check_is_fitted(self) -> None:
+         if not hasattr(self, "params_"):
+             raise ValueError("Estimator is not fitted. Call fit() first.")
+
+     def _validate_inputs(
+         self,
+         X: np.ndarray,
+         y: np.ndarray,
+         y_err: np.ndarray,
+         x_err: Optional[np.ndarray] = None,
+     ) -> Union[
+         Tuple[np.ndarray, np.ndarray, np.ndarray],
+         Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
+     ]:
+         X = np.asarray(X, dtype=float)
+         y = np.asarray(y, dtype=float)
+         y_err = np.asarray(y_err, dtype=float)
+         if X.shape[0] != y.shape[0]:
+             raise ValueError("X and y must have the same number of samples.")
+         if y_err.shape != y.shape:
+             raise ValueError("y_err must have the same shape as y.")
+         if x_err is not None:
+             x_err = np.asarray(x_err, dtype=float)
+             if x_err.shape != X.shape:
+                 raise ValueError("x_err must have the same shape as X.")
+             return X, y, y_err, x_err
+         return X, y, y_err
+
+     def fit(self, X: np.ndarray, y: np.ndarray, **kwargs: Any) -> "BaseRegressor":
+         raise NotImplementedError
mcup/data_generator.py ADDED
@@ -0,0 +1,131 @@
+ from __future__ import annotations
+
+ from typing import Any, Callable, Optional, Union
+
+ import numpy as np
+
+ from ._utils import local_numpy_seed
+
+
+ class DataGenerator:
+     """Generates synthetic x/y data with optional noise for testing estimators.
+
+     Parameters:
+         fun: Function for generating y data.
+         data_len: Length of the data.
+         boundaries: 1D ``[a, b]`` or 2D ``[[a_1, b_1], ...]`` array defining x intervals.
+         seed: Optional seed for the noise generator.
+         dtype: NumPy dtype for generated arrays. Default ``np.float64``.
+         params: Optional parameter array passed as second argument to ``fun``.
+     """
+
+     def __init__(
+         self,
+         fun: Callable,
+         data_len: int,
+         boundaries: Union[list, np.ndarray],
+         seed: Optional[int] = None,
+         dtype: Any = np.float64,
+         params: Optional[np.ndarray] = None,
+     ) -> None:
+         if not callable(fun):
+             raise TypeError("Argument fun has to be callable.")
+
+         if not isinstance(data_len, int):
+             raise TypeError("Argument data_len has to be an integer.")
+
+         if not isinstance(boundaries, (list, np.ndarray)):
+             raise TypeError("Argument boundaries has to be a list or np.ndarray.")
+
+         b: np.ndarray = np.array(boundaries) if isinstance(boundaries, list) else boundaries
+
+         if b.ndim != 2 and b.ndim != 1:
+             raise TypeError(
+                 "Argument boundaries has to have dimensionality of exactly one or two."
+             )
+
+         if b.ndim == 2 and b.shape[0] == 1:
+             b = b[0]
+
+         if b.ndim == 2:
+             self.x_dim = b.shape[0]
+             if b.shape[1] != 2:
+                 raise TypeError(
+                     "Argument boundaries has to have defined all intervals "
+                     "with exactly two numbers."
+                 )
+
+             for dim_i in range(self.x_dim):
+                 if b[dim_i][0] >= b[dim_i][1]:
+                     raise TypeError("Invalid interval in argument boundaries.")
+
+             self.x = np.linspace(
+                 b[:, 0],
+                 b[:, 1],
+                 data_len,
+                 dtype=dtype,
+                 endpoint=True,
+             )
+
+         elif b.ndim == 1:
+             if b.shape[0] != 2:
+                 raise TypeError(
+                     "Argument boundaries has to have interval with exactly two numbers."
+                 )
+
+             self.x_dim = 1
+             if b[0] >= b[1]:
+                 raise TypeError("Invalid interval in argument boundaries.")
+
+             self.x = np.linspace(
+                 b[0],
+                 b[1],
+                 data_len,
+                 dtype=dtype,
+                 endpoint=True,
+             )
+
+         self.data_len = data_len
+         self.seed = seed
+         self.y = np.zeros((data_len), dtype=dtype)
+         for i in range(self.data_len):
+             if params is None:
+                 self.y[i] = fun(self.x[i])
+             else:
+                 self.y[i] = fun(self.x[i], params)
+
+     def __add_noise(
+         self,
+         data: np.ndarray,
+         const_err: Optional[float] = None,
+         stat_error: Optional[float] = None,
+     ) -> np.ndarray:
+         assert const_err is not None or stat_error is not None
+
+         # Per-point draws (size=data.shape): additive noise with scale
+         # const_err and/or multiplicative noise with relative scale stat_error.
+         if stat_error is None:
+             assert const_err is not None
+             data_ret: np.ndarray = data + np.random.normal(
+                 loc=0.0, scale=const_err, size=data.shape
+             )
+         elif const_err is None:
+             data_ret = np.multiply(
+                 data, np.random.normal(loc=1.0, scale=stat_error, size=data.shape)
+             )
+         else:
+             data_ret = np.multiply(
+                 data, np.random.normal(loc=1.0, scale=stat_error, size=data.shape)
+             ) + np.random.normal(loc=0.0, scale=const_err, size=data.shape)
+
+         return data_ret
+
+     def add_noise_x(
+         self,
+         const_err: Optional[float] = None,
+         stat_error: Optional[float] = None,
+     ) -> np.ndarray:
+         with local_numpy_seed(self.seed):
+             return self.__add_noise(self.x, const_err=const_err, stat_error=stat_error)
+
+     def add_noise_y(
+         self,
+         const_err: Optional[float] = None,
+         stat_error: Optional[float] = None,
+     ) -> np.ndarray:
+         with local_numpy_seed(self.seed):
+             return self.__add_noise(self.y, const_err=const_err, stat_error=stat_error)
mcup/deming.py ADDED
@@ -0,0 +1,127 @@
+ from __future__ import annotations
+
+ from typing import Callable, Optional
+
+ import numpy as np
+
+ from ._analytical import deming_analytical_solve
+ from ._mc import mc_solve
+ from .base import BaseRegressor
+
+
+ class DemingRegressor(BaseRegressor):
+     """Regression estimator using Deming (total least squares) joint optimisation.
+
+     Optimises jointly over model parameters and latent true x values, giving an
+     exact treatment of both x and y measurement errors. Slower than
+     ``XYWeightedRegressor`` but more accurate when x errors are large or the
+     model is strongly nonlinear.
+
+     Supports two solvers selected via the ``method`` argument:
+
+     - ``"analytical"`` — joint optimisation with ``(J^T W J)^{-1}`` covariance.
+     - ``"mc"`` — Monte Carlo sampling with Welford online covariance
+       (default, robust for nonlinear models).
+
+     Parameters:
+         func: Model function with signature ``func(x, params) -> float``.
+         method: Solver to use, either ``"analytical"`` or ``"mc"``. Default ``"mc"``.
+         n_iter: Maximum number of Monte Carlo iterations. Default ``10_000``.
+         rtol: Relative tolerance for MC convergence stopping. Default ``None`` (disabled).
+         atol: Absolute tolerance for MC convergence stopping. Default ``None`` (disabled).
+         optimizer: SciPy optimizer name used for parameter fitting. Default ``"BFGS"``.
+
+     Attributes:
+         params_: Fitted parameter array.
+         params_std_: Standard deviations of fitted parameters.
+         covariance_: Full parameter covariance matrix.
+         n_iter_: Actual number of MC iterations run (MC method only).
+     """
+
+     def __init__(
+         self,
+         func: Callable,
+         method: str = "mc",
+         n_iter: int = 10_000,
+         rtol: Optional[float] = None,
+         atol: Optional[float] = None,
+         optimizer: str = "BFGS",
+     ) -> None:
+         super().__init__(
+             func,
+             method=method,
+             n_iter=n_iter,
+             rtol=rtol,
+             atol=atol,
+             optimizer=optimizer,
+         )
+
+     def fit(  # type: ignore[override]
+         self,
+         X: np.ndarray,
+         y: np.ndarray,
+         x_err: np.ndarray,
+         y_err: np.ndarray,
+         p0: np.ndarray,
+     ) -> "DemingRegressor":
+         X, y, y_err, x_err = self._validate_inputs(X, y, y_err, x_err)  # type: ignore[misc]
+         p0 = np.asarray(p0, dtype=float)
+         n_beta = len(p0)
+         n_data = len(y)
+
+         if self.method == "analytical":
+             params, cov = deming_analytical_solve(
+                 self.func, X, y, x_err, y_err, p0, self.optimizer
+             )
+             self.params_ = params
+             self.covariance_ = cov
+             self.params_std_ = np.sqrt(np.diag(cov))
+         else:
+             x_var: np.ndarray = x_err**2  # type: ignore[assignment]
+             y_var: np.ndarray = y_err**2  # type: ignore[assignment]
+
+             def cost_fn_builder(
+                 x_s: np.ndarray, y_s: np.ndarray, params_est: np.ndarray
+             ) -> object:
+                 def cost(theta: np.ndarray) -> float:
+                     beta = theta[:n_beta]
+                     eta = theta[n_beta:].reshape(X.shape)
+                     x_term: float = float(np.sum((x_s - eta) ** 2 / x_var))
+                     y_term: float = float(
+                         np.sum(
+                             (y_s - np.array([self.func(eta[i], beta) for i in range(n_data)])) ** 2
+                             / y_var
+                         )
+                     )
+                     return x_term + y_term
+
+                 return cost
+
+             def extract_params(theta: np.ndarray) -> np.ndarray:
+                 return theta[:n_beta]
+
+             def p0_fn(x_s: np.ndarray, y_s: np.ndarray) -> np.ndarray:
+                 return np.concatenate([p0, x_s.ravel()])  # type: ignore[no-any-return]
+
+             theta0 = np.concatenate([p0, X.ravel()])
+
+             mean, cov, n = mc_solve(
+                 cost_fn_builder,
+                 X,
+                 y,
+                 x_err,
+                 y_err,
+                 theta0,
+                 self.n_iter,
+                 self.rtol,
+                 self.atol,
+                 self.optimizer,
+                 extract_params=extract_params,
+                 p0_fn=p0_fn,
+             )
+             self.params_ = mean
+             self.covariance_ = cov
+             self.params_std_ = np.sqrt(np.diag(cov))
+             self.n_iter_ = n
+
+         return self
mcup/weighted.py ADDED
@@ -0,0 +1,82 @@
+ from __future__ import annotations
+
+ import numpy as np
+
+ from ._analytical import analytical_solve
+ from ._mc import mc_solve
+ from .base import BaseRegressor
+
+
+ class WeightedRegressor(BaseRegressor):
+     """Regression estimator for data where only y has measurement errors.
+
+     Minimises the weighted chi-squared objective ``Σ (y - f(x))² / σ_y²``.
+     Supports two solvers selected via the ``method`` argument:
+
+     - ``"analytical"`` — weighted least squares with ``(J^T W J)^{-1}`` covariance (fast).
+     - ``"mc"`` — Monte Carlo sampling with Welford online covariance (robust for nonlinear models).
+
+     Parameters:
+         func: Model function with signature ``func(x, params) -> float``.
+         method: Solver to use, either ``"analytical"`` or ``"mc"``. Default ``"mc"``.
+         n_iter: Maximum number of Monte Carlo iterations. Default ``10_000``.
+         rtol: Relative tolerance for MC convergence stopping. Default ``None`` (disabled).
+         atol: Absolute tolerance for MC convergence stopping. Default ``None`` (disabled).
+         optimizer: SciPy optimizer name used for parameter fitting. Default ``"Nelder-Mead"``.
+
+     Attributes:
+         params_: Fitted parameter array.
+         params_std_: Standard deviations of fitted parameters.
+         covariance_: Full parameter covariance matrix.
+         n_iter_: Actual number of MC iterations run (MC method only).
+     """
+
+     def fit(  # type: ignore[override]
+         self,
+         X: np.ndarray,
+         y: np.ndarray,
+         y_err: np.ndarray,
+         p0: np.ndarray,
+     ) -> "WeightedRegressor":
+         X, y, y_err = self._validate_inputs(X, y, y_err)  # type: ignore[misc]
+         p0 = np.asarray(p0, dtype=float)
+         self._y_err_fit_ = y_err
+
+         weights = 1.0 / (y_err**2)
+
+         if self.method == "analytical":
+             params, cov = analytical_solve(self.func, X, y, weights, p0, self.optimizer)
+             self.params_ = params
+             self.covariance_ = cov
+             self.params_std_ = np.sqrt(np.diag(cov))
+         else:
+
+             def cost_fn_builder(
+                 x_s: np.ndarray, y_s: np.ndarray, params_est: np.ndarray
+             ) -> object:
+                 w = 1.0 / (y_err**2)
+
+                 def cost(params: np.ndarray) -> float:
+                     r = np.array([y_s[i] - self.func(x_s[i], params) for i in range(len(y_s))])
+                     return float(np.dot(r**2, w))
+
+                 return cost
+
+             mean, cov, n = mc_solve(
+                 cost_fn_builder,
+                 X,
+                 y,
+                 None,
+                 y_err,
+                 p0,
+                 self.n_iter,
+                 self.rtol,
+                 self.atol,
+                 self.optimizer,
+             )
+             self.params_ = mean
+             self.covariance_ = cov
+             self.params_std_ = np.sqrt(np.diag(cov))
+             self.n_iter_ = n
+
+         return self
mcup/xy_weighted.py ADDED
@@ -0,0 +1,106 @@
+ from __future__ import annotations
+
+ from typing import Callable
+
+ import numpy as np
+ from numdifftools import Gradient
+
+ from ._analytical import analytical_solve
+ from ._mc import mc_solve
+ from .base import BaseRegressor
+
+
+ def _combined_weights(
+     func: Callable,
+     X: np.ndarray,
+     params: np.ndarray,
+     x_err: np.ndarray,
+     y_err: np.ndarray,
+ ) -> np.ndarray:
+     var: np.ndarray = y_err**2  # type: ignore[assignment]
+     for i in range(len(X)):
+         xi = np.atleast_1d(X[i])
+         df_dx = Gradient(lambda x: func(x, params))(xi)
+         xe = np.atleast_1d(x_err[i])
+         var[i] += float(np.dot(df_dx.ravel() ** 2, xe.ravel() ** 2))
+     return 1.0 / var  # type: ignore[no-any-return]
+
+
+ class XYWeightedRegressor(BaseRegressor):
+     """Regression estimator for data where both x and y have measurement errors.
+
+     Uses iteratively reweighted least squares (IRLS) to combine x and y variances
+     via error propagation: ``σ_combined² = σ_y² + (∂f/∂x)² σ_x²``. Faster than
+     ``DemingRegressor`` and well-suited to mildly nonlinear models.
+
+     Supports two solvers selected via the ``method`` argument:
+
+     - ``"analytical"`` — IRLS with ``(J^T W J)^{-1}`` covariance (fast).
+     - ``"mc"`` — Monte Carlo sampling with Welford online covariance (robust for nonlinear models).
+
+     Parameters:
+         func: Model function with signature ``func(x, params) -> float``.
+         method: Solver to use, either ``"analytical"`` or ``"mc"``. Default ``"mc"``.
+         n_iter: Maximum number of Monte Carlo iterations. Default ``10_000``.
+         rtol: Relative tolerance for MC convergence stopping. Default ``None`` (disabled).
+         atol: Absolute tolerance for MC convergence stopping. Default ``None`` (disabled).
+         optimizer: SciPy optimizer name used for parameter fitting. Default ``"Nelder-Mead"``.
+
+     Attributes:
+         params_: Fitted parameter array.
+         params_std_: Standard deviations of fitted parameters.
+         covariance_: Full parameter covariance matrix.
+         n_iter_: Actual number of MC iterations run (MC method only).
+     """
+
+     def fit(  # type: ignore[override]
+         self,
+         X: np.ndarray,
+         y: np.ndarray,
+         x_err: np.ndarray,
+         y_err: np.ndarray,
+         p0: np.ndarray,
+         n_irls: int = 10,
+     ) -> "XYWeightedRegressor":
+         X, y, y_err, x_err = self._validate_inputs(X, y, y_err, x_err)  # type: ignore[misc]
+         p0 = np.asarray(p0, dtype=float)
+
+         if self.method == "analytical":
+             params = p0.copy()
+             for _ in range(n_irls):
+                 weights = _combined_weights(self.func, X, params, x_err, y_err)
+                 params, cov = analytical_solve(self.func, X, y, weights, params, self.optimizer)
+             self.params_ = params
+             self.covariance_ = cov
+             self.params_std_ = np.sqrt(np.diag(cov))
+         else:
+
+             def cost_fn_builder(
+                 x_s: np.ndarray, y_s: np.ndarray, params_est: np.ndarray
+             ) -> object:
+                 weights = _combined_weights(self.func, x_s, params_est, x_err, y_err)
+
+                 def cost(params: np.ndarray) -> float:
+                     r = np.array([y_s[i] - self.func(x_s[i], params) for i in range(len(y_s))])
+                     return float(np.dot(r**2, weights))
+
+                 return cost
+
+             mean, cov, n = mc_solve(
+                 cost_fn_builder,
+                 X,
+                 y,
+                 x_err,
+                 y_err,
+                 p0,
+                 self.n_iter,
+                 self.rtol,
+                 self.atol,
+                 self.optimizer,
+             )
+             self.params_ = mean
+             self.covariance_ = cov
+             self.params_std_ = np.sqrt(np.diag(cov))
+             self.n_iter_ = n
+
+         return self
mcup-1.0.0.dist-info/METADATA ADDED
@@ -0,0 +1,185 @@
+ Metadata-Version: 2.4
+ Name: mcup
+ Version: 1.0.0
+ Summary: Monte Carlo Uncertainty Propagation for regression with measurement errors
+ Author-email: Daniel Herman <daniel.herman@protonmail.com>
+ License: MIT
+ Project-URL: Repository, https://github.com/detrin/MCUP
+ Keywords: physics,statistics,error,uncertainty,propagation,regression
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Requires-Python: >=3.11
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: numpy>=1.21.0
+ Requires-Dist: scipy>=1.8.0
+ Requires-Dist: numdifftools>=0.9.39
+ Provides-Extra: dev
+ Requires-Dist: pytest>=7.0; extra == "dev"
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
+ Requires-Dist: ruff>=0.4.0; extra == "dev"
+ Requires-Dist: mypy>=1.8; extra == "dev"
+ Requires-Dist: twine>=4.0; extra == "dev"
+ Requires-Dist: build>=0.10; extra == "dev"
+ Provides-Extra: docs
+ Requires-Dist: mkdocs>=1.4; extra == "docs"
+ Requires-Dist: mkdocs-material>=9.0; extra == "docs"
+ Requires-Dist: mkdocstrings[python]>=0.20; extra == "docs"
+ Dynamic: license-file
+
+ # MCUP
+
+ MCUP (Monte Carlo Uncertainty Propagation) is a Python library for regression with measurement errors. It provides three sklearn-like estimators that correctly propagate x and y measurement uncertainties into parameter confidence intervals.
+
+ [![PyPI pyversions](https://img.shields.io/pypi/pyversions/mcup.svg)](https://pypi.org/project/mcup/) [![PyPI version](https://img.shields.io/pypi/v/mcup.svg)](https://pypi.org/project/mcup/) [![CI](https://github.com/detrin/MCUP/actions/workflows/package-main.yml/badge.svg)](https://github.com/detrin/MCUP/actions/workflows/package-main.yml) [![codecov](https://codecov.io/gh/detrin/MCUP/branch/master/graph/badge.svg?token=Dx6elQkztR)](https://codecov.io/gh/detrin/MCUP)
+
+ ## Install
+
+ ```bash
+ uv add mcup
+ ```
+
+ Or with pip:
+
+ ```bash
+ pip install mcup
+ ```
+
+ ## Quick start
+
+ The core idea: you have data where measurement noise is not uniform, or x itself is measured. MCUP gives you honest parameter uncertainties in both cases.
+
+ **Case 1 — only y has errors (heteroscedastic noise)**
+
+ A photodetector where noise grows with signal: points at high intensity are less reliable. OLS doesn't know that and gives overconfident slope uncertainty. `WeightedRegressor` down-weights noisy points and produces calibrated intervals.
+
+ ```python
+ import numpy as np
+ from mcup import WeightedRegressor
+
+ rng = np.random.default_rng(42)
+ x = np.linspace(1, 10, 30)
+ y_err = 0.1 * x  # noise grows with x
+ y = 2.0 * x + 1.0 + rng.normal(0, y_err)
+
+ def line(x, p):
+     return p[0] + p[1] * x
+
+ # Uniform weights (wrong — ignores that high-x points are noisier)
+ ols = WeightedRegressor(line, method="analytical")
+ ols.fit(x, y, y_err=np.ones_like(x), p0=[0.0, 1.0])
+
+ # Correct weights from measurement errors
+ wls = WeightedRegressor(line, method="analytical")
+ wls.fit(x, y, y_err=y_err, p0=[0.0, 1.0])
+
+ print(f"OLS: slope = {ols.params_[1]:.3f} ± {ols.params_std_[1]:.4f} ← overconfident")
+ print(f"Weighted: slope = {wls.params_[1]:.3f} ± {wls.params_std_[1]:.4f} ← calibrated")
+ # true slope = 2.0
+ ```
+
+ **Case 2 — both x and y have errors**
+
+ A spring balance where both extension (x) and force (y) are measured with error. Ignoring x-errors causes attenuation bias (slope pulled toward zero) and intervals that are far too narrow. `XYWeightedRegressor` propagates both error sources. The example below reuses `line` and `WeightedRegressor` from Case 1.
+
+ ```python
+ from mcup import XYWeightedRegressor
+
+ rng = np.random.default_rng(0)
+ x_true = np.linspace(0.1, 2.0, 25)
+ x_err, y_err = 0.05 * np.ones(25), 0.15 * np.ones(25)
+ x_obs = x_true + rng.normal(0, x_err)
+ y = 8.0 * x_true + rng.normal(0, y_err)  # true spring constant k=8
+
+ # Ignoring x-errors (wrong)
+ bad = WeightedRegressor(line, method="analytical")
+ bad.fit(x_obs, y, y_err=y_err, p0=[0.0, 1.0])
+
+ # Propagating both errors (correct)
+ est = XYWeightedRegressor(line, method="analytical")
+ est.fit(x_obs, y, x_err=x_err, y_err=y_err, p0=[0.0, 1.0])
+
+ print(f"Ignoring x-err: k = {bad.params_[1]:.3f} ± {bad.params_std_[1]:.3f} ← biased low, too narrow")
+ print(f"XYWeighted: k = {est.params_[1]:.3f} ± {est.params_std_[1]:.3f} ← unbiased, calibrated")
+ # true k = 8.0
+ ```
+
+ ## Why MCUP
+
+ Standard least squares (OLS) assumes all observations are equally reliable. Real experiments break this in two common ways:
+
+ - **Heteroscedastic y-errors** — measurement noise varies across the range. OLS overweights noisy points, biasing the fit and producing overconfident uncertainties.
+ - **Errors in x** — when the independent variable is itself measured (time, concentration, displacement), ignoring those errors causes attenuation bias: slopes are pulled toward zero, and uncertainty intervals shrink below their true size.
+
+ **Why not just use the covariance matrix from the optimizer?**
+
+ When measurement errors are large, the standard approach of reading off `sqrt(diag(cov))` from the fit residuals underestimates the true parameter uncertainty. The covariance matrix tells you how well the optimizer converged — it does not propagate the uncertainty that came *in* with your data. MCUP propagates measurement noise directly through the model so that `params_std_` reflects both fit quality and input uncertainty. For a worked example comparing both approaches, see this [Kaggle notebook on measurement error in regression](https://www.kaggle.com/code/jetakow/measurement-error-in-regression).
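+
+ To make that concrete, here is a minimal sketch of what the Monte Carlo solver does (simplified from `mcup/_mc.py`, which additionally resamples x when `x_err` is given, uses a Welford online update, and can stop early on convergence; the helper name `mc_propagate` and the vectorized model are ours, not part of the API):
+
+ ```python
+ import numpy as np
+ from scipy.optimize import minimize
+
+ def mc_propagate(func, x, y, y_err, p0, n_iter=1000):
+     # Resample y within its error bars, refit each replica, and read
+     # parameter uncertainty off the spread of the refits.
+     # Assumes func(x, p) is vectorized over x, like `line` above.
+     draws = []
+     for _ in range(n_iter):
+         y_s = y + np.random.normal(0, 1, y.shape) * y_err  # perturbed replica
+         res = minimize(lambda p: np.sum((y_s - func(x, p)) ** 2 / y_err**2),
+                        p0, method="Nelder-Mead")
+         if res.success:
+             draws.append(res.x)
+     draws = np.asarray(draws)
+     return draws.mean(axis=0), draws.std(axis=0, ddof=1)
+ ```
+
+ The spread of the refits carries the input noise into the parameters, which is exactly what the residual-based covariance misses.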
+
+ MCUP fixes both problems. The figure below illustrates the effect for a linear calibration with heteroscedastic y-errors:
+
+ ![Comparison of OLS vs WeightedRegressor](docs/assets/comparison_linear.png)
+
+ *Left: OLS (red) fits the same data differently from weighted regression (blue) because it treats all points equally regardless of σ_y. Right: over 500 simulated experiments, OLS coverage deviates from the nominal 68.3% — WeightedRegressor stays calibrated.*
+
+ ## Estimators
+
+ | Estimator | Use when | Error model |
+ |-----------|----------|-------------|
+ | `WeightedRegressor` | Only y has measurement errors | `Σ (y − f(x))² / σ_y²` |
+ | `XYWeightedRegressor` | Both x and y have errors (nonlinear) | Combined variance via error propagation (IRLS) |
+ | `DemingRegressor` | Both x and y have errors (linear only) | Joint optimisation over parameters + latent true x |
+
+ Each estimator supports two solvers (usage sketch below):
+ - `method="analytical"` — weighted LS + `(J^T W J)^{-1}` covariance (fast, exact for well-posed problems)
+ - `method="mc"` — Monte Carlo with Welford covariance (robust cross-check for nonlinear models)
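+
+ A minimal usage sketch for the MC solver, reusing `line`, `x`, `y`, and `y_err` from the Quick start (the tolerance values here are illustrative, not recommendations):
+
+ ```python
+ est = WeightedRegressor(line, method="mc", n_iter=10_000, rtol=1e-3, atol=1e-4)
+ est.fit(x, y, y_err=y_err, p0=[0.0, 1.0])
+ # n_iter_ reports how many MC iterations actually ran before convergence
+ print(est.params_, est.params_std_, est.n_iter_)
+ ```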
+
+ ## Benchmark summary
+
+ Validated across 13 physical scenarios (200 independent parameter configurations each). The analytical solver achieves well-calibrated 1σ uncertainty intervals on all scenarios:
+
+ | Scenario | Estimator | Bias | RMSE | Coverage |
+ |----------|-----------|------|------|----------|
+ | Linear calibration (homo) | WeightedRegressor | +0.3% | 12.8% | ✓ 68% |
+ | Linear calibration (hetero) | WeightedRegressor | +0.5% | 7.2% | ✓ 71% |
+ | Radioactive decay | WeightedRegressor | −0.0% | 2.6% | ✓ 64% |
+ | Power law (diffusion) | WeightedRegressor | +0.0% | 4.6% | ✓ 68% |
+ | Gaussian spectral peak | WeightedRegressor | −0.1% | 1.7% | ✓ 66% |
+ | Damped oscillator | WeightedRegressor | −0.4% | 7.2% | ✓ 67% |
+ | Exp decay + timing errors | **XYWeightedRegressor** | −1.2% | 5.0% | ✓ 64% |
+ | Hooke's law (x+y errors) | **XYWeightedRegressor** | −1.0% | 54% | ✓ 75% |
+ | Beer-Lambert (x+y errors) | **XYWeightedRegressor** | +46% | 220% | ✓ 68% |
+ | Method comparison | **DemingRegressor** | +14% | 111% | ✓ 64% |
+ | Isotope ratio MS | **DemingRegressor** | +3.2% | 420% | ✓ 72% |
+ | Small sample (n=8) | WeightedRegressor | −2.7% | 29% | ✓ 69% |
+ | Low SNR | WeightedRegressor | −1.9% | 136% | ✓ 67% |
+
+ Bias and RMSE are relative to the true parameter values. Large RMSE on near-zero intercepts (Beer-Lambert baseline, isotope intercept) reflects small absolute values — the coverage column is the reliable calibration metric.
+
+ ### OLS baseline: when ignoring measurement errors breaks uncertainty estimation
+
+ Plain OLS (no error weighting) estimates parameter uncertainty from fit residuals alone — σ² = SSR/(n−p). This works when noise is truly uniform. When noise varies across the range, OLS produces miscalibrated intervals even though the parameter estimates themselves may look reasonable.
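+
+ This is the estimate implemented by `ols_solve` in `mcup/_analytical.py`; a condensed sketch (the helper name `ols_cov` is illustrative):
+
+ ```python
+ import numpy as np
+ from numdifftools import Jacobian
+
+ def ols_cov(func, x, y, params):
+     # One pooled sigma^2 for every point, regardless of how noisy each
+     # point actually is; this is what breaks under heteroscedasticity.
+     pred = np.array([func(xi, params) for xi in x])
+     n, p = len(y), len(params)
+     sigma2 = np.sum((y - pred) ** 2) / (n - p)
+     J = Jacobian(lambda q: np.array([func(xi, q) for xi in x]))(params)
+     return sigma2 * np.linalg.inv(J.T @ J)
+ ```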
+
+ | Scenario | OLS coverage | WeightedRegressor coverage | What goes wrong |
+ |----------|:------------:|:--------------------------:|-----------------|
+ | S1 Linear (homo σ_y=0.5) | ✓ 68%/70% | ✓ 68%/70% | — OLS works; noise is uniform |
+ | S2 Linear (hetero σ_y=0.1+0.1·x) | ~ 86%/72% | ✓ 71%/72% | Intervals too wide; pooled σ² inflated by noisy high-x points |
+ | S3 Radioactive decay (Poisson √A) | ✗ 32%/42% | ✓ 64%/68% | Badly overconfident; large early-time counts dominate residuals |
+ | S4 Power law (8% relative noise) | ✓ 66%/66% | ✓ 68%/69% | — OLS approximately ok here |
+ | S5 Gaussian peak (Poisson counts) | ✗ 39%/54% | ✓ 66%/70% | Overconfident; amplitude and center poorly constrained |
+ | S6 Damped oscillator (uniform σ_y) | ✓ 64%/71% | ✓ 67%/72% | — OLS works; noise is uniform |
+
+ **The pattern:** OLS coverage is correct only when σ_y is constant across the range (S1, S6). As soon as noise scales with signal — Poisson counting (S3, S5) or percentage-of-reading errors (S2, S4) — the pooled residual variance is a poor proxy for per-point noise, and uncertainty intervals become unreliable. The parameter estimates themselves are often similar; it is the *uncertainty* that OLS gets wrong.
+
+ ### Using the wrong estimator when x has errors
+
+ | Scenario | Wrong estimator | Coverage | Correct estimator | Coverage |
+ |----------|-----------------|:--------:|-------------------|:--------:|
+ | Exp decay + timing errors | WeightedRegressor | ✗ 30% | XYWeightedRegressor | ✓ 64% |
+ | Beer-Lambert | WeightedRegressor | ✗ 7% | XYWeightedRegressor | ✓ 68% |
+ | Method comparison | WeightedRegressor (OLS) | ✗ 32% | DemingRegressor | ✓ 66% |
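+
+ `DemingRegressor` is the one estimator not shown above; a minimal sketch for the method-comparison case, reusing `line`, `x_obs`, `x_err`, `y_err`, and `y` from Case 2:
+
+ ```python
+ from mcup import DemingRegressor
+
+ dem = DemingRegressor(line, method="analytical")
+ dem.fit(x_obs, y, x_err=x_err, y_err=y_err, p0=[0.0, 1.0])
+ print(f"slope = {dem.params_[1]:.3f} ± {dem.params_std_[1]:.3f}")
+ ```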
+
+ See [DEVELOPING.md](DEVELOPING.md) for contributing, running tests, and building docs.
mcup-1.0.0.dist-info/RECORD ADDED
@@ -0,0 +1,14 @@
+ mcup/__init__.py,sha256=t0sRZV6lqTWcHhwgk_wQFJLvN1A-Ie9flcGNiYcIalg,243
+ mcup/_analytical.py,sha256=4VM9OR4jmAq8L4fU8nVww_OG-jUhFG4x-YFZ2ka8BmM,2988
+ mcup/_mc.py,sha256=arnon-1EfPQu_azOOdrcL34QC7Bbu69xgrOS9wzmvrc,2682
+ mcup/_utils.py,sha256=-bWB0lpfWGwRYIRzDXoObBa-sE04aqK3Iy6To1B2h1w,911
+ mcup/base.py,sha256=IUsUOMco-ea9pr1ISJGX6AnFPD7--Wn3JyKu-C2mraw,2169
+ mcup/data_generator.py,sha256=t7IeOvf4O4BToQOqNyDOwUsRfPK3zdWuzvqV-iTjNso,4263
+ mcup/deming.py,sha256=PK_YvleMkbBttEUWK85s34zTyBujUALRUxj6WfrWUFI,4453
+ mcup/weighted.py,sha256=PX6_lJ3SOtcwsa8FStLZPubdbZdjAH0TuUeturrowqI,2906
+ mcup/xy_weighted.py,sha256=19JY54jfbnN9LbdlaoflbO9HRVqFjhzgJTiui3gDqfY,3818
+ mcup-1.0.0.dist-info/licenses/LICENSE,sha256=wLrB04kUc06V33Ar3mfP3WMWcaYX-bR9auKlgASm7NA,1070
+ mcup-1.0.0.dist-info/METADATA,sha256=uqX8kGl53yYm_JWD-mNNtr9z6ZnnZ93q8GNqlQH-Y3I,10380
+ mcup-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+ mcup-1.0.0.dist-info/top_level.txt,sha256=wQnyw3YFlhcrK_VFkXApZiELVpKQ4DLsnzAXD-A8E28,5
+ mcup-1.0.0.dist-info/RECORD,,
mcup-1.0.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (82.0.1)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
mcup-1.0.0.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2019 Daniel Herman
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
mcup-1.0.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ mcup