PyDiffGame 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,223 @@
1
+ r"""Continuous-time differential game solver.
2
+
3
+ Solves the control design for
4
+
5
+ .. math:: \dot{x}(t) = A x(t) + \sum_{i=1}^N B_i v_i(t)
6
+
7
+ via the coupled differential / algebraic Riccati equations. The feedback Nash
8
+ gains are :math:`K_i = R_{ii}^{-1} B_i^\top P_i`, and the closed loop is
9
+ :math:`A_{cl} = A - \sum_i B_i K_i`.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import warnings
15
+
16
+ import numpy as np
17
+ from scipy.integrate import ODEintWarning, odeint, solve_ivp
18
+ from scipy.linalg import solve_continuous_are
19
+
20
+ from PyDiffGame._typing import FloatArray
21
+ from PyDiffGame.base import PyDiffGame
22
+
23
+
24
+ class ContinuousPyDiffGame(PyDiffGame):
25
+ """Continuous-time differential game / LQR solver."""
26
+
27
def _solve_are(self, B: FloatArray, Q: FloatArray, R: FloatArray) -> FloatArray:
    """Solve the continuous-time algebraic Riccati equation for one player.

    The system matrix is read from ``self._A``; ``B``, ``Q`` and ``R`` are the
    input matrix, state weight and input weight passed straight to SciPy.
    """
    system_matrix = self._A
    return solve_continuous_are(system_matrix, B, Q, R)
29
+
30
+ # ------------------------------------------------------------------ #
31
+ # Coupled Riccati dynamics
32
+ # ------------------------------------------------------------------ #
33
def _dP_dt(self, _t: float, P_flat: FloatArray) -> FloatArray:
    r"""Evaluate the right-hand side of the coupled matrix Riccati ODE.

    .. math:: \dot P_i = -\left(A_{cl}^\top P_i + P_i A_{cl}
              + Q_i + P_i S_i P_i\right),\qquad A_{cl}=A-\sum_j S_j P_j

    ``P_flat`` carries every player's matrix flattened and concatenated; the
    derivatives are returned in the same flattened layout. The time argument
    is accepted for the ODE-solver signature but is not used.
    """
    player_matrices = self._unflatten(P_flat)

    # The closed-loop matrix is shared by every player's equation.
    coupling = sum(S_j @ P_j for S_j, P_j in zip(self._S, player_matrices))
    closed_loop = self._A - coupling

    flat_rates = []
    for P_i, S_i, Q_i in zip(player_matrices, self._S, self._Qs):
        rate = -(closed_loop.T @ P_i + P_i @ closed_loop + Q_i + P_i @ S_i @ P_i)
        flat_rates.append(rate.ravel())
    return np.concatenate(flat_rates)
47
+
48
def _unflatten(self, P_flat: FloatArray) -> list[FloatArray]:
    """Split a flat vector into ``self._N`` matrices of shape ``(n, n)``.

    Player ``k`` occupies the ``k``-th consecutive run of ``n * n`` entries,
    where ``n`` is ``self._n``.
    """
    dim = self._n
    chunk = dim * dim
    matrices = []
    for player in range(self._N):
        start = player * chunk
        matrices.append(P_flat[start : start + chunk].reshape(dim, dim))
    return matrices
51
+
52
+ # ------------------------------------------------------------------ #
53
+ # Solve
54
+ # ------------------------------------------------------------------ #
55
def solve(self) -> ContinuousPyDiffGame:
    """Solve the game and return ``self`` so that calls can be chained.

    Runs the infinite-horizon routine when ``self._infinite_horizon`` is
    truthy and the finite-horizon routine otherwise, then sets
    ``self._solved`` to ``True``.
    """
    routine = self._solve_infinite_horizon if self._infinite_horizon else self._solve_finite_horizon
    routine()
    self._solved = True
    return self
62
+
63
def _solve_infinite_horizon(self) -> None:
    """Compute steady-state Riccati matrices, player gains and the closed loop.

    In the LQR case a single algebraic Riccati equation is solved for the
    first (only) player; otherwise the coupled solution comes from
    ``_converge_to_algebraic_solution``. Results are stored in ``self._P``,
    ``self._K``, ``self._K_aggregate`` and ``self._A_cl``.
    """
    if self.is_lqr:
        riccati = [self._solve_are(self._Bs[0], self._Qs[0], self._Rs[0])]
    else:
        riccati = self._converge_to_algebraic_solution()
    self._P = riccati

    player_gains = []
    for R_i, B_i, P_i in zip(self._Rs, self._Bs, self._P):
        # K_i = R_i^{-1} B_i^T P_i, using a linear solve instead of an explicit inverse.
        player_gains.append(np.linalg.solve(R_i, B_i.T) @ P_i)
    self._K = player_gains
    self._K_aggregate = self._aggregate_gain(self._K)
    self._A_cl = self._closed_loop(self._K)
73
+
74
+ def _converge_to_algebraic_solution(self) -> list[FloatArray]:
75
+ """Integrate the coupled DREs backwards over repeated horizons until steady state.
76
+
77
+ Robust against games with no stabilising Nash equilibrium: each backward
78
+ integration uses ``odeint`` with a hard step-count cap (``mxstep``) so a
79
+ single stiff solve can never hang, and an exceeded cap, a non-finite norm
80
+ or a failure to converge within ``max_P_iterations`` all raise a clear
81
+ error pointing the user at the finite-horizon formulation.
82
+ """
83
+
84
+ no_equilibrium = (
85
+ "the coupled algebraic Riccati iteration did not reach a stabilising Nash "
86
+ "equilibrium ({reason}); this infinite-horizon game may not have one - "
87
+ "try a finite horizon by passing T_f."
88
+ )
89
+ Ps = list(self._P_f)
90
+ norms: list[float] = []
91
+ for _ in range(self.max_P_iterations):
92
+ y0 = np.concatenate([P.ravel() for P in Ps])
93
+ try:
94
+ with warnings.catch_warnings():
95
+ warnings.simplefilter("error", category=ODEintWarning)
96
+ ys = odeint(
97
+ self._dP_dt,
98
+ y0,
99
+ [self._T_f, 0.0],
100
+ tfirst=True,
101
+ rtol=1e-8,
102
+ atol=1e-10,
103
+ mxstep=10000,
104
+ )
105
+ except ODEintWarning as exc:
106
+ raise RuntimeError(
107
+ no_equilibrium.format(reason="backward integration did not converge")
108
+ ) from exc
109
+ Ps = self._unflatten(ys[-1])
110
+ norm = sum(float(np.linalg.norm(P)) for P in Ps)
111
+ if not np.isfinite(norm):
112
+ raise RuntimeError(no_equilibrium.format(reason="non-finite Riccati norm"))
113
+ norms.append(norm)
114
+ if self._converged(norms):
115
+ return Ps
116
+ raise RuntimeError(
117
+ no_equilibrium.format(reason=f"no convergence in {self.max_P_iterations} iterations")
118
+ )
119
+
120
def _solve_finite_horizon(self) -> None:
    """Integrate the coupled differential Riccati equations backwards from ``T_f``.

    The flattened terminal matrices ``self._P_f`` are integrated from
    ``self._T_f`` down to ``0`` with LSODA and sampled on ``self._L`` evenly
    spaced instants. The samples are then reordered so that index ``0`` is
    ``t = 0``. Per-instant player gains ``R_i^{-1} B_i^T P_i`` and their
    aggregate are stored in ``self._K`` and ``self._K_aggregate_t``;
    ``self._A_cl`` is the closed loop built from the gains at ``t = 0``.

    Raises
    ------
    RuntimeError
        If the backward integration does not succeed or does not deliver all
        ``self._L`` samples.
    """
    backward_time = np.linspace(self._T_f, 0.0, self._L)
    solution = solve_ivp(
        fun=self._dP_dt,
        t_span=(self._T_f, 0.0),
        y0=np.concatenate([P.ravel() for P in self._P_f]),
        method="LSODA",
        t_eval=backward_time,
        rtol=1e-8,
        atol=1e-10,
    )
    # A failed solve returns only the samples reached before the failure;
    # indexing the missing ones would surface as an unrelated IndexError.
    if not solution.success or solution.y.shape[1] != self._L:
        raise RuntimeError(
            "backward integration of the coupled differential Riccati equations "
            f"failed ({solution.message}); the Riccati solution may escape in finite "
            "time - try a shorter horizon T_f."
        )

    # Reorder so index 0 corresponds to t = 0 (forward time).
    Ps_t = solution.y[:, ::-1]
    self._P = np.stack([self._unflatten(Ps_t[:, step]) for step in range(self._L)])  # (L, N, n, n)

    # R_i^{-1} B_i^T does not depend on time, so solve for it once per player.
    gain_maps = [np.linalg.solve(R_i, B_i.T) for R_i, B_i in zip(self._Rs, self._Bs)]

    gains_t, aggregate_t = [], []
    for step in range(self._L):
        player_gains = [gain_map @ self._P[step, i] for i, gain_map in enumerate(gain_maps)]
        gains_t.append(player_gains)
        aggregate_t.append(self._aggregate_gain(player_gains))
    self._K = gains_t
    self._K_aggregate_t = aggregate_t
    self._A_cl = self._closed_loop(gains_t[0])
146
+
147
+ # ------------------------------------------------------------------ #
148
+ # Gains
149
+ # ------------------------------------------------------------------ #
150
def _aggregate_gain(self, player_gains: list[FloatArray]) -> FloatArray:
    """Physical-input gain ``K`` such that ``u = -K x``.

    For an LQR problem this is the single player's gain. Otherwise the
    player gains are stacked row-wise and, when ``self._M_inv`` is set,
    left-multiplied by it.
    """
    if self.is_lqr:
        return player_gains[0]

    stacked = np.concatenate(player_gains, axis=0)
    if self._M_inv is None:
        return stacked
    return self._M_inv @ stacked
157
+
158
def _gain_at(self, l: int) -> FloatArray:
    """Return the aggregate gain for time index ``l``.

    The infinite-horizon gain is constant, so ``l`` is ignored in that case.
    """
    if self._infinite_horizon:
        return self._K_aggregate
    return self._K_aggregate_t[l]
160
+
161
def _closed_loop_at(self, l: int) -> FloatArray:
    """Return the closed-loop matrix for time index ``l``.

    The stored ``self._A_cl`` is used for the infinite horizon; otherwise the
    closed loop is rebuilt from the player gains stored at index ``l``.
    """
    return self._A_cl if self._infinite_horizon else self._closed_loop(self._K[l])
165
+
166
+ # ------------------------------------------------------------------ #
167
+ # Simulate
168
+ # ------------------------------------------------------------------ #
169
def simulate(self) -> ContinuousPyDiffGame:
    """Integrate the closed-loop state trajectory and return ``self``.

    The state obeys ``dx/dt = A_cl(t) (x - target)``, where ``target`` is
    ``self._x_T`` or the origin when no target is set. ``A_cl(t)`` is looked
    up by the time index ``int(t / self._delta)``, clamped to the last
    available index. The trajectory sampled on ``self._forward_time`` is
    stored in ``self._x`` with one row per sample.

    Raises
    ------
    RuntimeError
        If ``self._x_0`` is ``None`` or if the ODE integration fails.
    """
    self._require_solved()
    if self._x_0 is None:
        raise RuntimeError("simulate() requires x_0")
    target = self._x_T if self._x_T is not None else np.zeros(self._n)
    last_index = self._L - 1

    def dx_dt(t: float, x: FloatArray) -> FloatArray:
        # The solver may probe slightly past the last sample; clamp the index.
        step = min(int(t / self._delta), last_index)
        return self._closed_loop_at(step) @ (x - target)

    solution = solve_ivp(
        fun=dx_dt,
        t_span=(0.0, self._T_f),
        y0=self._x_0,
        method="LSODA",
        t_eval=self._forward_time,
        rtol=1e-8,
        atol=1e-10,
    )
    # A failed solve returns a truncated trajectory; storing it silently would
    # leave ``self._x`` out of step with the time grid.
    if not solution.success:
        raise RuntimeError(f"closed-loop simulation failed: {solution.message}")
    self._x = solution.y.T
    return self
190
+
191
+ # ------------------------------------------------------------------ #
192
+ # Stability
193
+ # ------------------------------------------------------------------ #
194
def is_closed_loop_stable(self) -> bool:
    """Stability test on the eigenvalues of the stored closed-loop matrix.

    Returns ``True`` when every eigenvalue's real part is at most
    ``self.eigenvalue_tolerance`` and no more than one real part lies within
    that tolerance of zero.
    """
    self._require_solved()
    real_parts = np.linalg.eigvals(self._A_cl).real

    if not np.all(real_parts <= self.eigenvalue_tolerance):
        return False
    near_zero = int(np.count_nonzero(np.abs(real_parts) < self.eigenvalue_tolerance))
    return bool(near_zero <= 1)
202
+
203
def algebraic_riccati_residuals(self) -> list[FloatArray]:
    r"""Coupled-ARE residuals :math:`A_{cl}^\top P_i + P_i A_{cl} + Q_i + P_i S_i P_i`.

    One residual matrix is returned per player. They are only defined for the
    infinite-horizon problem, where each should be close to zero.

    Raises
    ------
    RuntimeError
        If the game was solved over a finite horizon.
    """
    self._require_solved()
    if not self._infinite_horizon:
        raise RuntimeError(
            "algebraic_riccati_residuals() is only defined for the infinite-horizon problem; "
            "the finite-horizon solution solves the time-varying differential Riccati equation."
        )

    closed_loop = self._closed_loop(self._K)
    residuals = []
    for P_i, S_i, Q_i in zip(self._P, self._S, self._Qs):
        residuals.append(closed_loop.T @ P_i + P_i @ closed_loop + Q_i + P_i @ S_i @ P_i)
    return residuals
221
+
222
+
223
+ __all__ = ["ContinuousPyDiffGame"]
PyDiffGame/discrete.py ADDED
@@ -0,0 +1,211 @@
1
+ r"""Discrete-time differential game solver.
2
+
3
+ Considers control design for
4
+
5
+ .. math:: x[k+1] = \tilde A x[k] + \sum_{i=1}^N \tilde B_i v_i[k]
6
+
7
+ If the matrices are supplied in continuous form (the default) they are first
8
+ discretised with a zero-order hold using the Van Loan matrix-exponential
9
+ identity. The coupled feedback gains at each step are obtained by solving a
10
+ *linear* block system (the discrete coupled-Riccati gain equations are linear in
11
+ the gains given the next-step cost matrices), which is both faster and far more
12
+ robust than the root-finding the previous implementation attempted.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from collections.abc import Sequence
18
+
19
+ import numpy as np
20
+ from scipy.linalg import expm, solve_discrete_are
21
+
22
+ from PyDiffGame._typing import ArrayLike, FloatArray
23
+ from PyDiffGame.base import PyDiffGame
24
+ from PyDiffGame.objective import Objective
25
+
26
+
27
+ class DiscretePyDiffGame(PyDiffGame):
28
+ """Discrete-time differential game / LQR solver.
29
+
30
+ Parameters
31
+ ----------
32
+ is_input_discrete:
33
+ When ``True`` the matrices ``A``, ``B`` and the objectives are taken to
34
+ be already in discrete form. When ``False`` (default) the continuous
35
+ data are discretised with sampling period ``delta = T_f / L``.
36
+ """
37
+
38
def __init__(
    self,
    A: ArrayLike,
    objectives: Sequence[Objective],
    *,
    is_input_discrete: bool = False,
    **kwargs,
) -> None:
    """Initialise the game and, unless the data are already discrete, discretise them.

    ``A``, ``objectives`` and any extra keyword arguments are forwarded
    unchanged to the parent initialiser. When ``is_input_discrete`` is
    ``False`` the stored matrices are replaced by their discretised
    counterparts via ``_discretize``.
    """
    super().__init__(A, objectives, **kwargs)
    if not is_input_discrete:
        self._discretize()
    # Riccati seeds must be computed from the (possibly discretised) data.
    # NOTE(review): indentation was lost in this rendering; the seed refresh is
    # placed after the ``if`` because of the "(possibly discretised)" wording -
    # confirm against the released file.
    self._P_f = self._uncoupled_are_solutions()
51
+
52
def _solve_are(self, B: FloatArray, Q: FloatArray, R: FloatArray) -> FloatArray:
    """Solve the discrete-time algebraic Riccati equation for one player.

    The system matrix is read from ``self._A``; ``B``, ``Q`` and ``R`` are the
    input matrix, state weight and input weight passed straight to SciPy.
    """
    system_matrix = self._A
    return solve_discrete_are(system_matrix, B, Q, R)
54
+
55
def _discretize(self) -> None:
    r"""Zero-order-hold discretisation via the Van Loan identity.

    .. math:: \exp\!\left(\begin{bmatrix}A & I\\0 & 0\end{bmatrix}\delta\right)
              = \begin{bmatrix}\tilde A & \Gamma\\0 & I\end{bmatrix},
              \qquad \tilde B_i = \Gamma B_i

    ``self._A`` and ``self._Bs`` are overwritten with the discrete matrices,
    and the cost weights ``self._Qs`` and ``self._Rs`` are multiplied by the
    sampling period ``self._delta`` so the discrete sum approximates the
    continuous integral.
    """
    dim = self._n
    period = self._delta

    # Assemble the block matrix [[A, I], [0, 0]] whose exponential holds both
    # the discrete system matrix and the input integral.
    van_loan = np.zeros((2 * dim, 2 * dim))
    van_loan[:dim, :dim] = self._A
    van_loan[:dim, dim:] = np.eye(dim)
    propagated = expm(van_loan * period)
    input_integral = propagated[:dim, dim:]

    self._A = propagated[:dim, :dim]
    self._Bs = [input_integral @ B_i for B_i in self._Bs]
    self._Qs = [Q_i * period for Q_i in self._Qs]
    self._Rs = [R_i * period for R_i in self._Rs]
78
+
79
+ # ------------------------------------------------------------------ #
80
+ # Coupled gain solve (one backward step)
81
+ # ------------------------------------------------------------------ #
82
def _step_gains(self, Ps_next: list[FloatArray]) -> list[FloatArray]:
    r"""Feedback gains for one backward step given next-step matrices ``P[k+1]``.

    Solves the linear block system arising from

    .. math:: K_i = (R_{ii} + B_i^\top P_i B_i)^{-1} B_i^\top P_i
              \Big(A - \sum_{j\neq i} B_j K_j\Big)

    Writing :math:`G_i = (R_{ii} + B_i^\top P_i B_i)^{-1} B_i^\top P_i`, the
    gains satisfy :math:`K_i + G_i \sum_{j\neq i} B_j K_j = G_i A`, which is
    assembled as one stacked system and solved for all players at once.
    """
    projections = [
        np.linalg.solve(R_i + B_i.T @ P_i @ B_i, B_i.T @ P_i)
        for B_i, R_i, P_i in zip(self._Bs, self._Rs, Ps_next)
    ]

    # Row/column range occupied by each player's inputs in the stacked system.
    widths = [B_i.shape[1] for B_i in self._Bs]
    bounds = np.cumsum([0] + widths)
    total_inputs = int(bounds[-1])
    spans = [slice(bounds[i], bounds[i + 1]) for i in range(self._N)]

    lhs = np.zeros((total_inputs, total_inputs))
    rhs = np.zeros((total_inputs, self._n))
    for i, rows in enumerate(spans):
        rhs[rows] = projections[i] @ self._A
        for j, cols in enumerate(spans):
            if i == j:
                lhs[rows, cols] = np.eye(self._Bs[i].shape[1])
            else:
                lhs[rows, cols] = projections[i] @ self._Bs[j]

    stacked = np.linalg.solve(lhs, rhs)
    return [stacked[rows] for rows in spans]
110
+
111
def _step_P(self, Ps_next: list[FloatArray], gains: list[FloatArray]) -> list[FloatArray]:
    """Propagate every player's cost matrix one step backwards.

    With the closed loop built from ``gains``, each player's new matrix is
    ``A_cl^T P_i A_cl + K_i^T R_i K_i + Q_i``.
    """
    closed_loop = self._closed_loop(gains)
    updated = []
    for P_i, K_i, R_i, Q_i in zip(Ps_next, gains, self._Rs, self._Qs):
        updated.append(closed_loop.T @ P_i @ closed_loop + K_i.T @ R_i @ K_i + Q_i)
    return updated
117
+
118
+ # ------------------------------------------------------------------ #
119
+ # Solve
120
+ # ------------------------------------------------------------------ #
121
def solve(self) -> DiscretePyDiffGame:
    """Solve the game and return ``self`` so that calls can be chained.

    Runs the infinite-horizon routine when ``self._infinite_horizon`` is
    truthy and the finite-horizon routine otherwise, then sets
    ``self._solved`` to ``True``.
    """
    routine = self._solve_infinite_horizon if self._infinite_horizon else self._solve_finite_horizon
    routine()
    self._solved = True
    return self
128
+
129
def _solve_infinite_horizon(self) -> None:
    """Compute steady-state cost matrices, gains and the closed loop.

    In the LQR case the discrete algebraic Riccati equation is solved
    directly. Otherwise the backward recursion (``_step_gains`` followed by
    ``_step_P``) is repeated from ``self._P_f`` until ``self._converged``
    accepts the history of norm sums, or ``self.max_P_iterations`` steps have
    run. The gains are then recomputed from the final matrices, and the
    results are stored in ``self._P``, ``self._K``, ``self._K_aggregate`` and
    ``self._A_cl``.

    Warns
    -----
    RuntimeWarning
        If the recursion uses up ``self.max_P_iterations`` steps without
        converging. The last iterate is still stored, so callers that relied
        on a best-effort result keep working.
    """
    import warnings  # local import: only needed on the non-convergence path

    if self.is_lqr:
        Ps = [self._solve_are(self._Bs[0], self._Qs[0], self._Rs[0])]
    else:
        Ps = list(self._P_f)
        norms: list[float] = []
        for _ in range(self.max_P_iterations):
            Ps = self._step_P(Ps, self._step_gains(Ps))
            norms.append(sum(float(np.linalg.norm(P)) for P in Ps))
            if self._converged(norms):
                break
        else:
            # Previously an exhausted iteration budget went unnoticed.
            warnings.warn(
                "the coupled discrete Riccati iteration did not converge in "
                f"{self.max_P_iterations} iterations; the returned gains may not form a "
                "Nash equilibrium - try a finite horizon by passing T_f.",
                RuntimeWarning,
                stacklevel=3,
            )

    # Gains consistent with the final cost matrices.
    gains = self._step_gains(Ps)

    self._P = Ps
    self._K = gains
    self._K_aggregate = self._aggregate_gain(gains)
    self._A_cl = self._closed_loop(gains)
149
+
150
def _solve_finite_horizon(self) -> None:
    """Run the backward recursion over ``self._L`` steps and store the results.

    Starting from ``self._P_f``, gains and cost matrices are propagated
    backwards ``L - 1`` times, and one more set of gains is computed from the
    last matrices. Both sequences are then reversed so that index ``0`` is
    ``t = 0``. Results are stored in ``self._P``, ``self._K`` and
    ``self._K_aggregate_t``; ``self._A_cl`` is the closed loop built from the
    gains at index ``0``.
    """
    backward_P: list[list[FloatArray]] = [list(self._P_f)]
    backward_gains: list[list[FloatArray]] = []
    for _ in range(self._L - 1):
        latest = backward_P[-1]
        step_gains = self._step_gains(latest)
        backward_P.append(self._step_P(latest, step_gains))
        backward_gains.append(step_gains)
    backward_gains.append(self._step_gains(backward_P[-1]))

    # Index 0 == t = 0 (forward time): reverse the backward sweep.
    forward_P = backward_P[::-1]
    forward_gains = backward_gains[::-1]

    self._P = np.stack([np.stack(step) for step in forward_P])  # (L, N, n, n)
    self._K = forward_gains
    self._K_aggregate_t = [self._aggregate_gain(g) for g in forward_gains]
    self._A_cl = self._closed_loop(forward_gains[0])
166
+
167
+ # ------------------------------------------------------------------ #
168
+ # Gains
169
+ # ------------------------------------------------------------------ #
170
def _aggregate_gain(self, player_gains: list[FloatArray]) -> FloatArray:
    """Combine the per-player gains into a single gain matrix.

    For an LQR problem this is the single player's gain. Otherwise the
    player gains are stacked row-wise and, when ``self._M_inv`` is set,
    left-multiplied by it.
    """
    if self.is_lqr:
        return player_gains[0]

    stacked = np.concatenate(player_gains, axis=0)
    if self._M_inv is None:
        return stacked
    return self._M_inv @ stacked
175
+
176
def _gain_at(self, l: int) -> FloatArray:
    """Return the aggregate gain for step index ``l``.

    The infinite-horizon gain is constant, so ``l`` is ignored in that case.
    """
    if self._infinite_horizon:
        return self._K_aggregate
    return self._K_aggregate_t[l]
178
+
179
def _closed_loop_at(self, l: int) -> FloatArray:
    """Return the closed-loop matrix for step index ``l``.

    The stored ``self._A_cl`` is used for the infinite horizon; otherwise the
    closed loop is rebuilt from the player gains stored at index ``l``.
    """
    return self._A_cl if self._infinite_horizon else self._closed_loop(self._K[l])
183
+
184
+ # ------------------------------------------------------------------ #
185
+ # Simulate
186
+ # ------------------------------------------------------------------ #
187
def simulate(self) -> DiscretePyDiffGame:
    """Step the closed-loop state forward over ``self._L`` samples and return ``self``.

    The deviation from the target (``self._x_T``, or the origin when unset) is
    multiplied by the closed-loop matrix of each step. The trajectory is
    stored in ``self._x`` with one row per sample.

    Raises
    ------
    RuntimeError
        If ``self._x_0`` is ``None``.
    """
    self._require_solved()
    if self._x_0 is None:
        raise RuntimeError("simulate() requires x_0")
    target = np.zeros(self._n) if self._x_T is None else self._x_T

    trajectory = np.zeros((self._L, self._n))
    trajectory[0] = self._x_0
    for k in range(self._L - 1):
        deviation = trajectory[k] - target
        trajectory[k + 1] = self._closed_loop_at(k) @ deviation + target
    self._x = trajectory
    return self
199
+
200
+ # ------------------------------------------------------------------ #
201
+ # Stability
202
+ # ------------------------------------------------------------------ #
203
def is_closed_loop_stable(self) -> bool:
    """Schur test: the closed-loop spectral radius is strictly below one.

    The spectral radius is the largest eigenvalue modulus of ``self._A_cl``.
    """
    self._require_solved()
    moduli = np.abs(np.linalg.eigvals(self._A_cl))
    largest = float(np.max(moduli))
    return largest < 1.0
209
+
210
+
211
+ __all__ = ["DiscretePyDiffGame"]