PyDiffGame 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyDiffGame/__init__.py +50 -0
- PyDiffGame/_typing.py +25 -0
- PyDiffGame/base.py +468 -0
- PyDiffGame/comparison.py +121 -0
- PyDiffGame/continuous.py +223 -0
- PyDiffGame/discrete.py +211 -0
- PyDiffGame/examples/InvertedPendulumComparison.py +211 -236
- PyDiffGame/examples/MassesWithSpringsComparison.py +109 -208
- PyDiffGame/examples/PVTOL.py +143 -149
- PyDiffGame/examples/PVTOLComparison.py +75 -69
- PyDiffGame/examples/QuadRotorControl.py +394 -304
- PyDiffGame/lqr.py +30 -0
- PyDiffGame/objective.py +108 -0
- PyDiffGame/plotting.py +98 -0
- pydiffgame-2.0.0.dist-info/METADATA +408 -0
- {pydiffgame-1.0.0.dist-info → pydiffgame-2.0.0.dist-info}/RECORD +18 -16
- {pydiffgame-1.0.0.dist-info → pydiffgame-2.0.0.dist-info}/WHEEL +1 -1
- PyDiffGame/ContinuousPyDiffGame.py +0 -275
- PyDiffGame/DiscretePyDiffGame.py +0 -359
- PyDiffGame/LQR.py +0 -73
- PyDiffGame/Objective.py +0 -62
- PyDiffGame/PyDiffGame.py +0 -1273
- PyDiffGame/PyDiffGameLQRComparison.py +0 -169
- pydiffgame-1.0.0.dist-info/METADATA +0 -306
- {pydiffgame-1.0.0.dist-info → pydiffgame-2.0.0.dist-info}/licenses/LICENSE +0 -0
PyDiffGame/continuous.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
r"""Continuous-time differential game solver.
|
|
2
|
+
|
|
3
|
+
Solves the control design for
|
|
4
|
+
|
|
5
|
+
.. math:: \dot{x}(t) = A x(t) + \sum_{i=1}^N B_i v_i(t)
|
|
6
|
+
|
|
7
|
+
via the coupled differential / algebraic Riccati equations. The feedback Nash
|
|
8
|
+
gains are :math:`K_i = R_{ii}^{-1} B_i^\top P_i`, and the closed loop is
|
|
9
|
+
:math:`A_{cl} = A - \sum_i B_i K_i`.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import warnings
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
from scipy.integrate import ODEintWarning, odeint, solve_ivp
|
|
18
|
+
from scipy.linalg import solve_continuous_are
|
|
19
|
+
|
|
20
|
+
from PyDiffGame._typing import FloatArray
|
|
21
|
+
from PyDiffGame.base import PyDiffGame
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ContinuousPyDiffGame(PyDiffGame):
|
|
25
|
+
"""Continuous-time differential game / LQR solver."""
|
|
26
|
+
|
|
27
|
+
def _solve_are(self, B: FloatArray, Q: FloatArray, R: FloatArray) -> FloatArray:
|
|
28
|
+
return solve_continuous_are(self._A, B, Q, R)
|
|
29
|
+
|
|
30
|
+
# ------------------------------------------------------------------ #
|
|
31
|
+
# Coupled Riccati dynamics
|
|
32
|
+
# ------------------------------------------------------------------ #
|
|
33
|
+
def _dP_dt(self, _t: float, P_flat: FloatArray) -> FloatArray:
|
|
34
|
+
r"""RHS of the coupled matrix Riccati ODE (forward-time derivative).
|
|
35
|
+
|
|
36
|
+
.. math:: \dot P_i = -\left(A_{cl}^\top P_i + P_i A_{cl}
|
|
37
|
+
+ Q_i + P_i S_i P_i\right),\qquad A_{cl}=A-\sum_j S_j P_j
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
Ps = self._unflatten(P_flat)
|
|
41
|
+
A_cl = self._A - sum(S_j @ P_j for S_j, P_j in zip(self._S, Ps))
|
|
42
|
+
derivatives = [
|
|
43
|
+
-(A_cl.T @ P_i + P_i @ A_cl + Q_i + P_i @ S_i @ P_i)
|
|
44
|
+
for P_i, S_i, Q_i in zip(Ps, self._S, self._Qs)
|
|
45
|
+
]
|
|
46
|
+
return np.concatenate([d.ravel() for d in derivatives])
|
|
47
|
+
|
|
48
|
+
def _unflatten(self, P_flat: FloatArray) -> list[FloatArray]:
|
|
49
|
+
size = self._n * self._n
|
|
50
|
+
return [P_flat[i * size : (i + 1) * size].reshape(self._n, self._n) for i in range(self._N)]
|
|
51
|
+
|
|
52
|
+
# ------------------------------------------------------------------ #
|
|
53
|
+
# Solve
|
|
54
|
+
# ------------------------------------------------------------------ #
|
|
55
|
+
def solve(self) -> ContinuousPyDiffGame:
    """Run the solver matching the horizon type and mark the game as solved.

    Calls ``_solve_infinite_horizon`` when ``self._infinite_horizon`` is
    truthy and ``_solve_finite_horizon`` otherwise, then sets
    ``self._solved`` and returns ``self`` so calls can be chained.
    """
    run_solver = (
        self._solve_infinite_horizon
        if self._infinite_horizon
        else self._solve_finite_horizon
    )
    run_solver()
    self._solved = True
    return self
|
|
62
|
+
|
|
63
|
+
def _solve_infinite_horizon(self) -> None:
|
|
64
|
+
if self.is_lqr:
|
|
65
|
+
P = self._solve_are(self._Bs[0], self._Qs[0], self._Rs[0])
|
|
66
|
+
self._P = [P]
|
|
67
|
+
else:
|
|
68
|
+
self._P = self._converge_to_algebraic_solution()
|
|
69
|
+
|
|
70
|
+
self._K = [np.linalg.solve(R_i, B_i.T) @ P_i for R_i, B_i, P_i in zip(self._Rs, self._Bs, self._P)]
|
|
71
|
+
self._K_aggregate = self._aggregate_gain(self._K)
|
|
72
|
+
self._A_cl = self._closed_loop(self._K)
|
|
73
|
+
|
|
74
|
+
def _converge_to_algebraic_solution(self) -> list[FloatArray]:
|
|
75
|
+
"""Integrate the coupled DREs backwards over repeated horizons until steady state.
|
|
76
|
+
|
|
77
|
+
Robust against games with no stabilising Nash equilibrium: each backward
|
|
78
|
+
integration uses ``odeint`` with a hard step-count cap (``mxstep``) so a
|
|
79
|
+
single stiff solve can never hang, and an exceeded cap, a non-finite norm
|
|
80
|
+
or a failure to converge within ``max_P_iterations`` all raise a clear
|
|
81
|
+
error pointing the user at the finite-horizon formulation.
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
no_equilibrium = (
|
|
85
|
+
"the coupled algebraic Riccati iteration did not reach a stabilising Nash "
|
|
86
|
+
"equilibrium ({reason}); this infinite-horizon game may not have one - "
|
|
87
|
+
"try a finite horizon by passing T_f."
|
|
88
|
+
)
|
|
89
|
+
Ps = list(self._P_f)
|
|
90
|
+
norms: list[float] = []
|
|
91
|
+
for _ in range(self.max_P_iterations):
|
|
92
|
+
y0 = np.concatenate([P.ravel() for P in Ps])
|
|
93
|
+
try:
|
|
94
|
+
with warnings.catch_warnings():
|
|
95
|
+
warnings.simplefilter("error", category=ODEintWarning)
|
|
96
|
+
ys = odeint(
|
|
97
|
+
self._dP_dt,
|
|
98
|
+
y0,
|
|
99
|
+
[self._T_f, 0.0],
|
|
100
|
+
tfirst=True,
|
|
101
|
+
rtol=1e-8,
|
|
102
|
+
atol=1e-10,
|
|
103
|
+
mxstep=10000,
|
|
104
|
+
)
|
|
105
|
+
except ODEintWarning as exc:
|
|
106
|
+
raise RuntimeError(
|
|
107
|
+
no_equilibrium.format(reason="backward integration did not converge")
|
|
108
|
+
) from exc
|
|
109
|
+
Ps = self._unflatten(ys[-1])
|
|
110
|
+
norm = sum(float(np.linalg.norm(P)) for P in Ps)
|
|
111
|
+
if not np.isfinite(norm):
|
|
112
|
+
raise RuntimeError(no_equilibrium.format(reason="non-finite Riccati norm"))
|
|
113
|
+
norms.append(norm)
|
|
114
|
+
if self._converged(norms):
|
|
115
|
+
return Ps
|
|
116
|
+
raise RuntimeError(
|
|
117
|
+
no_equilibrium.format(reason=f"no convergence in {self.max_P_iterations} iterations")
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
def _solve_finite_horizon(self) -> None:
|
|
121
|
+
backward_time = np.linspace(self._T_f, 0.0, self._L)
|
|
122
|
+
solution = solve_ivp(
|
|
123
|
+
fun=self._dP_dt,
|
|
124
|
+
t_span=(self._T_f, 0.0),
|
|
125
|
+
y0=np.concatenate([P.ravel() for P in self._P_f]),
|
|
126
|
+
method="LSODA",
|
|
127
|
+
t_eval=backward_time,
|
|
128
|
+
rtol=1e-8,
|
|
129
|
+
atol=1e-10,
|
|
130
|
+
)
|
|
131
|
+
# Reorder so index 0 corresponds to t = 0 (forward time).
|
|
132
|
+
Ps_t = solution.y[:, ::-1]
|
|
133
|
+
self._P = np.stack([self._unflatten(Ps_t[:, l]) for l in range(self._L)]) # (L, N, n, n)
|
|
134
|
+
|
|
135
|
+
gains_t, aggregate_t = [], []
|
|
136
|
+
for l in range(self._L):
|
|
137
|
+
player_gains = [
|
|
138
|
+
np.linalg.solve(R_i, B_i.T) @ self._P[l, i]
|
|
139
|
+
for i, (R_i, B_i) in enumerate(zip(self._Rs, self._Bs))
|
|
140
|
+
]
|
|
141
|
+
gains_t.append(player_gains)
|
|
142
|
+
aggregate_t.append(self._aggregate_gain(player_gains))
|
|
143
|
+
self._K = gains_t
|
|
144
|
+
self._K_aggregate_t = aggregate_t
|
|
145
|
+
self._A_cl = self._closed_loop(gains_t[0])
|
|
146
|
+
|
|
147
|
+
# ------------------------------------------------------------------ #
|
|
148
|
+
# Gains
|
|
149
|
+
# ------------------------------------------------------------------ #
|
|
150
|
+
def _aggregate_gain(self, player_gains: list[FloatArray]) -> FloatArray:
|
|
151
|
+
"""Physical-input gain ``K`` such that ``u = -K x``."""
|
|
152
|
+
|
|
153
|
+
if self.is_lqr:
|
|
154
|
+
return player_gains[0]
|
|
155
|
+
stacked = np.concatenate(player_gains, axis=0)
|
|
156
|
+
return self._M_inv @ stacked if self._M_inv is not None else stacked
|
|
157
|
+
|
|
158
|
+
def _gain_at(self, l: int) -> FloatArray:
|
|
159
|
+
return self._K_aggregate if self._infinite_horizon else self._K_aggregate_t[l]
|
|
160
|
+
|
|
161
|
+
def _closed_loop_at(self, l: int) -> FloatArray:
|
|
162
|
+
if self._infinite_horizon:
|
|
163
|
+
return self._A_cl
|
|
164
|
+
return self._closed_loop(self._K[l])
|
|
165
|
+
|
|
166
|
+
# ------------------------------------------------------------------ #
|
|
167
|
+
# Simulate
|
|
168
|
+
# ------------------------------------------------------------------ #
|
|
169
|
+
def simulate(self) -> ContinuousPyDiffGame:
    """Integrate the closed-loop state trajectory forward from ``self._x_0``.

    The state error relative to the target (``self._x_T``, or the origin
    when it is ``None``) is propagated with the closed-loop matrix of the
    current time index. The trajectory sampled at ``self._forward_time`` is
    stored in ``self._x`` with one row per sample, and ``self`` is returned.

    Raises
    ------
    RuntimeError
        If no initial state ``x_0`` is available.
    """
    self._require_solved()
    if self._x_0 is None:
        raise RuntimeError("simulate() requires x_0")

    if self._x_T is None:
        setpoint = np.zeros(self._n)
    else:
        setpoint = self._x_T

    def error_dynamics(t: float, x: FloatArray) -> FloatArray:
        # Map continuous time to a gain index, clamped to the last one.
        index = int(t / self._delta)
        last = self._L - 1
        if index > last:
            index = last
        return self._closed_loop_at(index) @ (x - setpoint)

    solver_options = {
        "fun": error_dynamics,
        "t_span": (0.0, self._T_f),
        "y0": self._x_0,
        "method": "LSODA",
        "t_eval": self._forward_time,
        "rtol": 1e-8,
        "atol": 1e-10,
    }
    trajectory = solve_ivp(**solver_options)
    self._x = trajectory.y.T
    return self
|
|
190
|
+
|
|
191
|
+
# ------------------------------------------------------------------ #
|
|
192
|
+
# Stability
|
|
193
|
+
# ------------------------------------------------------------------ #
|
|
194
|
+
def is_closed_loop_stable(self) -> bool:
    """Hurwitz test: all eigenvalues have non-positive real part, at most one at 0.

    Both comparisons use ``self.eigenvalue_tolerance``: a real part counts
    as non-positive when it does not exceed the tolerance, and as zero when
    its magnitude is below it.
    """
    self._require_solved()
    real_parts = np.linalg.eigvals(self._A_cl).real
    tolerance = self.eigenvalue_tolerance

    if not np.all(real_parts <= tolerance):
        return False
    near_zero = int(np.sum(np.abs(real_parts) < tolerance))
    return near_zero <= 1
|
|
202
|
+
|
|
203
|
+
def algebraic_riccati_residuals(self) -> list[FloatArray]:
    r"""Coupled-ARE residuals :math:`A_{cl}^\top P_i + P_i A_{cl} + Q_i + P_i S_i P_i`.

    One residual matrix is returned per player, with the closed-loop matrix
    rebuilt from the stored gains ``self._K``. For a converged
    infinite-horizon solution every residual should be close to zero.

    Raises
    ------
    RuntimeError
        If the game was solved over a finite horizon.
    """
    self._require_solved()
    if not self._infinite_horizon:
        raise RuntimeError(
            "algebraic_riccati_residuals() is only defined for the infinite-horizon problem; "
            "the finite-horizon solution solves the time-varying differential Riccati equation."
        )

    A_cl = self._closed_loop(self._K)
    residuals = []
    for P_i, S_i, Q_i in zip(self._P, self._S, self._Qs):
        residuals.append(A_cl.T @ P_i + P_i @ A_cl + Q_i + P_i @ S_i @ P_i)
    return residuals
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
__all__ = ["ContinuousPyDiffGame"]
|
PyDiffGame/discrete.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
r"""Discrete-time differential game solver.
|
|
2
|
+
|
|
3
|
+
Considers control design for
|
|
4
|
+
|
|
5
|
+
.. math:: x[k+1] = \tilde A x[k] + \sum_{i=1}^N \tilde B_i v_i[k]
|
|
6
|
+
|
|
7
|
+
If the matrices are supplied in continuous form (the default) they are first
|
|
8
|
+
discretised with a zero-order hold using the Van Loan matrix-exponential
|
|
9
|
+
identity. The coupled feedback gains at each step are obtained by solving a
|
|
10
|
+
*linear* block system (the discrete coupled-Riccati gain equations are linear in
|
|
11
|
+
the gains given the next-step cost matrices), which is both faster and far more
|
|
12
|
+
robust than the root-finding the previous implementation attempted.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from collections.abc import Sequence
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
from scipy.linalg import expm, solve_discrete_are
|
|
21
|
+
|
|
22
|
+
from PyDiffGame._typing import ArrayLike, FloatArray
|
|
23
|
+
from PyDiffGame.base import PyDiffGame
|
|
24
|
+
from PyDiffGame.objective import Objective
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DiscretePyDiffGame(PyDiffGame):
|
|
28
|
+
"""Discrete-time differential game / LQR solver.
|
|
29
|
+
|
|
30
|
+
Parameters
|
|
31
|
+
----------
|
|
32
|
+
is_input_discrete:
|
|
33
|
+
When ``True`` the matrices ``A``, ``B`` and the objectives are taken to
|
|
34
|
+
be already in discrete form. When ``False`` (default) the continuous
|
|
35
|
+
data are discretised with sampling period ``delta = T_f / L``.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def __init__(
    self,
    A: ArrayLike,
    objectives: Sequence[Objective],
    *,
    is_input_discrete: bool = False,
    **kwargs,
) -> None:
    """Initialise the game and, unless told otherwise, discretise its data.

    ``A``, ``objectives`` and any extra keyword arguments are forwarded to
    the base-class constructor. When ``is_input_discrete`` is false the
    stored matrices are converted with ``_discretize``. In either case the
    Riccati seeds ``self._P_f`` are recomputed afterwards.
    """
    super().__init__(A, objectives, **kwargs)

    data_is_continuous = not is_input_discrete
    if data_is_continuous:
        self._discretize()

    # Riccati seeds must be computed from the (possibly discretised) data.
    self._P_f = self._uncoupled_are_solutions()
|
|
51
|
+
|
|
52
|
+
def _solve_are(self, B: FloatArray, Q: FloatArray, R: FloatArray) -> FloatArray:
|
|
53
|
+
return solve_discrete_are(self._A, B, Q, R)
|
|
54
|
+
|
|
55
|
+
def _discretize(self) -> None:
|
|
56
|
+
r"""Zero-order-hold discretisation via the Van Loan identity.
|
|
57
|
+
|
|
58
|
+
.. math:: \exp\!\left(\begin{bmatrix}A & I\\0 & 0\end{bmatrix}\delta\right)
|
|
59
|
+
= \begin{bmatrix}\tilde A & \Gamma\\0 & I\end{bmatrix},
|
|
60
|
+
\qquad \tilde B_i = \Gamma B_i
|
|
61
|
+
|
|
62
|
+
The quadratic cost weights are scaled by the sampling period so the
|
|
63
|
+
discrete sum approximates the continuous integral.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
n, delta = self._n, self._delta
|
|
67
|
+
augmented = np.zeros((2 * n, 2 * n))
|
|
68
|
+
augmented[:n, :n] = self._A
|
|
69
|
+
augmented[:n, n:] = np.eye(n)
|
|
70
|
+
exponential = expm(augmented * delta)
|
|
71
|
+
A_d = exponential[:n, :n]
|
|
72
|
+
gamma = exponential[:n, n:]
|
|
73
|
+
|
|
74
|
+
self._A = A_d
|
|
75
|
+
self._Bs = [gamma @ B_i for B_i in self._Bs]
|
|
76
|
+
self._Qs = [Q_i * delta for Q_i in self._Qs]
|
|
77
|
+
self._Rs = [R_i * delta for R_i in self._Rs]
|
|
78
|
+
|
|
79
|
+
# ------------------------------------------------------------------ #
|
|
80
|
+
# Coupled gain solve (one backward step)
|
|
81
|
+
# ------------------------------------------------------------------ #
|
|
82
|
+
def _step_gains(self, Ps_next: list[FloatArray]) -> list[FloatArray]:
|
|
83
|
+
r"""Feedback gains for one backward step given next-step matrices ``P[k+1]``.
|
|
84
|
+
|
|
85
|
+
Solves the linear block system arising from
|
|
86
|
+
|
|
87
|
+
.. math:: K_i = (R_{ii} + B_i^\top P_i B_i)^{-1} B_i^\top P_i
|
|
88
|
+
\Big(A - \sum_{j\neq i} B_j K_j\Big)
|
|
89
|
+
"""
|
|
90
|
+
|
|
91
|
+
G = [
|
|
92
|
+
np.linalg.solve(R_i + B_i.T @ P_i @ B_i, B_i.T @ P_i)
|
|
93
|
+
for B_i, R_i, P_i in zip(self._Bs, self._Rs, Ps_next)
|
|
94
|
+
]
|
|
95
|
+
offsets = np.cumsum([0] + [B_i.shape[1] for B_i in self._Bs])
|
|
96
|
+
m = int(offsets[-1])
|
|
97
|
+
C = np.zeros((m, m))
|
|
98
|
+
D = np.zeros((m, self._n))
|
|
99
|
+
for i in range(self._N):
|
|
100
|
+
rows = slice(offsets[i], offsets[i + 1])
|
|
101
|
+
D[rows] = G[i] @ self._A
|
|
102
|
+
for j in range(self._N):
|
|
103
|
+
cols = slice(offsets[j], offsets[j + 1])
|
|
104
|
+
if i == j:
|
|
105
|
+
C[rows, cols] = np.eye(self._Bs[i].shape[1])
|
|
106
|
+
else:
|
|
107
|
+
C[rows, cols] = G[i] @ self._Bs[j]
|
|
108
|
+
K_stacked = np.linalg.solve(C, D)
|
|
109
|
+
return [K_stacked[offsets[i] : offsets[i + 1]] for i in range(self._N)]
|
|
110
|
+
|
|
111
|
+
def _step_P(self, Ps_next: list[FloatArray], gains: list[FloatArray]) -> list[FloatArray]:
|
|
112
|
+
A_cl = self._closed_loop(gains)
|
|
113
|
+
return [
|
|
114
|
+
A_cl.T @ P_i @ A_cl + K_i.T @ R_i @ K_i + Q_i
|
|
115
|
+
for P_i, K_i, R_i, Q_i in zip(Ps_next, gains, self._Rs, self._Qs)
|
|
116
|
+
]
|
|
117
|
+
|
|
118
|
+
# ------------------------------------------------------------------ #
|
|
119
|
+
# Solve
|
|
120
|
+
# ------------------------------------------------------------------ #
|
|
121
|
+
def solve(self) -> DiscretePyDiffGame:
    """Run the solver matching the horizon type and mark the game as solved.

    Calls ``_solve_infinite_horizon`` when ``self._infinite_horizon`` is
    truthy and ``_solve_finite_horizon`` otherwise, then sets
    ``self._solved`` and returns ``self`` so calls can be chained.
    """
    run_solver = (
        self._solve_infinite_horizon
        if self._infinite_horizon
        else self._solve_finite_horizon
    )
    run_solver()
    self._solved = True
    return self
|
|
128
|
+
|
|
129
|
+
def _solve_infinite_horizon(self) -> None:
|
|
130
|
+
if self.is_lqr:
|
|
131
|
+
P = self._solve_are(self._Bs[0], self._Qs[0], self._Rs[0])
|
|
132
|
+
Ps = [P]
|
|
133
|
+
gains = self._step_gains(Ps)
|
|
134
|
+
else:
|
|
135
|
+
Ps = list(self._P_f)
|
|
136
|
+
norms: list[float] = []
|
|
137
|
+
for _ in range(self.max_P_iterations):
|
|
138
|
+
gains = self._step_gains(Ps)
|
|
139
|
+
Ps = self._step_P(Ps, gains)
|
|
140
|
+
norms.append(sum(float(np.linalg.norm(P)) for P in Ps))
|
|
141
|
+
if self._converged(norms):
|
|
142
|
+
break
|
|
143
|
+
gains = self._step_gains(Ps)
|
|
144
|
+
|
|
145
|
+
self._P = Ps
|
|
146
|
+
self._K = gains
|
|
147
|
+
self._K_aggregate = self._aggregate_gain(gains)
|
|
148
|
+
self._A_cl = self._closed_loop(gains)
|
|
149
|
+
|
|
150
|
+
def _solve_finite_horizon(self) -> None:
|
|
151
|
+
Ps_t: list[list[FloatArray]] = [list(self._P_f)]
|
|
152
|
+
gains_t: list[list[FloatArray]] = []
|
|
153
|
+
for _ in range(self._L - 1):
|
|
154
|
+
gains = self._step_gains(Ps_t[-1])
|
|
155
|
+
Ps_t.append(self._step_P(Ps_t[-1], gains))
|
|
156
|
+
gains_t.append(gains)
|
|
157
|
+
gains_t.append(self._step_gains(Ps_t[-1]))
|
|
158
|
+
|
|
159
|
+
# Index 0 == t = 0 (forward time): reverse the backward sweep.
|
|
160
|
+
Ps_t.reverse()
|
|
161
|
+
gains_t.reverse()
|
|
162
|
+
self._P = np.stack([np.stack(step) for step in Ps_t]) # (L, N, n, n)
|
|
163
|
+
self._K = gains_t
|
|
164
|
+
self._K_aggregate_t = [self._aggregate_gain(g) for g in gains_t]
|
|
165
|
+
self._A_cl = self._closed_loop(gains_t[0])
|
|
166
|
+
|
|
167
|
+
# ------------------------------------------------------------------ #
|
|
168
|
+
# Gains
|
|
169
|
+
# ------------------------------------------------------------------ #
|
|
170
|
+
def _aggregate_gain(self, player_gains: list[FloatArray]) -> FloatArray:
|
|
171
|
+
if self.is_lqr:
|
|
172
|
+
return player_gains[0]
|
|
173
|
+
stacked = np.concatenate(player_gains, axis=0)
|
|
174
|
+
return self._M_inv @ stacked if self._M_inv is not None else stacked
|
|
175
|
+
|
|
176
|
+
def _gain_at(self, l: int) -> FloatArray:
|
|
177
|
+
return self._K_aggregate if self._infinite_horizon else self._K_aggregate_t[l]
|
|
178
|
+
|
|
179
|
+
def _closed_loop_at(self, l: int) -> FloatArray:
|
|
180
|
+
if self._infinite_horizon:
|
|
181
|
+
return self._A_cl
|
|
182
|
+
return self._closed_loop(self._K[l])
|
|
183
|
+
|
|
184
|
+
# ------------------------------------------------------------------ #
|
|
185
|
+
# Simulate
|
|
186
|
+
# ------------------------------------------------------------------ #
|
|
187
|
+
def simulate(self) -> DiscretePyDiffGame:
    """Propagate the closed-loop state over ``self._L`` steps from ``self._x_0``.

    At each step the error relative to the target (``self._x_T``, or the
    origin when it is ``None``) is multiplied by that step's closed-loop
    matrix and the target is added back. The trajectory, one row per step,
    is stored in ``self._x`` and ``self`` is returned.

    Raises
    ------
    RuntimeError
        If no initial state ``x_0`` is available.
    """
    self._require_solved()
    if self._x_0 is None:
        raise RuntimeError("simulate() requires x_0")

    if self._x_T is None:
        setpoint = np.zeros(self._n)
    else:
        setpoint = self._x_T

    trajectory = np.zeros((self._L, self._n))
    trajectory[0] = self._x_0
    for step in range(1, self._L):
        previous = step - 1
        error = trajectory[previous] - setpoint
        trajectory[step] = self._closed_loop_at(previous) @ error + setpoint

    self._x = trajectory
    return self
|
|
199
|
+
|
|
200
|
+
# ------------------------------------------------------------------ #
|
|
201
|
+
# Stability
|
|
202
|
+
# ------------------------------------------------------------------ #
|
|
203
|
+
def is_closed_loop_stable(self) -> bool:
    """Schur test: the closed-loop spectral radius is strictly below one.

    The spectral radius is the largest eigenvalue modulus of ``self._A_cl``.
    """
    self._require_solved()
    moduli = np.abs(np.linalg.eigvals(self._A_cl))
    spectral_radius = float(moduli.max())
    return spectral_radius < 1.0
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
__all__ = ["DiscretePyDiffGame"]
|