PyDiffGame 0.1.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,257 +1,232 @@
1
+ """Inverted-pendulum-on-a-cart benchmark: differential game vs. LQR.
2
+
3
+ A uniform rod pinned to a cart forms the classic underactuated inverted
4
+ pendulum. Linearising about the upright equilibrium yields a ``4``-dimensional
5
+ linear system (cart position, pendulum angle and their velocities) driven by a
6
+ two-dimensional physical input (cart force ``F`` and pendulum moment ``M``).
7
+ The input is decomposed so that the cart and the pendulum each become one player
8
+ of a differential game, and the result is compared against a single LQR
9
+ controller acting on the whole system.
10
+
11
+ The nonlinear closed loop can additionally be simulated by feeding the designed
12
+ gains back into the full (un-linearised) equations of motion.
13
+
14
+ Run directly to solve a single instance and report the costs and stability::
15
+
16
+ python -m PyDiffGame.examples.InvertedPendulumComparison
17
+ """
18
+
1
19
  from __future__ import annotations
2
20
 
3
21
  import numpy as np
4
- import scipy as sp
5
- from time import time
6
- import matplotlib
7
- import matplotlib.pyplot as plt
22
+ from scipy.integrate import solve_ivp
8
23
 
9
- from typing import Optional
10
-
11
- from PyDiffGame.PyDiffGame import PyDiffGame
12
- from PyDiffGame.PyDiffGameLQRComparison import PyDiffGameLQRComparison
13
- from PyDiffGame.Objective import GameObjective, LQRObjective
24
+ from PyDiffGame.base import PyDiffGame
25
+ from PyDiffGame.comparison import PyDiffGameLQRComparison
26
+ from PyDiffGame.objective import GameObjective, LQRObjective
27
+ from PyDiffGame.plotting import show
14
28
 
15
29
 
16
30
  class InvertedPendulumComparison(PyDiffGameLQRComparison):
17
- def __init__(self,
18
- m_c: float,
19
- m_p: float,
20
- p_L: float,
21
- q: float,
22
- r: Optional[float] = 1,
23
- x_0: Optional[np.array] = None,
24
- x_T: Optional[np.array] = None,
25
- T_f: Optional[float] = None,
26
- epsilon_x: Optional[float] = PyDiffGame.epsilon_x_default,
27
- epsilon_P: Optional[float] = PyDiffGame.epsilon_P_default,
28
- L: Optional[int] = PyDiffGame.L_default,
29
- eta: Optional[int] = PyDiffGame.eta_default):
30
- self.__m_c = m_c
31
- self.__m_p = m_p
32
- self.__p_L = p_L
33
- self.__l = self.__p_L / 2 # CoM of uniform rod
34
- self.__I = 1 / 12 * self.__m_p * self.__p_L ** 2 # center mass moment of inertia of uniform rod
35
-
36
- # # original linear system
37
- linearized_D = self.__m_c * self.__m_p * self.__l ** 2 + self.__I * (self.__m_c + self.__m_p)
38
- a32 = self.__m_p * PyDiffGame.g * self.__l ** 2 / linearized_D
39
- a42 = self.__m_p * PyDiffGame.g * self.__l * (self.__m_c + self.__m_p) / linearized_D
40
- A = np.array([[0, 0, 1, 0],
41
- [0, 0, 0, 1],
42
- [0, a32, 0, 0],
43
- [0, a42, 0, 0]])
44
-
45
- b21 = (m_p * self.__l ** 2 + self.__I) / linearized_D
46
- b31 = m_p * self.__l / linearized_D
31
+ """Compare a two-player differential game with an LQR for the cart-pendulum."""
32
+
33
+ def __init__(
34
+ self,
35
+ m_c: float,
36
+ m_p: float,
37
+ p_L: float,
38
+ q: float,
39
+ r: float = 1.0,
40
+ x_0: np.ndarray | None = None,
41
+ x_T: np.ndarray | None = None,
42
+ T_f: float | None = None,
43
+ epsilon_x: float = PyDiffGame.epsilon_x_default,
44
+ epsilon_P: float = PyDiffGame.epsilon_P_default,
45
+ L: int = PyDiffGame.L_default,
46
+ eta: int = PyDiffGame.eta_default,
47
+ ) -> None:
48
+ self._m_c = m_c
49
+ self._m_p = m_p
50
+ self._p_L = p_L
51
+ self._l = p_L / 2 # centre of mass of the uniform rod
52
+ self._I = 1 / 12 * m_p * p_L**2 # moment of inertia of the uniform rod about its centre
53
+
54
+ # Linearised system about the upright equilibrium. The cart<->pendulum
55
+ # coupling terms (a32, and the off-diagonal inputs b22, b31) are negative
56
+ # in the standard convention; gravity destabilises the angle (a42 > 0).
57
+ # These matrices are exactly the Jacobian of ``_nonlinear_acceleration``
58
+ # at the origin (asserted by tests/test_examples.py).
59
+ linearized_D = m_c * m_p * self._l**2 + self._I * (m_c + m_p)
60
+ a32 = -m_p * PyDiffGame.g * self._l**2 / linearized_D
61
+ a42 = m_p * PyDiffGame.g * self._l * (m_c + m_p) / linearized_D
62
+ A = np.array([[0, 0, 1, 0], [0, 0, 0, 1], [0, a32, 0, 0], [0, a42, 0, 0]], dtype=float)
63
+
64
+ b21 = (m_p * self._l**2 + self._I) / linearized_D
65
+ b31 = -m_p * self._l / linearized_D
47
66
  b22 = b31
48
67
  b32 = (m_c + m_p) / linearized_D
49
- B = np.array([[0, 0],
50
- [0, 0],
51
- [b21, b22],
52
- [b31, b32]])
68
+ B = np.array([[0, 0], [0, 0], [b21, b22], [b31, b32]], dtype=float)
53
69
 
70
+ # Each physical-input row becomes one player's decomposition matrix.
54
71
  M1 = B[2, :].reshape(1, 2)
55
72
  M2 = B[3, :].reshape(1, 2)
56
73
  Ms = [M1, M2]
57
74
 
58
- Q_x = q * np.array([[1, 0, 2, 0],
59
- [0, 0, 0, 0],
60
- [2, 0, 4, 0],
61
- [0, 0, 0, 0]])
62
- Q_theta = q * np.array([[0, 0, 0, 0],
63
- [0, 1, 0, 2],
64
- [0, 0, 0, 0],
65
- [0, 2, 0, 4]])
66
- Q_lqr = Q_theta + Q_x
75
+ Q_x = q * np.array([[1, 0, 2, 0], [0, 0, 0, 0], [2, 0, 4, 0], [0, 0, 0, 0]], dtype=float)
76
+ Q_theta = q * np.array([[0, 0, 0, 0], [0, 1, 0, 2], [0, 0, 0, 0], [0, 2, 0, 4]], dtype=float)
77
+ Q_lqr = Q_x + Q_theta
67
78
  Qs = [Q_x, Q_theta]
68
79
 
69
80
  R_lqr = np.diag([r] * 2)
70
- Rs = [np.array([r])] * 2
71
-
72
- self.__origin = (0.0, 0.0)
73
-
74
- state_variables_names = ['x',
75
- '\\theta',
76
- '\\dot{x}',
77
- '\\dot{\\theta}']
78
-
79
- args = {'A': A,
80
- 'B': B,
81
- 'x_0': x_0,
82
- 'x_T': x_T,
83
- 'T_f': T_f,
84
- 'state_variables_names': state_variables_names,
85
- 'epsilon_x': epsilon_x,
86
- 'epsilon_P': epsilon_P,
87
- 'L': L,
88
- 'eta': eta,
89
- 'force_finite_horizon': T_f is not None}
90
-
91
- lqr_objective = [LQRObjective(Q=Q_lqr, R_ii=R_lqr)]
92
- game_objectives = [GameObjective(Q=Q, R_ii=R, M_i=M_i) for Q, R, M_i in zip(Qs, Rs, Ms)]
81
+ Rs = [np.array([[r]])] * 2
82
+
83
+ self._origin = (0.0, 0.0)
84
+
85
+ state_variables_names = [
86
+ "x",
87
+ r"\theta",
88
+ r"\dot{x}",
89
+ r"\dot{\theta}",
90
+ ]
91
+
92
+ lqr_objective = [LQRObjective(Q=Q_lqr, R=R_lqr)]
93
+ game_objectives = [GameObjective(Q=Q_i, R=R_i, M=M_i) for Q_i, R_i, M_i in zip(Qs, Rs, Ms)]
93
94
  games_objectives = [lqr_objective, game_objectives]
94
95
 
95
- super().__init__(args=args,
96
- games_objectives=games_objectives,
97
- continuous=True)
96
+ super().__init__(
97
+ A=A,
98
+ B=B,
99
+ games_objectives=games_objectives,
100
+ continuous=True,
101
+ x_0=x_0,
102
+ x_T=x_T,
103
+ T_f=T_f,
104
+ L=L,
105
+ eta=eta,
106
+ epsilon_x=epsilon_x,
107
+ epsilon_P=epsilon_P,
108
+ state_variables_names=state_variables_names,
109
+ )
110
+
111
+ def _nonlinear_acceleration(
112
+ self, theta: float, theta_dot: float, F: float, M: float
113
+ ) -> tuple[float, float]:
114
+ """Cart and pendulum accelerations from the full nonlinear EOM.
115
+
116
+ Upright convention: gravity destabilises the angle. The Jacobian of this
117
+ map at the origin equals the linear design matrices ``A``, ``B`` (checked
118
+ in ``tests/test_examples.py``), so the gains designed on the linear model
119
+ are applied to a consistent nonlinear plant.
120
+ """
121
+
122
+ m_p, m_c, length, inertia = self._m_p, self._m_c, self._l, self._I
123
+ den = m_p * length**2 + inertia - (m_p * length) ** 2 * np.cos(theta) ** 2 / (m_p + m_c)
124
+ theta_ddot = (
125
+ M
126
+ - m_p
127
+ * length
128
+ * (
129
+ np.cos(theta) / (m_p + m_c) * (F + m_p * length * np.sin(theta) * theta_dot**2)
130
+ - PyDiffGame.g * np.sin(theta)
131
+ )
132
+ ) / den
133
+ x_ddot = (F + m_p * length * (np.sin(theta) * theta_dot**2 - np.cos(theta) * theta_ddot)) / (
134
+ m_p + m_c
135
+ )
136
+ return x_ddot, theta_ddot
137
+
138
+ def simulate_nonlinear_system(self, i: int) -> np.ndarray:
139
+ """Simulate the full nonlinear cart-pendulum closed loop for game ``i``.
140
+
141
+ The feedback gains designed on the linear model are fed back into the
142
+ un-linearised equations of motion. Works for both the infinite-horizon
143
+ (constant gain) and finite-horizon (time-varying gain) designs. Returns
144
+ the state trajectory with shape ``(n, L)``.
145
+ """
98
146
 
99
- def __simulate_non_linear_system(self,
100
- i: int,
101
- plot: bool = False) -> np.array:
102
- game = self._games[i]
103
- K = game.K
104
- x_T = game.x_T
105
-
106
- def nonlinear_state_space(_, x_t: np.array) -> np.array:
107
- x_t = x_t - x_T
108
-
109
- if game.is_LQR():
110
- u_t = - K[0] @ x_t
111
- F_t, M_t = u_t.T
112
- else:
113
- K_x, K_theta = K
114
- v_x = - K_x @ x_t
115
- v_theta = - K_theta @ x_t
116
- v = np.array([v_x, v_theta])
117
- F_t, M_t = game.M_inv @ v
118
-
119
- x, theta, x_dot, theta_dot = x_t
120
-
121
- theta_ddot = 1 / (
122
- self.__m_p * self.__l ** 2 + self.__I - (self.__m_p * self.__l) ** 2 * np.cos(theta) ** 2 /
123
- (self.__m_p + self.__m_c)) * (M_t - self.__m_p * self.__l *
124
- (np.cos(theta) / (self.__m_p + self.__m_c) *
125
- (F_t + self.__m_p * self.__l * np.sin(theta)
126
- * theta_dot ** 2) + PyDiffGame.g * np.sin(theta)))
127
- x_ddot = 1 / (self.__m_p + self.__m_c) * (F_t + self.__m_p * self.__l * (np.sin(theta) * theta_dot ** 2 -
128
- np.cos(theta) * theta_ddot))
129
- if isinstance(theta_ddot, np.ndarray):
130
- theta_ddot = theta_ddot[0]
131
- x_ddot = x_ddot[0]
132
-
133
- non_linear_x = np.array([x_dot, theta_dot, x_ddot, theta_ddot],
134
- dtype=float)
135
-
136
- return non_linear_x
137
-
138
- pendulum_state = sp.integrate.solve_ivp(fun=nonlinear_state_space,
139
- t_span=[0.0, game.T_f],
140
- y0=game.x_0,
141
- t_eval=game.forward_time,
142
- rtol=game.epsilon)
143
-
144
- Y = pendulum_state.y
145
-
146
- if plot:
147
- game.plot_state_variables(state_variables=Y.T,
148
- linear_system=False)
149
-
150
- return Y
151
-
152
- def __run_animation(self,
153
- i: int) -> (matplotlib.lines.Line2D, matplotlib.patches.Rectangle):
154
147
  game = self._games[i]
155
- game._x_non_linear = self.__simulate_non_linear_system(i=i,
156
- plot=True)
157
- x_t, theta_t, x_dot_t, theta_dot_t = game._x_non_linear
158
-
159
- pendulumArm = matplotlib.lines.Line2D(xdata=self.__origin,
160
- ydata=self.__origin,
161
- color='r')
162
- cart = matplotlib.patches.Rectangle(xy=self.__origin,
163
- width=0.5,
164
- height=0.15,
165
- color='b')
166
-
167
- fig = plt.figure()
168
- x_max = max(abs(max(x_t)), abs(min(x_t)))
169
- square_side = 1.1 * min(max(self.__p_L, x_max), 3 * self.__p_L)
170
-
171
- ax = fig.add_subplot(111,
172
- aspect='equal',
173
- xlim=(-square_side, square_side),
174
- ylim=(-square_side, square_side),
175
- title=f"Inverted Pendulum {'LQR' if game.is_LQR() else 'Game'} Simulation")
176
-
177
- def init() -> (matplotlib.lines.Line2D, matplotlib.patches.Rectangle):
178
- ax.add_patch(cart)
179
- ax.add_line(pendulumArm)
180
-
181
- return pendulumArm, cart
182
-
183
- def animate(i: int) -> (matplotlib.lines.Line2D, matplotlib.patches.Rectangle):
184
- x_i, theta_i = x_t[i], theta_t[i]
185
- pendulum_x_coordinates = [x_i, x_i + self.__p_L * np.sin(theta_i)]
186
- pendulum_y_coordinates = [0, - self.__p_L * np.cos(theta_i)]
187
- pendulumArm.set_xdata(x=pendulum_x_coordinates)
188
- pendulumArm.set_ydata(y=pendulum_y_coordinates)
189
-
190
- cart_x_y = [x_i - cart.get_width() / 2, - cart.get_height()]
191
- cart.set_xy(xy=cart_x_y)
192
-
193
- return pendulumArm, cart
194
-
195
- ax.grid()
196
- t0 = time()
197
- animate(0)
198
- t1 = time()
199
-
200
- frames = game.L
201
- interval = game.T_f - (t1 - t0)
202
-
203
- anim = matplotlib.animationFuncAnimation(fig=fig,
204
- func=animate,
205
- init_func=init,
206
- frames=frames,
207
- interval=interval,
208
- blit=True)
209
- plt.show()
210
-
211
-
212
- def multiprocess_worker_function(x_T: float,
213
- theta_0: float,
214
- m_c: float,
215
- m_p: float,
216
- p_L: float,
217
- q: float,
218
- epsilon_x: float,
219
- epsilon_P: float) -> int:
220
- x_T = np.array([x_T, # x
221
- theta_0, # theta
222
- 0, # x_dot
223
- 0] # theta_dot
224
- )
225
- x_0 = np.zeros_like(x_T)
226
-
227
- inverted_pendulum_comparison = \
228
- InvertedPendulumComparison(m_c=m_c,
229
- m_p=m_p,
230
- p_L=p_L,
231
- q=q,
232
- x_0=x_0,
233
- x_T=x_T,
234
- epsilon_x=epsilon_x,
235
- epsilon_P=epsilon_P) # game class
236
- is_max_lqr = \
237
- inverted_pendulum_comparison(plot_state_spaces=False,
238
- run_animations=False
239
- )
240
-
241
- # inverted_pendulum_comparison.plot_two_state_spaces(non_linear=True)
242
- return int(is_max_lqr)
243
-
244
-
245
- if __name__ == '__main__':
246
- x_Ts = [10 ** p for p in [2]]
247
- theta_Ts = [np.pi / 2 + np.pi / n for n in [10]]
248
- m_cs = [10 ** p for p in [1, 2]]
249
- m_ps = [10 ** p for p in [0, 1, 2]]
250
- p_Ls = [10 ** p for p in [0, 1]]
251
- qs = [10 ** p for p in [-2, -1, 0, 1]]
252
- epsilon_xs = [10 ** (-7)]
253
- epsilon_Ps = [10 ** (-3)]
254
- params = [x_Ts, theta_Ts, m_cs, m_ps, p_Ls, qs, epsilon_xs, epsilon_Ps]
255
-
256
- PyDiffGameLQRComparison.run_multiprocess(multiprocess_worker_function=multiprocess_worker_function,
257
- values=params)
148
+ game.solve()
149
+ x_T = game.x_T if game.x_T is not None else np.zeros(game.n)
150
+ forward_time = game.forward_time
151
+ n_samples = len(forward_time)
152
+ delta = game.T_f / n_samples
153
+
154
+ def nonlinear_state_space(t: float, x_t: np.ndarray) -> np.ndarray:
155
+ x_tilde = x_t - x_T
156
+ # Aggregate physical-input gain at this time (already maps the
157
+ # per-player gains back to the physical input u = [F, M]).
158
+ sample = min(int(t / delta), n_samples - 1)
159
+ F_t, M_t = (-game._gain_at(sample) @ x_tilde).ravel()
160
+ _x, theta, _x_dot, theta_dot = x_t
161
+ x_ddot, theta_ddot = self._nonlinear_acceleration(theta, theta_dot, F_t, M_t)
162
+ return np.array([_x_dot, theta_dot, x_ddot, theta_ddot], dtype=float)
163
+
164
+ pendulum_state = solve_ivp(
165
+ fun=nonlinear_state_space,
166
+ t_span=[0.0, game.T_f],
167
+ y0=game.x_0,
168
+ t_eval=game.forward_time,
169
+ rtol=1e-6,
170
+ )
171
+
172
+ return pendulum_state.y
173
+
174
+
175
+ def multiprocess_worker_function(
176
+ x_T: float,
177
+ theta_0: float,
178
+ m_c: float,
179
+ m_p: float,
180
+ p_L: float,
181
+ q: float,
182
+ epsilon_x: float,
183
+ epsilon_P: float,
184
+ ) -> int:
185
+ """Worker for a parameter sweep: returns 1 if the LQR cost is the largest."""
186
+
187
+ x_T_vec = np.array([x_T, theta_0, 0.0, 0.0])
188
+ x_0 = np.zeros_like(x_T_vec)
189
+
190
+ comparison = InvertedPendulumComparison(
191
+ m_c=m_c,
192
+ m_p=m_p,
193
+ p_L=p_L,
194
+ q=q,
195
+ x_0=x_0,
196
+ x_T=x_T_vec,
197
+ T_f=10.0,
198
+ L=300,
199
+ epsilon_x=epsilon_x,
200
+ epsilon_P=epsilon_P,
201
+ )
202
+ comparison.run(plot_state_spaces=False)
203
+ lqr_cost, game_cost = comparison.costs()
204
+ return int(lqr_cost >= game_cost)
205
+
206
+
207
+ def main(*, plot: bool = True, save_figure: bool = False) -> None:
208
+ """Solve a single inverted-pendulum comparison and print the costs."""
209
+
210
+ m_c, m_p, p_L, q, r = 10.0, 1.0, 1.0, 1.0, 1.0
211
+ x_T = np.array([5.0, 0.0, 0.0, 0.0])
212
+ x_0 = np.array([0.0, np.pi / 18, 0.0, 0.0])
213
+
214
+ # The upright pendulum is open-loop unstable, so the coupled infinite-horizon
215
+ # game need not have a stabilising Nash equilibrium; use a finite horizon.
216
+ comparison = InvertedPendulumComparison(
217
+ m_c=m_c, m_p=m_p, p_L=p_L, q=q, r=r, x_0=x_0, x_T=x_T, T_f=10.0, L=300
218
+ )
219
+ comparison.run(plot_state_spaces=plot, save_figure=save_figure)
220
+
221
+ lqr_cost, game_cost = comparison.costs()
222
+ print(f"LQR cost = {lqr_cost:.4g}")
223
+ print(f"Game cost = {game_cost:.4g}")
224
+ print(f"All games controllable: {comparison.are_all_controllable()}")
225
+ print(f"LQR closed loop stable: {comparison[0].is_closed_loop_stable()}")
226
+ print(f"Game closed loop stable: {comparison[1].is_closed_loop_stable()}")
227
+ if plot:
228
+ show()
229
+
230
+
231
+ if __name__ == "__main__":
232
+ main(plot=False)