physkan 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ **/__pycache__/
@@ -0,0 +1,246 @@
1
+ # %% [markdown]
2
+ # # Bounded-KAN: Architecture Demonstrations
3
+ # This suite systematically proves the uncertainty-forwarding and gradient
4
+ # firewall mechanics of the Bounded-KAN architecture.
5
+
6
+ # %%
7
+ import torch
8
+ import torch.nn as nn
9
+ from physkan import KAN, KANDemonstrator, KANLinear
10
+
11
+ torch.manual_seed(42)
12
+
13
+ # Helper: Generate raw physical state (x)
14
def generate_x_data(x_min, x_max, steps=200):
    """Return evenly spaced samples of the raw physical state x.

    Args:
        x_min: First sample value (inclusive).
        x_max: Last sample value (inclusive).
        steps: Number of samples to generate.

    Returns:
        Tensor of shape ``(steps, 1)`` so each sample is one row.
    """
    return torch.linspace(x_min, x_max, steps).unsqueeze(1)
16
+
17
+ # Helper: Generate raw physical state (x) and angle (theta)
18
def generate_x_theta_train(steps=400):
    """Draw random training samples of the state x and the angle theta.

    Args:
        steps: Number of samples (rows) to draw.

    Returns:
        Tensor of shape ``(steps, 2)`` with columns ``[x, theta]``, where
        x is uniform on [-1, 1) and theta is uniform on [-pi, pi).
    """
    # x is drawn before theta; keep this order so seeded runs are reproducible.
    x = torch.rand(steps, 1) * 2 - 1
    # Full phase [-pi, pi] to break collinearity and ensure cos(theta) spans [-1, 1]
    theta = torch.rand(steps, 1) * 2 * torch.pi - torch.pi
    return torch.cat([x, theta], dim=1)
23
+
24
def generate_x_theta_eval(x_min, x_max, steps=200, theta=1.5):
    """Build an evaluation sweep over x at one fixed angle.

    Args:
        x_min: First x value of the sweep (inclusive).
        x_max: Last x value of the sweep (inclusive).
        steps: Number of samples (rows).
        theta: Constant angle in radians used for every row. The default of
            1.5 rad (~86 deg) gives cos(theta) near 0.07, which specifically
            exposes the naive multiplication trap for evaluation.

    Returns:
        Tensor of shape ``(steps, 2)`` with columns ``[x, theta]``.
    """
    x = torch.linspace(x_min, x_max, steps).unsqueeze(1)
    # float() keeps the angle column floating point even for an int argument.
    theta_column = torch.full((steps, 1), float(theta))
    return torch.cat([x, theta_column], dim=1)
30
+
31
+ # %%
32
+ # %matplotlib inline
33
+
34
+ # %% [markdown]
35
+ # # 0a. Standard KAN Vulnerability (Arbitrary OOB)
36
+ # **Goal:** Mimic an unprotected KAN using narrow nominal bounds `(-1.0, 1.0)` and
37
+ # the default `SiLU` base activation. We train on the nominal range and extrapolate.
38
+ #
39
+ # **Result:** While our native clamp turns the violent out-of-bounds discontinuity
40
+ # into a plateau, the asymmetric nature of `SiLU` (linear for positive, zero for
41
+ # negative) makes extrapolation arbitrary. It grows on the right but flatlines on
42
+ # the left.
43
+
44
+ # %%
45
+ model_0a = KANLinear(
46
+ in_features=1,
47
+ out_features=1,
48
+ grid_size=5,
49
+ spline_order=3,
50
+ grid_range=(-1.0, 1.0),
51
+ base_activation=nn.SiLU
52
+ )
53
+ demo_0a = KANDemonstrator(model=model_0a, target_fn=lambda x: x**2)
54
+
55
+ demo_0a.train(generate_x_data(-1.0, 1.0, 100))
56
+ demo_0a.plot(generate_x_data(-4.0, 4.0, 200), "0a. Narrow Bounds (Arbitrary SiLU Asymmetry)")
57
+
58
+ # %% [markdown]
59
+ # # 0b. The "Wide Grid" Fallacy (Untrained Knot Collapse)
60
+ # **Goal:** Mimic a practitioner trying to fix 0a by expanding the bounds to cover
61
+ # the extrapolation limits `(-4.0, 4.0)`. We increase `grid_size` proportionally
62
+ # to maintain resolution.
63
+ #
64
+ # **Result:** B-splines have strictly local support. The knots in the `(1.0, 4.0)`
65
+ # region receive absolutely zero gradient updates during training. The prediction
66
+ # completely detaches from the physics and outputs chaotic initialization noise,
67
+ # proving why expanding bounds without data is mathematically unsafe.
68
+
69
+ # %%
70
+ model_0b = KANLinear(
71
+ in_features=1,
72
+ out_features=1,
73
+ grid_size=20, # Increased to maintain resolution over wider bounds
74
+ spline_order=3,
75
+ grid_range=(-4.0, 4.0), # The practitioner's "fix"
76
+ base_activation=nn.SiLU
77
+ )
78
+ demo_0b = KANDemonstrator(model=model_0b, target_fn=lambda x: x**2)
79
+
80
+ demo_0b.train(generate_x_data(-1.0, 1.0, 100))
81
+ demo_0b.plot(generate_x_data(-4.0, 4.0, 200), "0b. Wide Bounds Fallacy (Untrained Extrapolation)")
82
+
83
+ # %% [markdown]
84
+ # # 0c. The Data Sparsity Vulnerability (The Interpolation Hole)
85
+ # **Goal:** The practitioner now tries to train across the full wide grid `(-1.0, 4.0)`.
86
+ # However, real physical data has gaps. We filter out all training data between `2.0`
87
+ # and `3.5` to simulate a sparse transition regime (e.g., ships avoiding marginal weather).
88
+ #
89
+ # **Result:** Even though the bounds enclose all the data, the knots *inside the hole* receive zero gradient updates. Instead of bridging the gap smoothly, the prediction
90
+ # violently collapses into the void, outputting untrained noise. This proves that
91
+ # relying purely on splines across sparse datasets destroys physical identification.
92
+
93
+ # %%
94
+ model_0c = KANLinear(
95
+ in_features=1,
96
+ out_features=1,
97
+ grid_size=20,
98
+ spline_order=3,
99
+ grid_range=(-1.0, 4.0),
100
+ base_activation=nn.SiLU
101
+ )
102
+ demo_0c = KANDemonstrator(model=model_0c, target_fn=lambda x: x**2)
103
+
104
+ # Generate full data, then explicitly mask out the (2.0 to 3.5) transition regime
105
+ x_train_0c = generate_x_data(-1.0, 4.0, steps=200)
106
+ x_train_sparse = x_train_0c[(x_train_0c[:, 0] < 1.5) | (x_train_0c[:, 0] > 3.5)]
107
+
108
+ demo_0c.train(x_train_sparse)
109
+ demo_0c.plot(generate_x_data(-4.0, 4.0, 200), "0c. Data Sparsity (Interpolation Hole Collapse)")
110
+
111
+ # %% [markdown]
112
+ # # 1. Spline Plateau (Symmetric Linear Track)
113
+ # **Goal:** Show how Bounded-KAN behaves with the strict `Identity` linear baseline.
114
+ #
115
+ # **Result:** Inside the bounds, the splines perfectly fit the curve. Out of bounds,
116
+ # the mechanical clamp safely freezes the splines to prevent chaotic oscillation.
117
+ # However, notice that the left-side extrapolation actually looks slightly *worse* than the naive SiLU in Case 0a!
118
+ #
119
+ # **Why?** The symmetry exists in the training data, but *we structurally
120
+ # enforced* an asymmetric linear asymptote by using the strict `Identity` base track.
121
+ # The splines easily fit the symmetric parabola locally, while the base track absorbs
122
+ # a slight residual slope. When the splines clamp out of bounds, that raw linear slope
123
+ # is exposed. We intentionally trade the arbitrary, "lucky" flatlining of SiLU for
124
+ # strict, predictable linear extrapolation.
125
+ #
126
+ # **Try this:** If you know the physical domain is symmetric, you can pass `base_activation=torch.abs` when initializing the model
127
+ # to structurally enforce a symmetric V-shape out of bounds. While this makes the baseline extrapolation look slightly better,
128
+ # it is still just a linear approximation. In general, the explicit feature engineering demonstrated in step 2 is the preferred
129
+ # approach.
130
+
131
+ # %%
132
+ model_1 = KAN(layer_dims=[1, 1], grid_size=5, spline_order=3)
133
+ demo_1 = KANDemonstrator(model=model_1, target_fn=lambda x: x**2)
134
+
135
+ demo_1.train(generate_x_data(-1.0, 1.0, 100))
136
+ demo_1.plot(generate_x_data(-4.0, 4.0, 200), "1. Spline Plateau (Symmetric Linear Track)")
137
+
138
+ # %% [markdown]
139
+ # # 2a. Linear Recovery via Feature Engineering
140
+ # **Goal:** Provide $x^2$ as an engineered feature. Show that extrapolation now
141
+ # works perfectly because the unbroken linear track carries the out-of-bounds scaling.
142
+
143
+ # %%
144
+ model_2a = KAN(layer_dims=[2, 1], grid_size=5, spline_order=3)
145
+ demo_2a = KANDemonstrator(
146
+ model=model_2a,
147
+ target_fn=lambda x: x**2,
148
+ feature_fn=lambda x: torch.cat([x, x**2], dim=1)
149
+ )
150
+
151
+ demo_2a.train(generate_x_data(-1.0, 1.0, 100))
152
+ demo_2a.plot(generate_x_data(-4.0, 4.0, 200), "2a. Linear Recovery (Engineered $x^2$)")
153
+
154
+ # %% [markdown]
155
+ # # 2b. Interval Protection (The Collinearity Fix)
156
+ # **Goal:** Use the `KANInteraction` module to compute the product using interval
157
+ # arithmetic. We feed the network $x^2$, $\cos(\theta)$, and their interaction.
158
+ #
159
+ # **Result:** Look at the bottom plot—the firewall worked perfectly! It recognized
160
+ # the massive out-of-bounds variance of $x$ and slammed the severity $D$ up to 6.0.
161
+ # However, the physical prediction (top plot) overshoots. Why? Spurious correlation.
162
+ # During training, the network got lazy. Instead of relying purely on the interaction
163
+ # feature, it put weight on the raw $x^2$ feature, and used the splines to cancel
164
+ # out the error. When extrapolated, the splines clamped, the cancellation stopped,
165
+ # and the raw $x^2$ error shot up. This proves why severity tracking is non-negotiable!
166
+
167
+ # %%
168
+ model_2b = KAN(layer_dims=[1, 1], interaction_map=[[0, 0]], grid_size=5, spline_order=3)
169
+ demo_2b = KANDemonstrator(
170
+ model=model_2b,
171
+ target_fn=lambda x: x**2,
172
+ feature_fn=lambda x: x
173
+ )
174
+
175
+ demo_2b.train(generate_x_data(-1.0, 1.0, 100))
176
+ demo_2b.plot(generate_x_data(-4.0, 4.0, 200), "2b. Interval Protection (The Collinearity Fix)")
177
+
178
+ # %% [markdown]
179
+ # # 2c. The Dropout Fix (Forcing Physical Isolation)
180
+ # **Goal:** How do we stop the network from using splines as a crutch to hide bad
181
+ # linear weights? We introduce **Spline Dropout**. By randomly zeroing out the
182
+ # splines during training, the linear track is forced to explain as much as possible of the physical features.
183
+ #
184
+ # **Result:** The linear track sets the weight of pure $x^2$ to zero, and the weight
185
+ # of the interaction feature to 1.0. The physical prediction is now perfectly flat
186
+ # (matching the true physics), AND the severity firewall remains fully active.
187
+
188
+ # %%
189
+ model_2c = KAN(layer_dims=[1, 1], interaction_map=[[0, 0]], grid_size=5, spline_order=3, spline_dropout=0.05)
190
+ demo_2c = KANDemonstrator(
191
+ model=model_2c,
192
+ target_fn=lambda x: x**2,
193
+ feature_fn=lambda x: x
194
+ )
195
+
196
+ demo_2c.train(generate_x_data(-1.0, 1.0, 100))
197
+ demo_2c.plot(generate_x_data(-4.0, 4.0, 200), "2c. The Dropout Fix (Perfect Physics + Firewall)")
198
+
199
+ # %% [markdown]
200
+ # # 3a. Protected Interaction Layer
201
+ # **Goal:** Use the `KANInteraction` module to compute the product.
202
+ # The interval arithmetic accurately assesses the high variance, raises a severe dual $D$,
203
+ # and slams the gradient firewall shut. Extrapolation plateaus safely.
204
+
205
+ # %%
206
+ model_3a = KAN(layer_dims=[2, 1], interaction_map=[[0, 1]], grid_size=5, spline_order=3, spline_dropout=0.05)
207
+ demo_3a = KANDemonstrator(
208
+ model=model_3a,
209
+ target_fn=lambda x: (x[:, 0:1]**2) * torch.cos(x[:, 1:2]),
210
+ feature_fn=lambda x: torch.cat([x[:, 0:1]**2, torch.cos(x[:, 1:2])], dim=1)
211
+ )
212
+
213
+ demo_3a.train(generate_x_theta_train())
214
+ demo_3a.plot(generate_x_theta_eval(-4.0, 4.0), "3a. Interval Protection (Interaction Firewall)")
215
+
216
+ # %% [markdown]
217
+ # # 3b. Deep Network Feature Discovery
218
+ # **Goal:** Remove explicit interaction mapping. Provide just $x^2$ and $\cos(\theta)$
219
+ # to a deeper network (`[2, 4, 1]`) to let it learn the interaction. Show that
220
+ # the dual mathematically compounds through the linear matrices, protecting the entire depth.
221
+
222
+ # %%
223
+ model_3b = KAN(layer_dims=[2, 4, 1], grid_size=5, spline_order=3)
224
+ demo_3b = KANDemonstrator(
225
+ model=model_3b,
226
+ target_fn=lambda x: (x[:, 0:1]**2) * torch.cos(x[:, 1:2]),
227
+ feature_fn=lambda x: torch.cat([x[:, 0:1]**2, torch.cos(x[:, 1:2])], dim=1)
228
+ )
229
+
230
+ demo_3b.train(generate_x_theta_train(steps=800), epochs=1000)
231
+ demo_3b.plot(generate_x_theta_eval(-4.0, 4.0), "3b. Deep Discovery (Matrix Dual Routing)")
232
+
233
+ # %% [markdown]
234
+ # **The Takeaway:** The severity firewall ($D$) still spikes perfectly, alerting us that
235
+ # we have left the data-driven regime. However, because the deep network relies on
236
+ # fragile, unconstrained spline combinations to fake multiplication, the physical
237
+ # extrapolation shape becomes erratic.
238
+ #
239
+ # This highlights a crucial philosophical point: high severity ($D$) doesn't inherently
240
+ # mean "danger"—it simply means the model is now relying entirely on its structural priors.
241
+ # If those priors are unconstrained deep networks, extrapolation is chaotic. But if we
242
+ # engineer those priors correctly, we can extrapolate safely and indefinitely.
243
+ #
244
+ # If deep KANs and automated feature discovery are part of your plans, please proceed
245
+ # to `demo_deep.py` to see how we leash the beast!
246
+ # %%
@@ -0,0 +1,147 @@
1
+ # %% [markdown]
2
+ # # Bounded-KAN: Architecture Demonstrations
3
+ # This suite systematically proves the uncertainty-forwarding and gradient
4
+ # firewall mechanics of the Bounded-KAN architecture.
5
+
6
+ # %%
7
+ import torch
8
+ import torch.nn as nn
9
+ from physkan import KAN, KANDemonstrator, KANLinear
10
+
11
+ torch.manual_seed(42)
12
+
13
+ # Helper: Generate raw physical state (x)
14
def generate_x_data(x_min, x_max, steps=200):
    """Return evenly spaced samples of the raw physical state x.

    Args:
        x_min: First sample value (inclusive).
        x_max: Last sample value (inclusive).
        steps: Number of samples to generate.

    Returns:
        Tensor of shape ``(steps, 1)`` so each sample is one row.
    """
    return torch.linspace(x_min, x_max, steps).unsqueeze(1)
16
+
17
+ # Helper: Generate raw physical state (x) and angle (theta)
18
def generate_x_theta_train(steps=400):
    """Draw random training samples of the state x and the angle theta.

    Args:
        steps: Number of samples (rows) to draw.

    Returns:
        Tensor of shape ``(steps, 2)`` with columns ``[x, theta]``, where
        x is uniform on [-1, 1) and theta is uniform on [-pi, pi).
    """
    # x is drawn before theta; keep this order so seeded runs are reproducible.
    x = torch.rand(steps, 1) * 2 - 1
    # Full phase [-pi, pi] to break collinearity and ensure cos(theta) spans [-1, 1]
    theta = torch.rand(steps, 1) * 2 * torch.pi - torch.pi
    return torch.cat([x, theta], dim=1)
23
+
24
def generate_x_theta_eval(x_min, x_max, steps=200, theta=1.5):
    """Build an evaluation sweep over x at one fixed angle.

    Args:
        x_min: First x value of the sweep (inclusive).
        x_max: Last x value of the sweep (inclusive).
        steps: Number of samples (rows).
        theta: Constant angle in radians used for every row. The default of
            1.5 rad (~86 deg) gives cos(theta) near 0.07, which specifically
            exposes the naive multiplication trap for evaluation.

    Returns:
        Tensor of shape ``(steps, 2)`` with columns ``[x, theta]``.
    """
    x = torch.linspace(x_min, x_max, steps).unsqueeze(1)
    # float() keeps the angle column floating point even for an int argument.
    theta_column = torch.full((steps, 1), float(theta))
    return torch.cat([x, theta_column], dim=1)
30
+
31
+ # %%
32
+ # %matplotlib inline
33
+
34
+ # %% [markdown]
35
+ # # 3b. Deep Network Feature Discovery
36
+ # We repeat this final example from `demo.py` for context, with takeaway
37
+ # "... high severity ($D$) doesn't inherently
38
+ # mean "danger"—it simply means the model is now relying entirely on its structural priors.
39
+ # If those priors are unconstrained deep networks, extrapolation is chaotic. But if we
40
+ # engineer those priors correctly, we can extrapolate safely and indefinitely.".
41
+
42
+ # %%
43
+ torch.manual_seed(42)
44
+ model_3b = KAN(layer_dims=[2, 4, 1], grid_size=5, spline_order=3, spline_dropout=0.1)
45
+ demo_3b = KANDemonstrator(
46
+ model=model_3b,
47
+ target_fn=lambda x: (x[:, 0:1]**2) * torch.cos(x[:, 1:2]),
48
+ feature_fn=lambda x: torch.cat([x[:, 0:1]**2, torch.cos(x[:, 1:2])], dim=1)
49
+ )
50
+
51
+ demo_3b.train(generate_x_theta_train(steps=800), epochs=1000)
52
+ demo_3b.plot(generate_x_theta_eval(-4.0, 4.0), "3b. Deep Discovery (Matrix Dual Routing)")
53
+
54
+ # %% [markdown]
55
+ # # 3c. Deep Network Feature Discovery, a hybrid approach
56
+ # ...hybrid polynomial/kan...
57
+
58
+ # %%
59
+ torch.manual_seed(42)
60
+ model_3c = KAN(layer_dims=[2, 4, 1], grid_size=5, spline_order=3, symbolic_order=2, spline_dropout=0.8)
61
+ demo_3c = KANDemonstrator(
62
+ model=model_3c,
63
+ target_fn=lambda x: (x[:, 0:1]**2) * torch.cos(x[:, 1:2]),
64
+ feature_fn=lambda x: torch.cat([x[:, 0:1]**2, torch.cos(x[:, 1:2])], dim=1)
65
+ )
66
+
67
+ demo_3c.train(generate_x_theta_train(steps=800), epochs=1000)
68
+ demo_3c.plot(generate_x_theta_eval(-4.0, 4.0), "3c. Hybrid Deep Discovery")
69
+
70
+ # %% [markdown]
71
+ # # 4a. Multi-target Surgical Detachment
72
+ #
73
+ # **Goal:** Demonstrate that the Dual Severity Tracker is not a global panic button, but a surgical, node-specific diagnostic tool.
74
+ #
75
+ # We will map a system with two outputs:
76
+ # * $y_1$ relies heavily on an $x^2$ anomaly.
77
+ # * $y_2$ is highly insulated, relying on a stable $\cos(\theta)$ feature and a tiny fractional coefficient of $x$.
78
+ #
79
+ # **The Expectation:** When $x$ goes violently out of bounds, the network should aggressively firewall $y_1$ (high severity) while leaving $y_2$ almost completely untouched. The severity is quarantined because the underlying linear weights strictly dictate the localized interval routing ($|W| \cdot D$).
80
+
81
+ # %%
82
+ torch.manual_seed(42)
83
+
84
def target_multi(x):
    """Compute the two-output ground truth for the multi-target demo.

    Args:
        x: Tensor whose column 0 is the raw state x and column 1 is the
            angle theta (columns are selected with ``x[:, 0:1]`` / ``x[:, 1:2]``).

    Returns:
        Tensor with columns ``[y1, y2]`` where ``y1 = x**2`` and
        ``y2 = 1e-3 * x + cos(theta)``.
    """
    state = x[:, 0:1]
    theta = x[:, 1:2]
    # y1 is highly sensitive to the out-of-bounds explosion
    y1 = state**2
    # y2 is insulated, relying mostly on bounded cos(theta)
    y2 = (1e-3 * state) + torch.cos(theta)
    return torch.cat([y1, y2], dim=1)
90
+
91
def feature_multi(x):
    """Expand raw ``[x, theta]`` rows into the three engineered features.

    Provides the exact bases so the symbolic track can perfectly map the weights.

    Args:
        x: Tensor whose column 0 is the raw state x and column 1 is the
            angle theta.

    Returns:
        Tensor with columns ``[x, x**2, cos(theta)]``.
    """
    state = x[:, 0:1]
    return torch.cat(
        [
            state,  # Raw x
            state**2,  # The x^2 anomaly
            torch.cos(x[:, 1:2]),  # The bounded periodic feature
        ],
        dim=1,
    )
98
+
99
+ # We use 3 inputs for the 3 explicit features.
100
+ # symbolic_order=1 lets the global skip-connection effortlessly lock onto the correct features.
101
+ model_4a = KAN(
102
+ layer_dims=[3, 4, 2],
103
+ grid_size=5,
104
+ spline_order=3,
105
+ symbolic_order=1,
106
+ spline_dropout=0.8
107
+ )
108
+
109
+ demo_4a = KANDemonstrator(
110
+ model=model_4a,
111
+ target_fn=target_multi,
112
+ feature_fn=feature_multi
113
+ )
114
+
115
+ demo_4a.train(generate_x_theta_train(steps=800), epochs=1000)
116
+ demo_4a.plot(generate_x_theta_eval(-4.0, 4.0), "4a. Multi-target Surgical Detachment")
117
+
118
+ # %%
119
+ # 4b. Perfect Surgical Detachment (Shallow Architecture)
120
def target_multi(x):
    """Compute the two-output ground truth for the multi-target demo.

    Args:
        x: Tensor whose column 0 is the raw state x and column 1 is the
            angle theta (columns are selected with ``x[:, 0:1]`` / ``x[:, 1:2]``).

    Returns:
        Tensor with columns ``[y1, y2]`` where ``y1 = x**2`` and
        ``y2 = 1e-3 * x + cos(theta)``.
    """
    state = x[:, 0:1]
    theta = x[:, 1:2]
    # y1 is highly sensitive to the out-of-bounds explosion
    y1 = state**2
    # y2 is insulated, relying mostly on bounded cos(theta)
    y2 = (1e-3 * state) + torch.cos(theta)
    return torch.cat([y1, y2], dim=1)
126
+
127
def feature_multi(x):
    """Expand raw ``[x, theta]`` rows into the three engineered features.

    Provides the exact bases so the symbolic track can perfectly map the weights.

    Args:
        x: Tensor whose column 0 is the raw state x and column 1 is the
            angle theta.

    Returns:
        Tensor with columns ``[x, x**2, cos(theta)]``.
    """
    state = x[:, 0:1]
    return torch.cat(
        [
            state,  # Raw x
            state**2,  # The x^2 anomaly
            torch.cos(x[:, 1:2]),  # The bounded periodic feature
        ],
        dim=1,
    )
134
+
135
+ model_4b = KAN(
136
+ layer_dims=[3, 2], # NO hidden layers. Pure direct mapping.
137
+ grid_size=5,
138
+ spline_order=3,
139
+ symbolic_order=1,
140
+ spline_dropout=0.8
141
+ )
142
+
143
+ demo_4b = KANDemonstrator(model=model_4b, target_fn=target_multi, feature_fn=feature_multi)
144
+ demo_4b.train(generate_x_theta_train(steps=800), epochs=1000)
145
+ demo_4b.plot(generate_x_theta_eval(-4.0, 4.0), "4b. Perfect Quarantine (Shallow)")
146
+
147
+ # %%
physkan-0.1.0/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Based on efficient-kan, copyright (c) 2024 Huanqi Cao.
4
+ Modifications copyright (c) 2026 Simula Research Laboratory.
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
physkan-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,184 @@
1
+ Metadata-Version: 2.4
2
+ Name: physkan
3
+ Version: 0.1.0
4
+ Summary: A physics-constrained Kolmogorov-Arnold Network with bounded latent spaces.
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.12
7
+ Requires-Dist: torch>=2.9.0
8
+ Provides-Extra: dev
9
+ Requires-Dist: ruff; extra == 'dev'
10
+ Description-Content-Type: text/markdown
11
+
12
+ # PhysKAN
13
+
14
+ **Physics-constrained Kolmogorov-Arnold Networks for stable system identification**
15
+
16
+ This repository provides a structural adaptation of the B-spline Kolmogorov-Arnold Network (KAN) architecture, designed for physical system identification, digital twins, and robust regression.
17
+
18
+ While standard KANs perform well at function approximation in purely mathematical domains, applying them to physical telemetry often requires interventions, like dynamic grid updates or statistical normalization such as LayerNorm, to handle out-of-bounds (OOB) anomalies.
19
+ In this context, OOB refers to any data point that exceeds the nominal operational range of the system, whether caused by a real but long-tail phenomenon (e.g., unseen weather regimes) or a transient sensor failure (e.g., signal spikes).
20
+ Unfortunately, these standard deep learning techniques remove the spatial meaning of the network's internal variables.
21
+
22
+ This architecture addresses this by freezing the spatial grid and enforcing strict physical bounds natively, prioritizing metric stability and OOB safety over localized curve-fitting flexibility.
23
+ It also uses forward uncertainty propagation with interval arithmetic to track the OOB state through the network.
24
+
25
+
26
+ ## Core design philosophy
27
+
28
+ PhysKAN is built on three central ideas, meant to bridge the gap between theoretical non-linear mapping and the robust fail-safes required for physical engineering:
29
+
30
+ 1. **Progressive Koopman-style unbending:** Rather than relying on black-box MLP node activations, the model acts as a structural filter.
31
+ It uses constrained B-splines to progressively unbend non-linear physical inputs layer-by-layer, lifting them into a linearized latent space (analogous to finding "observables" in Koopman Operator Theory).
32
+
33
+ 2. **Embrace out-of-bounds (OOB) values:** Real-world physics do not stay neatly within standardized grids.
34
+ Instead of arbitrarily squashing long-tail events or sensor glitches with clamps or global activations, the architecture uses the grid range to explicitly define the boundary between the dense, well-modeled operational regime and the sparse, asymptotic tail.
35
+ OOB states are safely clamped on the non-linear spline track and routed unclamped through a parallel linear track, ensuring mathematically stable extrapolation.
36
+
37
+ 3. **Epistemic uncertainty tracking:** The network computes a continuous dual property alongside the physical prediction.
38
+ This signal forward-propagates the mathematical severity of any out-of-bounds state, providing a deterministic measure of when the network is forced to extrapolate.
39
+
40
+ ---
41
+
42
+ ## Under the hood: the OOB routing mechanism
43
+
44
+ To safely execute this philosophy, the network requires a specific mental model for how it routes data—especially during the backward pass.
45
+
46
+ In standard implementations, out-of-bounds data either "falls off" the spline grid entirely (dropping to zero) or requires the input to be clamped or bounded.
47
+ However, if clamped *without* gradient detachment, the boundary knot absorbs the training loss for all out-of-bounds states.
48
+ It becomes a wastebasket for outlying values, compressing the long-tail distribution into a single coordinate and warping predictions for nominal operations.
49
+
50
+ The PhysKAN architecture acts as a traffic cop for physical regimes:
51
+ * **The nominal regime (non-linear track):** Dense, expected data operates inside the grid, shaping the non-linear B-splines.
52
+ * **The out-of-bounds regime (linear track):** OOB data are clamped on the non-linear track (with detached gradients to protect the nominal-range knots).
53
+ The excess signal flows entirely through the linear track.
54
+
55
+ This ensures the non-linear splines strictly learn the nominal physics, while the linear track safely catches long-tail events.
56
+
57
+ ## Architectural constraints
58
+
59
+ To maintain the absolute physical meaning of these latent observables during deployment, the model relies on two structural constraints:
60
+
61
+ ### 1. Static grid boundaries
62
+
63
+ KAN architectures often rely on dynamic grid updates (knot insertion or movement) during training.
64
+ This architecture disables this.
65
+ Dynamic updates shift the underlying coordinate system of the network mid-training, causing downstream layers to lose their physical calibration.
66
+ By enforcing a static grid, the model sacrifices some theoretical curve-fitting capacity to guarantee that a specific latent state retains its exact metric meaning from initialization to deployment.
67
+
68
+ ### 2. Linear skip connections as safety valves
69
+ Because the spline gradients are detached for OOB values, the network routes the excess gradients entirely through the parallel linear skip connection.
70
+ This serves as a vital safety valve: it protects the non-linear splines from gradient pollution, and it ensures that OOB inputs extrapolate linearly and predictably.
71
+ This limits the downstream impact of anomalies, making system filtering more reliable.
72
+
73
+ #### Justification for linear extrapolation (physical basis functions)
74
+
75
+ While real-world OOB events often exhibit higher-order scaling (e.g., cubic wave resistance), the model enforces a linear default for OOB extrapolation.
76
+ This is a deliberate design choice to prevent mathematical instability caused by sensor faults.
77
+
78
+ To safely capture higher-order OOB physics, domain knowledge should be embedded directly via feature engineering.
79
+ As long as the input features form a sufficient physical basis, particularly for asymptotic behaviours, the linear skip connection will naturally capture higher-order OOB phenomena as a linear combination of features without compromising the nominal operating region.
80
+
81
+ Applying a post-summation node activation (such as `SiLU` or `tanh`) fundamentally sabotages this mechanism.
82
+ A non-linear activation will warp the magnitude of the OOB event, rendering the linear skip connection unable to model it.
83
+ For this reason, activations are disabled by default (using `Identity`).
84
+ Other activations may be selected, but beware that the guarantees provided by "standard" PhysKAN may be weakened or destroyed.
85
+
86
+ ## Feature engineering and explicit interactions
87
+
88
+ Deep architectures can theoretically learn multiplicative interactions (such as computing `x * y` by combining multiple layers).
89
+ Making the network deduce these relationships from scratch consumes capacity and degrades poorly when out-of-bounds.
90
+ Instead, to capture known physical behaviors, domain knowledge should be embedded directly via feature engineering.
91
+ Providing the network with a dictionary of physical basis functions (e.g., `x^2` or `cos(θ)`) allows the linear skip connection to latch onto these engineered features as a stable baseline.
92
+ This leaves the splines to map the local residuals, ensuring safe extrapolation when the splines saturate.
93
+
94
+ However, combining features naively can mask out-of-bounds anomalies.
95
+ If you manually pre-compute an interaction like `wave_height * cos(wind_dir)` and pass it to the network as a raw input, the anomaly signal is suppressed.
96
+ For instance, if `wave_height` is OOB (e.g., twice nominal range) but `cos(wind_dir)` is near zero, their product is well within nominal bounds.
97
+ The model treats this as a regular in-bounds prediction, and uses the data point to update its nominal-range spline.
98
+
99
+ To prevent this suppression, the network requires interaction terms to be defined internally via an `interaction_map` rather than expanded manually beforehand.
100
+
101
+ The network computes a continuous dual property alongside the standard physical prediction.
102
+ This dual represents the mathematical severity of the out-of-bounds state.
103
+ * The *physical prediction* is computed using the non-linear splines and the linear track.
104
+ * The *dual severity* strictly bypasses the splines and propagates via the absolute values of the linear weights, ensuring that uncertainties compound and never cancel out.
105
+
106
+ By defining interactions explicitly through the `interaction_map`, the model correctly applies the uncertainty product rule to the input features before they enter the network.
107
+ If a large wave anomaly interacts with a nominal-range cosine, the resulting interaction term inherits a proportional severity score.
108
+ This deterministic distress signal persists through the entire depth of the network, ensuring that the non-linear splines are firewalled from learning from the anomaly, while the linear track safely handles the extrapolated magnitude.
109
+ It also provides downstream consumers with a clear indicator of when the model is operating on dodgy data.
110
+
111
+ ### Defining the nominal range: data density vs. physical limits
112
+
113
+ When defining the `grid_range` and normalizing inputs, the boundaries should reflect the density of the training data rather than the theoretical limits of the physical system.
114
+
115
+ B-splines require consistent data distribution across their internal grid to form a stable curve.
116
+ If for example a physical feature (such as wave height) has a theoretical operational limit of 5.0 meters, but the training dataset becomes sparse above 2.0 meters, setting the spline boundary to 5.0 meters forces the model to fit curves in an under-constrained region.
117
+ This often causes the splines to oscillate or overfit to a handful of isolated data points.
118
+
119
+ Instead, the grid boundary should be placed where the data density noticeably drops off (e.g., at 2.0 meters).
120
+ By treating the sparse region as out-of-bounds, the network safely clamps the splines in the dense region and relies on the linear track to extrapolate smoothly through the sparse tail.
121
+ The working principle is to treat the nominal range strictly as the bounds of the dense training data.
122
+
123
+ ## Installation
124
+
125
+ You can install the package directly from GitHub:
126
+
127
+ ```bash
128
+ pip install git+https://github.com/simula/physkan.git
+ ```
129
+
130
+ ## Usage example
131
+
132
+ The model handles explicit feature expansion and interval arithmetic internally. A standard linear layer should be used as the final readout.
133
+
134
+ ```python
135
+ import torch
136
+ import torch.nn as nn
137
+ from physkan import KAN
138
+
139
+ # Define explicit cross-terms using indices
140
+ # e.g., for features [wave, wind, cos_dir]:
141
+ # [0, 0] adds wave^2
142
+ # [0, 2] adds wave * cos_dir
143
+ interactions = [[0, 0], [0, 2]]
144
+
145
+ # The KAN model automatically expands the initial input dimension
146
+ # and sets up the continuous dual routing.
147
+ kan_encoder = KAN(
148
+ layer_dims=[3, 16, 8], # Input dim is 3 (wave, wind, cos_dir)
149
+ grid_range=(0.0, 1.0),
150
+ interaction_map=interactions
151
+ )
152
+
153
+ # The readout: Linear combination of the final observables of a zero-at-rest (unbiased) system
154
+ linear_mixer = nn.Linear(in_features=8, out_features=1, bias=False)
155
+
156
+ model = nn.Sequential(
157
+ kan_encoder,
158
+ linear_mixer
159
+ )
160
+
161
+ # Nominal physical data
162
+ x_nominal = torch.tensor([[0.5, 0.8, 0.1]])
163
+
164
+ # Pass data through the encoder, requesting the dual distress signal
165
+ latent_features, severity_signal = kan_encoder(x_nominal, return_dual=True)
166
+ prediction = linear_mixer(latent_features)
167
+
168
+ # For an out-of-bounds event (e.g., wave height sensor reads 5.0)
169
+ x_oob = torch.tensor([[5.0, 0.8, 0.1]])
170
+ latent_oob, severity_oob = kan_encoder(x_oob, return_dual=True)
171
+
172
+ # severity_oob > 0 indicates the prediction relies on mathematically
173
+ # extrapolated values, allowing downstream logic to trigger heuristics.
174
+ if severity_oob.mean() > 0.0:
175
+ print("Warning: operating in uncharted physical regime.")
176
+ ```
177
+
178
+ ## Attribution
179
+
180
+ This repository is an adaptation of the excellent **[efficient-kan](https://github.com/Blealtan/efficient-kan)** library by Blealtan.
181
+
182
+ The core B-spline evaluation mechanics, memory-efficient tensor formulation, and foundational matrix operations are directly derived from `efficient-kan`.
183
+ The modifications introduced here are strictly architectural (specifically the detached routing, strict boundary clamping, interval arithmetic dual, and default identity activations) designed to constrain the network for physical system identification.
184
+ Full credit for the underlying efficiency and base implementation belongs to the original author.