phantomrt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atlas/__init__.py +3 -0
- atlas/agents/__init__.py +8 -0
- atlas/agents/command_space.py +227 -0
- atlas/analysis/__init__.py +3 -0
- atlas/analysis/binary_agent.py +488 -0
- atlas/analysis/binary_fuzz.py +389 -0
- atlas/analysis/frida_live.py +261 -0
- atlas/analysis/graph_annotator.py +147 -0
- atlas/analysis/spectrida_bridge.py +84 -0
- atlas/analysis/unicorn_harness.py +337 -0
- atlas/core/__init__.py +14 -0
- atlas/core/decoder.py +65 -0
- atlas/core/dynamics.py +217 -0
- atlas/core/encoder.py +120 -0
- atlas/core/surprise.py +145 -0
- atlas/core/world_model.py +334 -0
- atlas/environments/__init__.py +5 -0
- atlas/environments/base.py +51 -0
- atlas/environments/grid_world.py +219 -0
- atlas/environments/physics_2d.py +283 -0
- atlas/environments/vm_world.py +168 -0
- atlas/knowledge/__init__.py +3 -0
- atlas/knowledge/instruction_vocab.py +534 -0
- atlas/monitor/__init__.py +5 -0
- atlas/monitor/execution_monitor.py +518 -0
- atlas/optimization/__init__.py +6 -0
- atlas/optimization/speed.py +457 -0
- atlas/planning/__init__.py +4 -0
- atlas/planning/goal.py +100 -0
- atlas/planning/mcts.py +228 -0
- atlas/training/__init__.py +4 -0
- atlas/training/continual.py +392 -0
- atlas/training/growth.py +213 -0
- atlas/training/loop.py +306 -0
- atlas/training/losses.py +101 -0
- atlas/training/self_train.py +307 -0
- atlas/utils/__init__.py +4 -0
- atlas/utils/logging.py +33 -0
- atlas/utils/math_helpers.py +30 -0
- atlas/utils/viz.py +136 -0
- atlas/vm/__init__.py +4 -0
- atlas/vm/wsl_vm.py +249 -0
- phantomrt-0.1.0.dist-info/METADATA +75 -0
- phantomrt-0.1.0.dist-info/RECORD +48 -0
- phantomrt-0.1.0.dist-info/WHEEL +5 -0
- phantomrt-0.1.0.dist-info/entry_points.txt +3 -0
- phantomrt-0.1.0.dist-info/licenses/LICENSE +21 -0
- phantomrt-0.1.0.dist-info/top_level.txt +1 -0
atlas/core/dynamics.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dynamics Model: The Physics Engine
|
|
3
|
+
|
|
4
|
+
This is the HEART of the world model.
|
|
5
|
+
It learns HOW THE WORLD EVOLVES over time.
|
|
6
|
+
|
|
7
|
+
Instead of discrete layers, we use a Neural ODE —
|
|
8
|
+
a continuous differential equation that describes
|
|
9
|
+
how the state changes:
|
|
10
|
+
|
|
11
|
+
dx/dt = f(x, action)
|
|
12
|
+
|
|
13
|
+
This is fundamentally different from transformers:
|
|
14
|
+
- Continuous, not discrete
|
|
15
|
+
- Learns dynamics, not patterns
|
|
16
|
+
- Can simulate arbitrary time horizons
|
|
17
|
+
- Naturally handles variable-speed events
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import torch
|
|
21
|
+
import torch.nn as nn
|
|
22
|
+
from torchdiffeq import odeint
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DynamicsFunction(nn.Module):
    """
    Neural network that defines the dynamics: dx/dt = f(x, action)

    Input:  (state, action) concatenated along the last dimension
    Output: derivative dx/dt (rate of change of state)

    The network is a stack of ``num_layers`` hidden blocks
    (Linear -> LayerNorm -> SiLU), each ``hidden_dim`` wide, followed by a
    final Linear projection back to ``state_dim``.
    """

    def __init__(self, state_dim: int, action_dim: int, hidden_dim: int = 512, num_layers: int = 3):
        """
        Build the dynamics network.

        Args:
            state_dim: size of the state vector (also the output size)
            action_dim: size of the action vector
            hidden_dim: width of every hidden block
            num_layers: number of hidden blocks placed before the final
                projection; must be at least 1

        Raises:
            ValueError: if ``num_layers`` is smaller than 1
        """
        super().__init__()

        # With zero hidden blocks the final projection would receive the raw
        # (state, action) vector while expecting hidden_dim features, which
        # only fails later inside forward(). Reject it up front instead.
        if num_layers < 1:
            raise ValueError(f"num_layers must be >= 1, got {num_layers}")

        self.state_dim = state_dim
        self.action_dim = action_dim

        # Build dynamics network
        layers = []
        in_features = state_dim + action_dim

        for i in range(num_layers):
            layers.extend([
                nn.Linear(in_features, hidden_dim),
                nn.LayerNorm(hidden_dim),
                nn.SiLU(),
            ])
            # Residual block after every even-indexed hidden block except the
            # first one (i = 2, 4, ...), to help gradient flow
            if i > 0 and i % 2 == 0:
                layers.append(SkipConnection(hidden_dim))
            # Every block after the first consumes hidden_dim features
            in_features = hidden_dim

        # Final layer outputs the derivative
        layers.append(nn.Linear(hidden_dim, state_dim))

        self.net = nn.Sequential(*layers)

        # Initialize last layer with small weights
        # (prevents explosive dynamics at the start)
        last_layer = self.net[-1]
        nn.init.xavier_uniform_(last_layer.weight, gain=0.01)
        nn.init.zeros_(last_layer.bias)

    def forward(self, state: torch.Tensor, t: torch.Tensor, action: torch.Tensor) -> torch.Tensor:
        """
        Compute the derivative dx/dt.

        Args:
            state: [batch, state_dim] current state
            t: current time; accepted so the call matches what the ODE solver
                passes in, but not used by the network itself
            action: [batch, action_dim] action being taken

        Returns:
            dx_dt: [batch, state_dim] rate of change of state, clamped
                element-wise to [-10, 10]
        """
        # Concatenate state and action
        x = torch.cat([state, action], dim=-1)

        # Compute derivative
        dx_dt = self.net(x)

        # Clamp derivatives to prevent explosion
        dx_dt = torch.clamp(dx_dt, min=-10.0, max=10.0)

        return dx_dt
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class SkipConnection(nn.Module):
    """Residual wrapper: adds a layer-normalized copy of the input back onto it."""

    def __init__(self, dim: int):
        """
        Args:
            dim: size of the last dimension of the tensors passed to forward()
        """
        super().__init__()
        self.norm = nn.LayerNorm(dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x + LayerNorm(x)``; the output has the same shape as ``x``."""
        normalized = self.norm(x)
        return x + normalized
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class NeuralODE(nn.Module):
    """
    Wraps a dynamics function with an ODE solver.

    This lets us:
    1. Integrate forward in time (simulate the future)
    2. Backpropagate through the operations performed by the solver
    3. Use adaptive step sizing (accuracy + efficiency)

    Integration is delegated to ``odeint``. The memory-saving adjoint method
    is NOT used here; that would require torchdiffeq's ``odeint_adjoint``.
    """

    def __init__(
        self,
        dynamics_fn: "DynamicsFunction",
        solver: str = "dopri5",
        dt: float = 0.05,
        rtol: float = 1e-3,
        atol: float = 1e-4,
    ):
        """
        Args:
            dynamics_fn: module called as ``dynamics_fn(state, t, action)``
                that returns dx/dt
            solver: solver name forwarded to ``odeint`` as ``method``
            dt: default duration of one action step
            rtol: relative tolerance forwarded to ``odeint``
            atol: absolute tolerance forwarded to ``odeint``
        """
        super().__init__()
        self.dynamics_fn = dynamics_fn
        self.solver = solver
        self.dt = dt
        self.rtol = rtol
        self.atol = atol

    def forward(
        self,
        initial_state: torch.Tensor,
        actions: torch.Tensor,
        time_horizon: float = None,
    ) -> torch.Tensor:
        """
        Roll out the dynamics forward in time.

        Each action is held constant for ``time_horizon / num_steps`` units of
        time (which equals ``dt`` when ``time_horizon`` is not given), so the
        action sequence always spans the whole integration interval.

        Args:
            initial_state: [batch, state_dim] starting state
            actions: [batch, num_steps, action_dim] action sequence
                     OR [batch, action_dim] for single constant action
            time_horizon: total simulation time (if None, num_steps * dt)

        Returns:
            trajectory: [batch, num_steps+1, state_dim]
                        (includes initial state at t=0)
        """
        # Handle single action case
        if actions.dim() == 2:
            actions = actions.unsqueeze(1)

        num_steps = actions.shape[1]

        # Nothing to integrate: the trajectory is just the initial state.
        if num_steps == 0:
            return initial_state.unsqueeze(1)

        if time_horizon is None:
            time_horizon = num_steps * self.dt
            step_duration = self.dt
        else:
            # A caller-supplied horizon stretches or compresses the action
            # schedule; indexing by self.dt here would misalign the actions
            # with t_span.
            step_duration = time_horizon / num_steps

        t_span = torch.linspace(0, time_horizon, num_steps + 1, device=initial_state.device)

        def dynamics_with_action(t, state):
            # Piecewise-constant control: pick the action whose interval
            # contains t, clamped so solver evaluations at or beyond the final
            # time reuse the last action.
            action_idx = torch.clamp(
                (t / step_duration).long(),
                min=0,
                max=num_steps - 1,
            )
            action = actions[:, action_idx]  # [batch, action_dim]
            return self.dynamics_fn(state, t, action)

        # Solve the ODE
        trajectory = odeint(
            dynamics_with_action,
            initial_state,
            t_span,
            method=self.solver,
            rtol=self.rtol,
            atol=self.atol,
        )

        # trajectory shape: [num_timesteps, batch, state_dim]
        # Transpose to:     [batch, num_timesteps, state_dim]
        trajectory = trajectory.transpose(0, 1)

        return trajectory

    def single_step(
        self,
        state: torch.Tensor,
        action: torch.Tensor,
        dt: float = None,
    ) -> torch.Tensor:
        """
        Single-step prediction: state(t) → state(t+dt)

        Uses the fixed-step "euler" method with a step size equal to ``dt``
        instead of the configured solver, which is faster than a full rollout
        for training.

        Args:
            state: [batch, state_dim] current state
            action: [batch, action_dim] action held for the whole step
            dt: step duration (if None, the instance's ``dt``)

        Returns:
            next state: [batch, state_dim]
        """
        if dt is None:
            dt = self.dt

        t_span = torch.tensor([0.0, dt], device=state.device)

        def dynamics(t, s):
            return self.dynamics_fn(s, t, action)

        # Solve for one step
        result = odeint(
            dynamics,
            state,
            t_span,
            method="euler",  # fast for single step
            options={"step_size": dt},
        )

        # Return final state: [batch, state_dim]
        return result[-1]
|
atlas/core/encoder.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Encoder: Observation → Latent State
|
|
3
|
+
|
|
4
|
+
Compresses raw observations (images, vectors, etc.) into
|
|
5
|
+
a compact latent representation that captures the ESSENCE
|
|
6
|
+
of the current state.
|
|
7
|
+
|
|
8
|
+
Uses variational inference to output a probability distribution
|
|
9
|
+
over possible states, not just a single point. This lets the
|
|
10
|
+
model express UNCERTAINTY about what it's seeing.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import torch
|
|
14
|
+
import torch.nn as nn
|
|
15
|
+
import torch.nn.functional as F
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Encoder(nn.Module):
    """
    Encodes observations into a latent state distribution.

    Input:  observation tensor [batch, obs_dim]
    Output: (mean, log_variance) each [batch, latent_dim]

    The latent state is sampled via the reparameterization trick:
        z = mean + std * epsilon,  where epsilon ~ N(0, 1)

    This allows gradients to flow through the sampling process.
    """

    def __init__(self, obs_dim: int, latent_dim: int = 256, hidden_dims: list = None, dropout: float = 0.1):
        """
        Args:
            obs_dim: size of the observation vector
            latent_dim: size of the latent state
            hidden_dims: widths of the hidden blocks (defaults to [512, 512]);
                an empty list makes the feature network an identity
            dropout: dropout probability applied after every hidden block
        """
        super().__init__()

        self.obs_dim = obs_dim
        self.latent_dim = latent_dim

        if hidden_dims is None:
            hidden_dims = [512, 512]

        # Build encoder network: Linear -> LayerNorm -> SiLU -> Dropout per block
        layers = []
        prev_dim = obs_dim

        for h_dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, h_dim),
                nn.LayerNorm(h_dim),
                nn.SiLU(),
                nn.Dropout(dropout),
            ])
            prev_dim = h_dim

        self.feature_net = nn.Sequential(*layers)

        # Output heads: mean and log_variance of latent distribution
        self.mean_head = nn.Linear(prev_dim, latent_dim)
        self.log_var_head = nn.Linear(prev_dim, latent_dim)

        # Initialize heads with small weights for stable training
        nn.init.xavier_uniform_(self.mean_head.weight, gain=0.1)
        nn.init.zeros_(self.mean_head.bias)
        nn.init.xavier_uniform_(self.log_var_head.weight, gain=0.1)
        nn.init.zeros_(self.log_var_head.bias)

    def forward(self, observation: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Encode observation into latent distribution parameters.

        Args:
            observation: [batch, obs_dim] raw observation

        Returns:
            (mean, log_variance): each [batch, latent_dim]; log_variance is
            clamped to [-10, 2]
        """
        features = self.feature_net(observation)
        mean = self.mean_head(features)
        log_var = self.log_var_head(features)

        # Clamp log_var to prevent numerical instability
        # Range: [-10, 2] → variance range: [~4.5e-5, ~7.4]
        #                 → std range:      [~0.0067, ~2.72]
        log_var = torch.clamp(log_var, min=-10.0, max=2.0)

        return mean, log_var

    def sample(self, mean: torch.Tensor, log_var: torch.Tensor) -> torch.Tensor:
        """
        Sample from the latent distribution using reparameterization trick.

        z = μ + σ * ε,  where ε ~ N(0, I)

        This is differentiable — gradients flow through the sample
        back to the encoder parameters.

        Args:
            mean: [batch, latent_dim] distribution mean
            log_var: [batch, latent_dim] log of the distribution variance

        Returns:
            z: [batch, latent_dim] sampled latent state
        """
        std = torch.exp(0.5 * log_var)   # convert log_var to std
        epsilon = torch.randn_like(std)  # random noise ~ N(0, 1)
        z = mean + std * epsilon
        return z

    def encode(self, observation: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Full encoding: observation → (mean, log_var, sampled_state)

        Convenience method that does everything in one call.
        """
        # Call the module rather than .forward() so registered hooks still run.
        mean, log_var = self(observation)
        z = self.sample(mean, log_var)
        return mean, log_var, z

    def kl_divergence(self, mean: torch.Tensor, log_var: torch.Tensor) -> torch.Tensor:
        """
        KL divergence from the latent distribution to N(0, I).

        KL(q(z|x) || p(z)) = -0.5 * Σ(1 + log(σ²) - μ² - σ²)

        The sum runs over the latent dimension and the result is averaged
        over the batch, giving a scalar. This regularizes the latent space to
        stay close to a standard normal distribution.
        """
        kl = -0.5 * torch.sum(1 + log_var - mean.pow(2) - log_var.exp(), dim=-1)
        return kl.mean()
|
atlas/core/surprise.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Surprise Detection System
|
|
3
|
+
|
|
4
|
+
The KEY insight of the world model: only learn from SURPRISES.
|
|
5
|
+
|
|
6
|
+
If the model predicted reality correctly → no learning needed
|
|
7
|
+
If the model was WRONG → that's a surprise → UPDATE THE MODEL
|
|
8
|
+
|
|
9
|
+
This is how the brain works:
|
|
10
|
+
- You expect the floor to be there → you step on it → no surprise
|
|
11
|
+
- You expect the floor to be there → it's not → HUGE surprise → learning!
|
|
12
|
+
|
|
13
|
+
The surprise signal drives ALL learning in the model.
|
|
14
|
+
This is more efficient than backpropagating through everything —
|
|
15
|
+
we only allocate compute to things we got wrong.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import torch
|
|
19
|
+
import torch.nn.functional as F
|
|
20
|
+
from collections import deque
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SurpriseDetector:
    """
    Detects when the world model's predictions don't match reality.

    Maintains an adaptive threshold that tracks 80% of the average of the
    most recent surprise scores, so it tightens as prediction error falls
    and loosens as prediction error rises.
    """

    def __init__(self, initial_threshold: float = 0.1, adaptation_rate: float = 0.01, history_size: int = 1000):
        """
        Args:
            initial_threshold: starting surprise threshold
            adaptation_rate: fraction of the gap to the target that the
                threshold closes on each adaptation
            history_size: maximum number of surprise scores kept; the
                threshold only adapts once 50 scores are available
        """
        self.threshold = initial_threshold
        self.adaptation_rate = adaptation_rate
        self.history = deque(maxlen=history_size)

        # Track overall learning progress
        self.total_surprises = 0
        self.total_predictions = 0
        self.surprise_rate_history = deque(maxlen=100)

    def compute_surprise(self, real: torch.Tensor, predicted: torch.Tensor) -> tuple[torch.Tensor, bool]:
        """
        Compute how surprising the real observation is.

        The decision is made once per batch: when the batch-mean error exceeds
        the threshold, every sample in the batch is counted as a surprise.

        Args:
            real: actual observation [batch, obs_dim]
            predicted: model's prediction [batch, obs_dim]

        Returns:
            (surprise_score, is_surprising): scalar loss + boolean
        """
        import math

        # Per-sample prediction error
        error = F.mse_loss(real, predicted, reduction="none").mean(dim=-1)  # [batch]

        # Overall surprise score (mean across batch)
        surprise_score = error.mean()

        # Read the value back once; each .item() forces a device sync.
        score_value = surprise_score.item()

        self.total_predictions += real.shape[0]

        # Is this surprising enough to trigger learning?
        # (compared against the threshold as it was before this call adapts it)
        is_surprising = score_value > self.threshold

        if is_surprising:
            self.total_surprises += real.shape[0]

        # A NaN/inf score (diverged model, empty batch) would poison the
        # running average that drives the threshold, so keep it out of history.
        if math.isfinite(score_value):
            self.history.append(score_value)
            # Update adaptive threshold
            self._adapt_threshold()

        # Track surprise rate
        if self.total_predictions > 0:
            current_rate = self.total_surprises / self.total_predictions
            self.surprise_rate_history.append(current_rate)

        return surprise_score, is_surprising

    def _adapt_threshold(self):
        """
        Adapt the surprise threshold based on recent history.

        Does nothing until at least 50 scores are recorded. After that the
        threshold moves a fraction ``adaptation_rate`` of the way toward
        0.8 * (mean of the last 50 scores) and is then clipped to
        [0.001, 10.0].
        """
        if len(self.history) < 50:
            return  # not enough data

        recent_surprises = list(self.history)[-50:]
        avg_surprise = sum(recent_surprises) / len(recent_surprises)

        # Target sits slightly below the recent average surprise
        target = avg_surprise * 0.8

        # Smoothly adjust toward target
        self.threshold += self.adaptation_rate * (target - self.threshold)

        # Keep threshold in reasonable bounds
        self.threshold = max(0.001, min(self.threshold, 10.0))

    def compute_novelty(self, real: torch.Tensor, predicted: torch.Tensor) -> torch.Tensor:
        """
        Compute novelty score — prediction error relative to past error.

        The per-sample error is divided by the mean of all scores currently
        in history; with an empty history the raw error is returned. This
        method does not update any tracking state.

        Args:
            real: actual observation [batch, obs_dim]
            predicted: model's prediction [batch, obs_dim]

        Returns:
            novelty: [batch] normalized prediction error
        """
        error = F.mse_loss(real, predicted, reduction="none").mean(dim=-1)

        # Normalize by average surprise over the stored history
        if len(self.history) > 0:
            avg = sum(self.history) / len(self.history)
            novelty = error / (avg + 1e-8)
        else:
            novelty = error

        return novelty

    def get_stats(self) -> dict:
        """Get current surprise statistics."""
        return {
            "threshold": self.threshold,
            "total_surprises": self.total_surprises,
            "total_predictions": self.total_predictions,
            "surprise_rate": (
                self.total_surprises / self.total_predictions
                if self.total_predictions > 0
                else 0.0
            ),
            # Averaged over everything currently held in history
            "avg_surprise_recent": (
                sum(self.history) / len(self.history)
                if len(self.history) > 0
                else 0.0
            ),
        }

    def reset(self):
        """Reset history and counters; the adapted threshold is left as is."""
        self.history.clear()
        self.total_surprises = 0
        self.total_predictions = 0
        self.surprise_rate_history.clear()
|