phantomrt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. atlas/__init__.py +3 -0
  2. atlas/agents/__init__.py +8 -0
  3. atlas/agents/command_space.py +227 -0
  4. atlas/analysis/__init__.py +3 -0
  5. atlas/analysis/binary_agent.py +488 -0
  6. atlas/analysis/binary_fuzz.py +389 -0
  7. atlas/analysis/frida_live.py +261 -0
  8. atlas/analysis/graph_annotator.py +147 -0
  9. atlas/analysis/spectrida_bridge.py +84 -0
  10. atlas/analysis/unicorn_harness.py +337 -0
  11. atlas/core/__init__.py +14 -0
  12. atlas/core/decoder.py +65 -0
  13. atlas/core/dynamics.py +217 -0
  14. atlas/core/encoder.py +120 -0
  15. atlas/core/surprise.py +145 -0
  16. atlas/core/world_model.py +334 -0
  17. atlas/environments/__init__.py +5 -0
  18. atlas/environments/base.py +51 -0
  19. atlas/environments/grid_world.py +219 -0
  20. atlas/environments/physics_2d.py +283 -0
  21. atlas/environments/vm_world.py +168 -0
  22. atlas/knowledge/__init__.py +3 -0
  23. atlas/knowledge/instruction_vocab.py +534 -0
  24. atlas/monitor/__init__.py +5 -0
  25. atlas/monitor/execution_monitor.py +518 -0
  26. atlas/optimization/__init__.py +6 -0
  27. atlas/optimization/speed.py +457 -0
  28. atlas/planning/__init__.py +4 -0
  29. atlas/planning/goal.py +100 -0
  30. atlas/planning/mcts.py +228 -0
  31. atlas/training/__init__.py +4 -0
  32. atlas/training/continual.py +392 -0
  33. atlas/training/growth.py +213 -0
  34. atlas/training/loop.py +306 -0
  35. atlas/training/losses.py +101 -0
  36. atlas/training/self_train.py +307 -0
  37. atlas/utils/__init__.py +4 -0
  38. atlas/utils/logging.py +33 -0
  39. atlas/utils/math_helpers.py +30 -0
  40. atlas/utils/viz.py +136 -0
  41. atlas/vm/__init__.py +4 -0
  42. atlas/vm/wsl_vm.py +249 -0
  43. phantomrt-0.1.0.dist-info/METADATA +75 -0
  44. phantomrt-0.1.0.dist-info/RECORD +48 -0
  45. phantomrt-0.1.0.dist-info/WHEEL +5 -0
  46. phantomrt-0.1.0.dist-info/entry_points.txt +3 -0
  47. phantomrt-0.1.0.dist-info/licenses/LICENSE +21 -0
  48. phantomrt-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,283 @@
1
+ """
2
+ 2D Physics Environment
3
+
4
+ Simple Newtonian physics simulation with:
5
+ - Balls that move with velocity
6
+ - Gravity
7
+ - Elastic collisions between balls
8
+ - Wall bouncing
9
+
10
+ This tests whether the world model can learn ACTUAL PHYSICS —
11
+ conservation of momentum, gravity, collision response.
12
+
13
+ Observation: [ball1_x, ball1_y, ball1_vx, ball1_vy, ball1_r, ball1_color, ...]
14
+ Action: [force_x, force_y] applied to agent ball
15
+ """
16
+
17
+ import numpy as np
18
+ from typing import Optional
19
+ from .base import BaseEnvironment
20
+
21
+
22
+ class Ball:
23
+ """Simple physics ball."""
24
+
25
+ def __init__(self, x, y, vx=0, vy=0, radius=0.5, mass=1.0, color=0, fixed=False):
26
+ self.pos = np.array([x, y], dtype=np.float64)
27
+ self.vel = np.array([vx, vy], dtype=np.float64)
28
+ self.radius = radius
29
+ self.mass = mass
30
+ self.color = color # 0=agent, 1=obstacle, 2=collectible
31
+ self.fixed = fixed # fixed balls don't move
32
+
33
+
34
+ class Physics2DEnvironment(BaseEnvironment):
35
+ """
36
+ 2D physics world with Newtonian mechanics.
37
+
38
+ Gravity pulls things down, balls bounce off walls
39
+ and each other, agent can apply forces.
40
+ """
41
+
42
+ def __init__(
43
+ self,
44
+ num_balls: int = 4,
45
+ num_obstacles: int = 2,
46
+ world_size: float = 10.0,
47
+ gravity: float = -9.8,
48
+ dt: float = 0.02,
49
+ max_steps: int = 500,
50
+ seed: Optional[int] = None,
51
+ ):
52
+ self.num_balls = num_balls
53
+ self.num_obstacles = num_obstacles
54
+ self.world_size = world_size
55
+ self.gravity = gravity
56
+ self.dt = dt
57
+ self.max_steps = max_steps
58
+
59
+ self.rng = np.random.RandomState(seed)
60
+ self.balls = []
61
+ self.steps = 0
62
+
63
+ # Agent is the first ball (color=0)
64
+ # Obstacles are static (color=1, fixed)
65
+ # Other balls are dynamic (color=2)
66
+ self._obs_dim = num_balls * 6 # x, y, vx, vy, radius, color for each
67
+ self._action_dim = 2 # force x, force y on agent
68
+
69
+ def get_observation_dim(self) -> int:
70
+ return self._obs_dim
71
+
72
+ def get_action_dim(self) -> int:
73
+ return self._action_dim
74
+
75
+ def reset(self) -> np.ndarray:
76
+ """Reset with random ball positions."""
77
+ self.steps = 0
78
+ self.balls = []
79
+
80
+ # Agent ball (red, controllable)
81
+ agent = Ball(
82
+ x=self.rng.uniform(1, self.world_size - 1),
83
+ y=self.rng.uniform(5, self.world_size - 1),
84
+ vx=0, vy=0,
85
+ radius=0.5, mass=1.0,
86
+ color=0, fixed=False,
87
+ )
88
+ self.balls.append(agent)
89
+
90
+ # Obstacle balls (green, fixed)
91
+ for _ in range(self.num_obstacles):
92
+ obs = Ball(
93
+ x=self.rng.uniform(1, self.world_size - 1),
94
+ y=self.rng.uniform(1, 4),
95
+ radius=0.8, mass=float('inf'),
96
+ color=1, fixed=True,
97
+ )
98
+ self.balls.append(obs)
99
+
100
+ # Dynamic balls (blue)
101
+ for _ in range(self.num_balls - 1 - self.num_obstacles):
102
+ ball = Ball(
103
+ x=self.rng.uniform(1, self.world_size - 1),
104
+ y=self.rng.uniform(1, self.world_size - 1),
105
+ vx=self.rng.uniform(-2, 2),
106
+ vy=self.rng.uniform(-2, 2),
107
+ radius=0.4, mass=1.0,
108
+ color=2, fixed=False,
109
+ )
110
+ self.balls.append(ball)
111
+
112
+ return self._get_observation()
113
+
114
+ def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, dict]:
115
+ """Apply force and simulate physics."""
116
+ self.steps += 1
117
+
118
+ # Apply force to agent ball
119
+ force = np.clip(action, -50, 50).astype(np.float64)
120
+ agent = self.balls[0]
121
+ if not agent.fixed:
122
+ agent.vel += (force / agent.mass) * self.dt
123
+
124
+ # Apply gravity
125
+ for ball in self.balls:
126
+ if not ball.fixed:
127
+ ball.vel[1] += self.gravity * self.dt
128
+
129
+ # Update positions
130
+ for ball in self.balls:
131
+ if not ball.fixed:
132
+ ball.pos += ball.vel * self.dt
133
+
134
+ # Wall collisions
135
+ for ball in self.balls:
136
+ if ball.fixed:
137
+ continue
138
+ for dim in range(2):
139
+ if ball.pos[dim] - ball.radius < 0:
140
+ ball.pos[dim] = ball.radius
141
+ ball.vel[dim] *= -0.8 # bounce with energy loss
142
+ elif ball.pos[dim] + ball.radius > self.world_size:
143
+ ball.pos[dim] = self.world_size - ball.radius
144
+ ball.vel[dim] *= -0.8
145
+
146
+ # Ball-ball collisions
147
+ collisions = 0
148
+ for i in range(len(self.balls)):
149
+ for j in range(i + 1, len(self.balls)):
150
+ if self._collide(self.balls[i], self.balls[j]):
151
+ self._resolve_collision(self.balls[i], self.balls[j])
152
+ collisions += 1
153
+
154
+ # Compute reward based on agent position relative to other balls
155
+ reward = 0.0
156
+
157
+ # Small energy penalty (encourages efficient movement)
158
+ reward -= 0.001 * np.sum(agent.vel ** 2)
159
+
160
+ # Penalty for going out of bounds (shouldn't happen but just in case)
161
+ if agent.pos[0] < 0 or agent.pos[0] > self.world_size:
162
+ reward -= 1.0
163
+ if agent.pos[1] < 0 or agent.pos[1] > self.world_size:
164
+ reward -= 1.0
165
+
166
+ done = self.steps >= self.max_steps
167
+
168
+ info = {
169
+ "collisions": collisions,
170
+ "agent_pos": agent.pos.copy(),
171
+ "agent_vel": agent.vel.copy(),
172
+ }
173
+
174
+ return self._get_observation(), reward, done, info
175
+
176
+ def _collide(self, ball1: Ball, ball2: Ball) -> bool:
177
+ """Check if two balls are overlapping."""
178
+ dist = np.linalg.norm(ball1.pos - ball2.pos)
179
+ return dist < (ball1.radius + ball2.radius)
180
+
181
+ def _resolve_collision(self, ball1: Ball, ball2: Ball):
182
+ """Resolve elastic collision between two balls."""
183
+ # Separation vector
184
+ diff = ball1.pos - ball2.pos
185
+ dist = np.linalg.norm(diff)
186
+
187
+ if dist < 1e-10:
188
+ return
189
+
190
+ normal = diff / dist
191
+
192
+ # Separate balls
193
+ overlap = (ball1.radius + ball2.radius) - dist
194
+ if overlap > 0:
195
+ if ball1.fixed:
196
+ ball2.pos -= normal * overlap
197
+ elif ball2.fixed:
198
+ ball1.pos += normal * overlap
199
+ else:
200
+ ball1.pos += normal * overlap * 0.5
201
+ ball2.pos -= normal * overlap * 0.5
202
+
203
+ # Compute relative velocity
204
+ rel_vel = ball1.vel - ball2.vel
205
+ vel_along_normal = np.dot(rel_vel, normal)
206
+
207
+ # Don't resolve if balls are moving apart
208
+ if vel_along_normal > 0:
209
+ return
210
+
211
+ # Elastic collision with restitution
212
+ restitution = 0.9
213
+
214
+ if ball1.fixed:
215
+ ball2.vel -= (1 + restitution) * vel_along_normal * normal
216
+ elif ball2.fixed:
217
+ ball1.vel += (1 + restitution) * vel_along_normal * normal
218
+ else:
219
+ # Standard elastic collision formula
220
+ m1, m2 = ball1.mass, ball2.mass
221
+ impulse = -(1 + restitution) * vel_along_normal / (1/m1 + 1/m2)
222
+
223
+ ball1.vel += (impulse / m1) * normal
224
+ ball2.vel -= (impulse / m2) * normal
225
+
226
+ def _get_observation(self) -> np.ndarray:
227
+ """Build observation vector."""
228
+ obs = []
229
+
230
+ # Normalize positions and velocities
231
+ for ball in self.balls:
232
+ obs.append(ball.pos[0] / self.world_size) # x normalized
233
+ obs.append(ball.pos[1] / self.world_size) # y normalized
234
+ obs.append(ball.vel[0] / 10.0) # vx normalized
235
+ obs.append(ball.vel[1] / 10.0) # vy normalized
236
+ obs.append(ball.radius) # radius
237
+ obs.append(float(ball.color) / 2.0) # color normalized
238
+
239
+ return np.array(obs, dtype=np.float32)
240
+
241
+ def render(self) -> np.ndarray:
242
+ """Render physics world as RGB image."""
243
+ resolution = 100
244
+ img = np.ones((resolution, resolution, 3), dtype=np.uint8) * 30 # dark background
245
+
246
+ colors = {
247
+ 0: (255, 100, 50), # agent: orange-red
248
+ 1: (50, 200, 50), # obstacles: green
249
+ 2: (50, 100, 255), # dynamic: blue
250
+ }
251
+
252
+ for ball in self.balls:
253
+ # Convert world coords to pixel coords
254
+ px = int(np.clip(ball.pos[0] / self.world_size * (resolution - 1), 0, resolution - 1))
255
+ py = int(np.clip((1 - ball.pos[1] / self.world_size) * (resolution - 1), 0, resolution - 1))
256
+ pr = max(1, int(ball.radius / self.world_size * resolution))
257
+
258
+ # Draw ball as filled circle (simplified)
259
+ y_lo = max(0, py - pr)
260
+ y_hi = min(resolution, py + pr + 1)
261
+ x_lo = max(0, px - pr)
262
+ x_hi = min(resolution, px + pr + 1)
263
+
264
+ color = colors.get(ball.color, (200, 200, 200))
265
+ img[y_lo:y_hi, x_lo:x_hi] = color
266
+
267
+ return img
268
+
269
+ def get_state(self) -> dict:
270
+ """Full state for debugging."""
271
+ return {
272
+ "balls": [
273
+ {
274
+ "pos": b.pos.copy(),
275
+ "vel": b.vel.copy(),
276
+ "radius": b.radius,
277
+ "color": b.color,
278
+ "fixed": b.fixed,
279
+ }
280
+ for b in self.balls
281
+ ],
282
+ "steps": self.steps,
283
+ }
@@ -0,0 +1,168 @@
1
+ """
2
+ VMWorldEnv — the real environment: a whole Linux box the agent acts on.
3
+
4
+ Unlike the toy grid/physics envs, the "action" here is a **shell command string**
5
+ (chosen by the agent from the CommandProposer's candidates). ``step`` runs it in
6
+ the isolated VM and featurizes the *real* outcome into the next observation.
7
+
8
+ State/observation = features of the last command's real result (exit code, output
9
+ size/entropy, error signatures, timing, …). The world model's job is to predict
10
+ this vector for an action before it runs — prediction error is the surprise signal.
11
+
12
+ Reward is count-based novelty (intrinsic curiosity): behaviors the agent has rarely
13
+ produced are worth more, so it seeks out unseen corners of the machine. Distinct
14
+ behavior signatures = coverage.
15
+
16
+ Safety: after a plausibly-destructive or failed command we health-check the VM and,
17
+ if it's bricked, roll back to the base snapshot and continue — recovery, not a
18
+ rewarded action.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import math
24
+ from collections import Counter
25
+ from typing import Optional
26
+
27
+ import numpy as np
28
+
29
+ from .base import BaseEnvironment
30
+ from ..agents.command_space import embed_command, command_family, ACTION_DIM
31
+
32
+ STATE_DIM = 24
33
+
34
+
35
+ def _entropy(s: str) -> float:
36
+ if not s:
37
+ return 0.0
38
+ counts = Counter(s)
39
+ n = len(s)
40
+ return -sum((c / n) * math.log2(c / n) for c in counts.values())
41
+
42
+
43
+ class VMWorldEnv(BaseEnvironment):
44
+ """A real Linux VM presented as an RL-style environment."""
45
+
46
+ def __init__(self, vm, log=print, auto_recover: bool = True,
47
+ snapshot_tag: str = "base"):
48
+ self.vm = vm
49
+ self.log = log
50
+ self.auto_recover = auto_recover
51
+ self.snapshot_tag = snapshot_tag
52
+
53
+ self.seen = Counter() # behavior signature -> count (novelty/coverage)
54
+ self.family_seen = Counter() # family -> count
55
+ self._last = np.zeros(STATE_DIM, dtype=np.float32)
56
+ self.steps = 0
57
+ self.recoveries = 0
58
+
59
+ # ── BaseEnvironment API ──────────────────────────────────────────────────
60
+ def reset(self) -> np.ndarray:
61
+ self._last = np.zeros(STATE_DIM, dtype=np.float32)
62
+ return self._last.copy()
63
+
64
+ def get_observation_dim(self) -> int:
65
+ return STATE_DIM
66
+
67
+ def get_action_dim(self) -> int:
68
+ return ACTION_DIM
69
+
70
+ def render(self):
71
+ return None
72
+
73
+ def run_probe(self, command: str, timeout: int = 8) -> np.ndarray:
74
+ """Run a command purely to measure prediction error (held-out probe);
75
+ does not mutate learning state."""
76
+ return self.featurize(command, self.vm.run(command, timeout=timeout))
77
+
78
+ # ── the real step ────────────────────────────────────────────────────────
79
+ def step(self, command: str, timeout: int = 8):
80
+ """Run ``command`` in the VM; return (obs, reward, done, info) with the
81
+ real outcome. ``command`` is a string (the agent's chosen action)."""
82
+ self.steps += 1
83
+ result = self.vm.run(command, timeout=timeout)
84
+
85
+ obs = self.featurize(command, result)
86
+ sig = self.behavior_signature(command, result)
87
+
88
+ # count-based novelty: rarer behavior -> higher intrinsic reward
89
+ self.seen[sig] += 1
90
+ self.family_seen[command_family(command)] += 1
91
+ reward = 1.0 / math.sqrt(self.seen[sig])
92
+ is_new = self.seen[sig] == 1
93
+
94
+ recovered = self._maybe_recover(command, result)
95
+
96
+ self._last = obs
97
+ info = {
98
+ "command": command,
99
+ "result": result,
100
+ "signature": sig,
101
+ "is_new_behavior": is_new,
102
+ "coverage": len(self.seen),
103
+ "recovered": recovered,
104
+ "family": command_family(command),
105
+ }
106
+ return obs, reward, False, info
107
+
108
+ # ── featurization: real result -> observation vector ─────────────────────
109
+ def featurize(self, command: str, result) -> np.ndarray:
110
+ out, err = result.stdout, result.stderr
111
+ low = (out + " " + err).lower()
112
+ v = np.zeros(STATE_DIM, dtype=np.float32)
113
+ v[0] = max(-1.0, min(1.0, result.exit_code / 128.0))
114
+ v[1] = 1.0 if result.exit_code == 0 else 0.0
115
+ v[2] = 1.0 if result.crashed else 0.0
116
+ v[3] = 1.0 if result.timed_out else 0.0
117
+ v[4] = min(len(out) / 500.0, 1.0)
118
+ v[5] = min(len(err) / 500.0, 1.0)
119
+ v[6] = min(out.count("\n") / 50.0, 1.0)
120
+ v[7] = _entropy(out[:1000]) / 8.0
121
+ v[8] = 1.0 if "error" in low else 0.0
122
+ v[9] = 1.0 if ("not found" in low or "no such" in low) else 0.0
123
+ v[10] = 1.0 if "permission denied" in low else 0.0
124
+ v[11] = 1.0 if "usage" in low else 0.0
125
+ v[12] = 1.0 if any(ch.isdigit() for ch in out[:200]) else 0.0
126
+ v[13] = 1.0 if out.strip() else 0.0
127
+ v[14] = 1.0 if err.strip() else 0.0
128
+ v[15] = min(result.duration / 8.0, 1.0) # normalized wall time
129
+ v[16] = embed_command(command)[29] # was destructive
130
+ v[17] = 1.0 if result.exit_code == -1 else 0.0 # launch failure
131
+ v[18] = 1.0 if result.exit_code == 127 else 0.0 # command not found
132
+ v[19] = 1.0 if result.exit_code == 126 else 0.0 # not executable
133
+ v[20] = 1.0 if "\n" in out.strip() else 0.0 # multiline
134
+ v[21] = 1.0 if len(out) > 2000 else 0.0 # large output
135
+ v[22] = 1.0 if (result.exit_code == 0 and not out.strip()) else 0.0 # silent success
136
+ v[23] = 1.0 # bias
137
+ return v
138
+
139
+ def behavior_signature(self, command: str, result):
140
+ """Coarse hash of *what kind of thing happened* — for novelty/coverage."""
141
+ size_bucket = 0 if not result.stdout else min(len(result.stdout).bit_length(), 12)
142
+ return (
143
+ command_family(command),
144
+ "ok" if result.exit_code == 0 else ("crash" if result.crashed else
145
+ ("timeout" if result.timed_out else f"rc{result.exit_code}")),
146
+ size_bucket,
147
+ bool(result.stderr.strip()),
148
+ )
149
+
150
+ # ── auto-recovery (the rollback safety net) ──────────────────────────────
151
+ def _maybe_recover(self, command: str, result) -> bool:
152
+ if not self.auto_recover:
153
+ return False
154
+ # only pay for a health check when something might have broken the box
155
+ risky = embed_command(command)[29] > 0 or result.exit_code in (-1, 124) \
156
+ or result.crashed
157
+ if not risky:
158
+ return False
159
+ if self.vm.health_ok():
160
+ return False
161
+ self.log(f"[recover] VM unhealthy after: {command!r} → rolling back")
162
+ try:
163
+ self.vm.rollback(self.snapshot_tag)
164
+ self.recoveries += 1
165
+ return True
166
+ except Exception as e:
167
+ self.log(f"[recover] rollback FAILED: {e}")
168
+ return False
@@ -0,0 +1,3 @@
1
+ from .instruction_vocab import InstructionEncoder, BinaryKnowledgeBase
2
+
3
+ __all__ = ["InstructionEncoder", "BinaryKnowledgeBase"]