phantomrt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atlas/__init__.py +3 -0
- atlas/agents/__init__.py +8 -0
- atlas/agents/command_space.py +227 -0
- atlas/analysis/__init__.py +3 -0
- atlas/analysis/binary_agent.py +488 -0
- atlas/analysis/binary_fuzz.py +389 -0
- atlas/analysis/frida_live.py +261 -0
- atlas/analysis/graph_annotator.py +147 -0
- atlas/analysis/spectrida_bridge.py +84 -0
- atlas/analysis/unicorn_harness.py +337 -0
- atlas/core/__init__.py +14 -0
- atlas/core/decoder.py +65 -0
- atlas/core/dynamics.py +217 -0
- atlas/core/encoder.py +120 -0
- atlas/core/surprise.py +145 -0
- atlas/core/world_model.py +334 -0
- atlas/environments/__init__.py +5 -0
- atlas/environments/base.py +51 -0
- atlas/environments/grid_world.py +219 -0
- atlas/environments/physics_2d.py +283 -0
- atlas/environments/vm_world.py +168 -0
- atlas/knowledge/__init__.py +3 -0
- atlas/knowledge/instruction_vocab.py +534 -0
- atlas/monitor/__init__.py +5 -0
- atlas/monitor/execution_monitor.py +518 -0
- atlas/optimization/__init__.py +6 -0
- atlas/optimization/speed.py +457 -0
- atlas/planning/__init__.py +4 -0
- atlas/planning/goal.py +100 -0
- atlas/planning/mcts.py +228 -0
- atlas/training/__init__.py +4 -0
- atlas/training/continual.py +392 -0
- atlas/training/growth.py +213 -0
- atlas/training/loop.py +306 -0
- atlas/training/losses.py +101 -0
- atlas/training/self_train.py +307 -0
- atlas/utils/__init__.py +4 -0
- atlas/utils/logging.py +33 -0
- atlas/utils/math_helpers.py +30 -0
- atlas/utils/viz.py +136 -0
- atlas/vm/__init__.py +4 -0
- atlas/vm/wsl_vm.py +249 -0
- phantomrt-0.1.0.dist-info/METADATA +75 -0
- phantomrt-0.1.0.dist-info/RECORD +48 -0
- phantomrt-0.1.0.dist-info/WHEEL +5 -0
- phantomrt-0.1.0.dist-info/entry_points.txt +3 -0
- phantomrt-0.1.0.dist-info/licenses/LICENSE +21 -0
- phantomrt-0.1.0.dist-info/top_level.txt +1 -0
atlas/__init__.py
ADDED
atlas/agents/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Agent-side pieces: the action space (command proposer + embedding)."""
|
|
2
|
+
from .command_space import (
|
|
3
|
+
CommandProposer, embed_command, command_family, primary_binary, ACTION_DIM,
|
|
4
|
+
)
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"CommandProposer", "embed_command", "command_family", "primary_binary", "ACTION_DIM",
|
|
8
|
+
]
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""
|
|
2
|
+
The action space: proposing shell commands and embedding them.
|
|
3
|
+
|
|
4
|
+
Two jobs:
|
|
5
|
+
|
|
6
|
+
1. ``embed_command`` — turn a command string into a fixed, *structured* vector.
|
|
7
|
+
The embedding is deliberately compositional: it encodes the command's family,
|
|
8
|
+
argument structure, operators, and intent — NOT a memorized id. This is what
|
|
9
|
+
lets the world model generalize (predict `grep -c` from having learned `grep`
|
|
10
|
+
and `wc -c` separately) instead of memorizing exact strings.
|
|
11
|
+
|
|
12
|
+
2. ``CommandProposer`` — generate candidate commands for the agent to choose
|
|
13
|
+
among. Unrestricted: bare binaries, discovered files, mutations/compositions
|
|
14
|
+
of past commands, file creation, compiling & running code, and yes destructive
|
|
15
|
+
ops too. The VM boundary + snapshot rollback are the containment, not a filter.
|
|
16
|
+
A self-expanding library of *working* commands becomes new building blocks —
|
|
17
|
+
the action space itself grows as the agent discovers what the machine offers.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import random
|
|
23
|
+
import re
|
|
24
|
+
import shlex
|
|
25
|
+
import numpy as np
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Length of the vector returned by ``embed_command``.
ACTION_DIM = 32

# ── binary → family knowledge, used only to *structure* the embedding.
# Binaries missing from this table are treated as "misc" by the lookups below.
# NOTE: "awk" is listed under both "text" and "interp"; because "interp" comes
# later, the reverse map ``_BIN2FAM`` resolves "awk" to "interp".
_FAMILY = {
    "text": [
        "grep", "sed", "awk", "cut", "sort", "uniq", "tr", "wc", "head",
        "tail", "cat", "tac", "rev", "fold", "paste", "join", "column", "nl",
    ],
    "file": [
        "ls", "cp", "mv", "rm", "mkdir", "rmdir", "touch", "stat", "find",
        "ln", "readlink", "basename", "dirname", "du", "df", "file", "tree",
    ],
    "archive": [
        "tar", "gzip", "gunzip", "zip", "unzip", "xz", "bzip2", "cpio",
    ],
    "compile": [
        "gcc", "g++", "cc", "make", "ld", "as", "ar", "objdump", "nm", "strip",
    ],
    "interp": [
        "python3", "python", "perl", "bash", "sh", "node", "ruby", "lua", "awk",
    ],
    "proc": [
        "ps", "top", "kill", "pkill", "nice", "nohup", "jobs", "sleep",
        "timeout", "watch", "pgrep", "pidof",
    ],
    "perm": [
        "chmod", "chown", "chgrp", "umask", "id", "whoami", "groups", "sudo",
    ],
    "system": [
        "uname", "hostname", "uptime", "date", "env", "printenv", "free",
        "lscpu", "mount", "dmesg", "sysctl", "ulimit",
    ],
    "net": [
        "ip", "ss", "ping", "curl", "wget", "netstat", "nc", "host", "dig",
    ],
    "shell": [
        "echo", "printf", "test", "true", "false", "seq", "yes", "xargs",
        "tee", "read", "expr", "let", "type", "which", "command",
    ],
}

# Reverse lookup: binary name → family name (a later family overrides an
# earlier one when a binary is listed twice).
_BIN2FAM = dict(
    (binary, family) for family in _FAMILY for binary in _FAMILY[family]
)

# Ordered family names; the position in this list is the one-hot index, with
# the catch-all "misc" family last.
_FAMILIES = [*_FAMILY, "misc"]

# Rough intent per family, as six 0/1 flags in the order
# (reads, writes, creates, deletes, executes, info).
_INTENT = {
    "text":    (1, 0, 0, 0, 0, 1),
    "file":    (1, 1, 1, 1, 0, 1),
    "archive": (1, 1, 1, 0, 0, 0),
    "compile": (1, 1, 1, 0, 1, 0),
    "interp":  (1, 1, 1, 0, 1, 0),
    "proc":    (1, 0, 0, 1, 1, 1),
    "perm":    (1, 1, 0, 0, 0, 1),
    "system":  (1, 0, 0, 0, 0, 1),
    "net":     (1, 1, 0, 0, 1, 1),
    "shell":   (0, 1, 1, 0, 0, 1),
    "misc":    (1, 0, 0, 0, 1, 0),
}

# Substrings of commands that tend to break the box — tracked as a feature,
# NOT blocked.
_DANGER = (
    "rm ",
    "rm -",
    "dd ",
    "mkfs",
    ":(){",
    "shutdown",
    "reboot",
    "> /dev",
    "chmod -R",
    "chown -R",
    "mv /",
    "> /etc",
    "kill -9 -1",
)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Prefix commands that launch another program; they are skipped when looking
# for the program a command line really runs.
_WRAPPERS = ("sudo", "nohup", "time", "nice", "timeout", "env")

# Wrapper options that consume the following token as their value, so that
# value is not mistaken for the program name (e.g. ``sudo -u root ls``).
# Deliberately small: only well-known short options are listed.
_WRAPPER_VALUE_OPTS = {
    "sudo": ("-u", "-g"),
    "nice": ("-n",),
    "timeout": ("-s", "-k"),
    "env": ("-u",),
}


def primary_binary(command: str) -> str:
    """Return the basename of the first real program token in *command*.

    Skipped while scanning left to right:

    * ``VAR=value`` environment assignments,
    * wrapper commands (``sudo``, ``nohup``, ``time``, ``nice``, ``timeout``,
      ``env``),
    * options that follow a wrapper (any token starting with ``-``), plus the
      value token of the options listed in ``_WRAPPER_VALUE_OPTS``,
    * the duration argument that follows ``timeout``.

    The command is tokenized with ``shlex.split``; if that fails (for example
    on an unterminated quote) plain whitespace splitting is used instead.

    Returns:
        The program name without any directory part. When every token was
        skipped the basename of the first token is returned; an empty command
        yields ``""``.
    """
    try:
        toks = shlex.split(command)
    except ValueError:
        toks = command.split()

    wrapper = None          # most recent wrapper command seen, if any
    skip_next = False       # next token is the value of a wrapper option
    need_duration = False   # still waiting for timeout's DURATION positional
    for t in toks:
        if skip_next:
            skip_next = False
            continue
        if "=" in t and not t.startswith("-"):  # VAR=val prefix
            continue
        if t in _WRAPPERS:
            wrapper = t
            need_duration = t == "timeout"
            continue
        if wrapper is not None and t.startswith("-"):
            # An option belonging to the wrapper, not the program itself.
            skip_next = t in _WRAPPER_VALUE_OPTS.get(wrapper, ())
            continue
        if need_duration:
            # `timeout 5 ls`: "5" is the duration, the program comes after it.
            need_duration = False
            continue
        return t.split("/")[-1]
    # Nothing but prefixes/wrappers: fall back to the first token.
    return toks[0].split("/")[-1] if toks else ""
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _is_destructive(cmd: str) -> bool:
    """Return True when *cmd* contains a ``_DANGER`` pattern as a command word."""
    for pat in _DANGER:
        # Patterns that start with a letter/digit ("rm ", "dd ", ...) must not
        # be glued to a preceding word character or hyphen; otherwise
        # `git add .` (contains "dd ") or `docker run --rm x` (contains "rm ")
        # would be flagged. Symbol patterns ("> /dev", ":(){") match anywhere.
        guard = r"(?<![\w-])" if pat[:1].isalnum() else ""
        if re.search(guard + re.escape(pat), cmd):
            return True
    return False


def embed_command(command: str) -> np.ndarray:
    """Structured, compositional embedding of a command → [ACTION_DIM] in ~[0,1].

    Layout of the returned float32 vector (the fixed indices below rely on
    ``ACTION_DIM`` being 32 and ``_FAMILIES`` having 11 entries):

    * ``[0:11]``  one-hot of the primary binary's family ("misc" if unknown)
    * ``[11:17]`` the family's intent flags from ``_INTENT``
    * ``[17:32]`` structural features of the command text, each clamped to
      at most 1.0

    Args:
        command: The shell command line; surrounding whitespace is ignored.

    Returns:
        A new ``np.ndarray`` of shape ``(ACTION_DIM,)`` and dtype float32.
    """
    v = np.zeros(ACTION_DIM, dtype=np.float32)
    cmd = command.strip()
    binn = primary_binary(cmd)
    fam = _BIN2FAM.get(binn, "misc")

    # [0:11] family one-hot
    v[_FAMILIES.index(fam)] = 1.0
    # [11:17] intent bits for the family
    v[11:17] = np.array(_INTENT[fam], dtype=np.float32)

    try:
        toks = shlex.split(cmd)
    except ValueError:
        # e.g. unbalanced quotes: fall back to plain whitespace splitting
        toks = cmd.split()
    args = toks[1:] if toks else []

    # [17:32] structural features (all bounded)
    v[17] = min(len(toks) / 8.0, 1.0)                                  # n tokens
    v[18] = min(len(cmd) / 80.0, 1.0)                                  # length
    v[19] = min(sum(a.startswith("-") for a in args) / 4.0, 1.0)       # flags
    # A logical OR ("||") is a chain operator, not a pipe; it is counted in
    # v[27] below, so it is removed before looking for a real "|".
    v[20] = 1.0 if "|" in cmd.replace("||", "") else 0.0               # pipe
    v[21] = 1.0 if ">" in cmd else 0.0          # redirect out (covers ">>" too)
    v[22] = 1.0 if "<" in cmd else 0.0                                 # redirect in
    v[23] = 1.0 if any(c in cmd for c in "*?[") else 0.0               # glob
    v[24] = min(sum("/" in a for a in args) / 3.0, 1.0)                # path args
    v[25] = min(sum(bool(re.fullmatch(r"-?\d+", a)) for a in args) / 3.0, 1.0)  # numeric
    v[26] = 1.0 if ("$(" in cmd or "`" in cmd) else 0.0                # subshell
    v[27] = 1.0 if ("&&" in cmd or ";" in cmd or "||" in cmd) else 0.0  # chained
    v[28] = 1.0 if cmd.rstrip().endswith("&") else 0.0                 # background
    v[29] = 1.0 if _is_destructive(cmd) else 0.0                       # destructive
    v[30] = 1.0 if binn in _BIN2FAM else 0.0                           # known binary
    # NOTE(review): " -h" also matches non-help uses such as `du -h` / `df -h`;
    # kept as-is because which binaries treat -h as help is not knowable here.
    v[31] = 1.0 if ("--help" in cmd or " -h" in cmd or binn == "man") else 0.0  # self-doc
    return v
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def command_family(command: str) -> str:
    """Return the behavior family of *command* (for per-family competence).

    The family is looked up from the command's primary binary; binaries that
    are not in the family table map to ``"misc"``.
    """
    binary = primary_binary(command)
    if binary in _BIN2FAM:
        return _BIN2FAM[binary]
    return "misc"
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class CommandProposer:
    """Generates candidate shell commands. Unrestricted and self-expanding.

    The proposer keeps three pools that its strategies draw from:

    * ``binaries`` — program names listed by the VM (filled by ``discover``),
    * ``files``    — path-looking strings harvested from command output,
    * ``library``  — commands that were observed to exit with status 0.

    All randomness comes from ``self.rng``, so a seeded generator gives a
    reproducible sequence of proposals.
    """

    # Upper bounds on the self-growing pools.
    _MAX_LIBRARY = 2000
    _MAX_FILES = 5000

    def __init__(self, vm, rng: random.Random | None = None):
        """Create a proposer.

        Args:
            vm: Object used to run commands; ``discover`` calls
                ``vm.run(cmd, timeout=...)`` and reads ``.stdout`` of the result.
            rng: Random generator to draw from; defaults to ``random.Random(0)``.
        """
        self.vm = vm
        self.rng = rng or random.Random(0)
        self.binaries: list[str] = []
        self.files: set[str] = {"~", "/tmp", "/etc/hostname", "/proc/cpuinfo"}
        # library of commands observed to *work* — grows into new building blocks
        self.library: list[str] = [
            "echo hello", "ls -la", "pwd", "whoami", "uname -a",
            "cat /etc/os-release", "ls /usr/bin | head", "seq 1 5",
        ]
        self._discovered = False

    # ── discovery ────────────────────────────────────────────────────────────
    def discover(self) -> None:
        """One-time enumeration of what the machine offers (PATH binaries).

        Lists ``/usr/bin`` and ``/bin`` inside the VM. If nothing usable comes
        back, the statically known binaries from ``_BIN2FAM`` are used instead.
        """
        r = self.vm.run("ls /usr/bin /bin 2>/dev/null | sort -u", timeout=20)
        # Lines containing "/" are the directory headers `ls` prints, not binaries.
        bins = [b.strip() for b in r.stdout.splitlines() if b.strip() and "/" not in b]
        self.binaries = bins or list(_BIN2FAM.keys())
        self._discovered = True

    def observe(self, command: str, result) -> None:
        """Learn from a run: keep working commands, harvest discovered paths.

        Args:
            command: The command that was executed.
            result: Run result; its ``exit_code`` and ``stdout`` are read.
        """
        if result.exit_code == 0 and command not in self.library:
            if len(self.library) < self._MAX_LIBRARY:
                self.library.append(command)
        # harvest path-looking tokens from output the agent just saw
        for tok in re.findall(r"/[\w./-]+", result.stdout[:4000]):
            if len(self.files) >= self._MAX_FILES:
                break  # pool is full; nothing further can be added
            self.files.add(tok)

    # ── proposing ────────────────────────────────────────────────────────────
    def propose(self, n: int = 16) -> list[str]:
        """Return up to *n* distinct candidate commands.

        Strategies are picked at random until *n* distinct commands exist or
        ``n * 6`` attempts were made. Candidates are returned in the order they
        were first generated, so the result is reproducible for a seeded
        ``rng`` (a plain ``set`` would order them by per-process string hashes).
        """
        if not self._discovered:
            self.discover()
        # dict used as an insertion-ordered set
        cands: dict[str, None] = {}
        strategies = [
            self._bare_binary, self._binary_help, self._on_file, self._mutate,
            self._compose_pipe, self._explore_fs, self._create_and_run, self._write_code,
        ]
        guard = 0
        while len(cands) < n and guard < n * 6:
            guard += 1
            try:
                c = self.rng.choice(strategies)()
            except Exception:
                # Best effort: a failing strategy just costs one attempt.
                c = None
            if c:
                cands[c.strip()] = None
        return list(cands)[:n]

    def _rand_bin(self) -> str:
        """Pick a random known binary (``"echo"`` when none are known)."""
        return self.rng.choice(self.binaries) if self.binaries else "echo"

    def _rand_file(self) -> str:
        """Pick a random harvested path; sorted first so the draw is deterministic."""
        return self.rng.choice(sorted(self.files))

    def _bare_binary(self) -> str:
        """Strategy: run a binary with no arguments."""
        return self._rand_bin()

    def _binary_help(self) -> str:
        """Strategy: ask a binary for the first lines of its help text."""
        return f"{self._rand_bin()} --help 2>&1 | head -5"

    def _on_file(self) -> str:
        """Strategy: apply a read-only inspection tool to a known path."""
        b = self.rng.choice(["cat", "ls -la", "stat", "wc -l", "head", "file", "du -h"])
        return f"{b} {self._rand_file()}"

    def _mutate(self) -> str:
        """Strategy: take a working command and append a filter stage."""
        base = self.rng.choice(self.library)
        pipe = self.rng.choice(["| head -3", "| wc -l", "| sort", "| grep -c .", "| tr a-z A-Z"])
        return f"{base} 2>&1 {pipe}"

    def _compose_pipe(self) -> str:
        """Strategy: pipe a working command into a text tool."""
        a, b = self.rng.choice(self.library), self.rng.choice(
            ["wc -c", "grep .", "sort", "uniq -c", "head -4", "tac", "rev"])
        return f"{a} 2>/dev/null | {b}"

    def _explore_fs(self) -> str:
        """Strategy: look around the filesystem and system state."""
        return self.rng.choice([
            f"ls -la {self._rand_file()}", "find / -maxdepth 2 -type d 2>/dev/null | head",
            "ls /proc | head", f"cat {self._rand_file()} 2>&1 | head -3",
            "df -h", "free -m", "ps aux | head", "env | head",
        ])

    def _create_and_run(self) -> str:
        """Strategy: create files/directories under /tmp or run small pipelines."""
        n = self.rng.randint(1, 999)
        return self.rng.choice([
            f"echo data{n} > /tmp/f{n}.txt && cat /tmp/f{n}.txt",
            f"mkdir -p /tmp/d{n} && ls -la /tmp/d{n}",
            f"seq 1 {n % 20 + 1} | sort -r",
            "printf 'a\\nb\\nc\\n' | grep b",
        ])

    def _write_code(self) -> str:
        """Write and execute real code — learn compiling/running mechanics."""
        n = self.rng.randint(1, 999)
        return self.rng.choice([
            f"python3 -c 'print(sum(range({n % 50 + 1})))'",
            f"echo 'int main(){{return {n % 5};}}' > /tmp/p{n}.c "
            f"&& gcc /tmp/p{n}.c -o /tmp/p{n} 2>&1 && /tmp/p{n}; echo rc=$?",
            "python3 -c 'import os; print(os.listdir(\"/tmp\")[:5])'",
            f"echo 'print(2**{n % 16})' | python3",
        ])
|