agentkernel-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentkernel/__init__.py +7 -0
- agentkernel/__main__.py +5 -0
- agentkernel/agent.py +311 -0
- agentkernel/approval/__init__.py +23 -0
- agentkernel/approval/base.py +34 -0
- agentkernel/approval/cli.py +129 -0
- agentkernel/approval/policy.py +58 -0
- agentkernel/approval/risk.py +91 -0
- agentkernel/approval/sandbox.py +201 -0
- agentkernel/budget.py +64 -0
- agentkernel/checkpoint.py +50 -0
- agentkernel/cli.py +1482 -0
- agentkernel/config.py +224 -0
- agentkernel/context/__init__.py +17 -0
- agentkernel/context/manager.py +216 -0
- agentkernel/context/truncate.py +35 -0
- agentkernel/cron.py +146 -0
- agentkernel/curation.py +183 -0
- agentkernel/doctor.py +141 -0
- agentkernel/embeddings.py +132 -0
- agentkernel/evaluation.py +186 -0
- agentkernel/improvement.py +133 -0
- agentkernel/insights.py +141 -0
- agentkernel/kanban.py +114 -0
- agentkernel/knowledge.py +383 -0
- agentkernel/loops.py +145 -0
- agentkernel/mcp/__init__.py +23 -0
- agentkernel/mcp/client.py +181 -0
- agentkernel/mcp/config.py +59 -0
- agentkernel/mcp/tools.py +96 -0
- agentkernel/memory.py +1208 -0
- agentkernel/paths.py +73 -0
- agentkernel/plugins.py +76 -0
- agentkernel/profiles.py +70 -0
- agentkernel/progress.py +89 -0
- agentkernel/providers/__init__.py +35 -0
- agentkernel/providers/_http.py +157 -0
- agentkernel/providers/anthropic.py +282 -0
- agentkernel/providers/base.py +38 -0
- agentkernel/providers/credentials.py +65 -0
- agentkernel/providers/local.py +34 -0
- agentkernel/providers/openai.py +260 -0
- agentkernel/redaction.py +77 -0
- agentkernel/semantic_index.py +139 -0
- agentkernel/semantic_memory.py +253 -0
- agentkernel/skills.py +268 -0
- agentkernel/subagent.py +161 -0
- agentkernel/telemetry.py +199 -0
- agentkernel/templates/README.md +35 -0
- agentkernel/templates/SKILL.md +28 -0
- agentkernel/templates/eval-suite.toml +22 -0
- agentkernel/templates/loop.toml +29 -0
- agentkernel/templates/mcp-servers.toml +22 -0
- agentkernel/templates/profile.toml +29 -0
- agentkernel/templates/tool_module.py +64 -0
- agentkernel/tools/__init__.py +5 -0
- agentkernel/tools/base.py +100 -0
- agentkernel/tools/builtin/__init__.py +37 -0
- agentkernel/tools/builtin/checkpoint_tool.py +33 -0
- agentkernel/tools/builtin/clarify.py +60 -0
- agentkernel/tools/builtin/files.py +221 -0
- agentkernel/tools/builtin/kanban_tool.py +100 -0
- agentkernel/tools/builtin/search.py +225 -0
- agentkernel/tools/builtin/shell.py +67 -0
- agentkernel/tools/builtin/todo.py +106 -0
- agentkernel/tui/__init__.py +50 -0
- agentkernel/tui/app.py +594 -0
- agentkernel/types.py +127 -0
- agentkernel/worktree.py +64 -0
- agentkernel_cli-0.1.0.dist-info/METADATA +426 -0
- agentkernel_cli-0.1.0.dist-info/RECORD +74 -0
- agentkernel_cli-0.1.0.dist-info/WHEEL +4 -0
- agentkernel_cli-0.1.0.dist-info/entry_points.txt +2 -0
- agentkernel_cli-0.1.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""Execution boundary implementations (design §10.3).
|
|
2
|
+
|
|
3
|
+
``LocalSandbox`` runs commands as a subprocess confined to a working directory.
|
|
4
|
+
``DockerSandbox`` runs them inside a per-project container — real isolation
|
|
5
|
+
(separate filesystem, no host network by default, resource limits) — behind the
|
|
6
|
+
same ``Sandbox`` protocol, so ``bash``'s handler never changes.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import shlex
|
|
13
|
+
import signal
|
|
14
|
+
import subprocess
|
|
15
|
+
import uuid
|
|
16
|
+
from collections.abc import Callable
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
# Signature of the callable that executes an argv list: it receives the argv
# and a timeout, and returns ``(exit_code, stdout, stderr)``. DockerSandbox
# takes one as a parameter so it can be exercised without a Docker daemon.
CommandRunner = Callable[[list[str], int], "tuple[int, str, str]"]
|
|
22
|
+
|
|
23
|
+
# Substrings that mark an environment variable as a secret to scrub before
# handing the environment to a subprocess. Matching is done against the
# upper-cased variable *name*; values are never inspected.
_SECRET_MARKERS = ("API_KEY", "TOKEN", "SECRET", "PASSWORD", "PASSWD", "CREDENTIAL")


def _scrubbed_env() -> dict[str, str]:
    """Return a copy of ``os.environ`` with secret-looking variables removed.

    A variable is dropped when its upper-cased name contains any entry of
    ``_SECRET_MARKERS``. The process environment itself is not modified.
    """
    safe: dict[str, str] = {}
    for name, value in os.environ.items():
        # Upper-case once per variable rather than once per marker.
        upper_name = name.upper()
        if any(marker in upper_name for marker in _SECRET_MARKERS):
            continue
        safe[name] = value
    return safe
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _kill_tree(proc: subprocess.Popen) -> None:
    """Kill ``proc`` *and its children* so a timeout actually stops the work.

    With ``shell=True`` the ``Popen`` object is the shell and the real command
    is its child, so killing only ``proc`` would orphan the command and leave
    the output pipe open. The caller is expected to have started ``proc`` in
    its own process group/session; the whole group is then torn down.

    Args:
        proc: The process whose tree should be terminated.
    """
    if os.name == "posix":
        try:
            pgid = os.getpgid(proc.pid)
            if pgid == os.getpgrp():
                # proc shares *our* process group (it was not started in its
                # own session). Signalling the group would SIGKILL this
                # interpreter as well, so only kill the process itself.
                proc.kill()
            else:
                os.killpg(pgid, signal.SIGKILL)
        except (ProcessLookupError, PermissionError):  # pragma: no cover
            proc.kill()
        return

    # taskkill /T kills the cmd.exe child tree; then kill the shell itself.
    try:
        subprocess.run(
            ["taskkill", "/F", "/T", "/PID", str(proc.pid)],
            capture_output=True,
        )
    except OSError:
        # taskkill could not be launched; still fall through and kill the
        # shell so the caller's pipe drain does not wait on a live process.
        pass
    proc.kill()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class SandboxError(RuntimeError):
    """Raised for sandbox lifecycle failures, e.g. a container that will not start."""
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class LocalSandbox:
    """Subprocess execution started in ``cwd``, with a real timeout (design §10.3).

    Commands run through the host shell with a scrubbed environment. Only the
    starting directory is set; nothing here restricts which paths the command
    may touch.
    """

    def run(self, command: str, *, cwd: str, timeout: int) -> tuple[int, str, str]:
        """Run ``command`` through the shell and capture its output.

        Args:
            command: Shell command line to execute.
            cwd: Directory the shell is started in.
            timeout: Seconds to wait before the process tree is killed.

        Returns:
            ``(exit_code, stdout, stderr)``. On timeout the exit code is 124
            and ``"\\n[timed out after {timeout}s]"`` is appended to stderr.
        """
        kwargs: dict = dict(
            shell=True,
            cwd=cwd,
            env=_scrubbed_env(),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            # Commands may emit bytes that are not valid in the locale
            # encoding; replace them instead of raising UnicodeDecodeError.
            errors="replace",
        )
        # Isolate the process group/tree so _kill_tree can stop it on timeout.
        if os.name == "posix":
            kwargs["start_new_session"] = True
        else:
            kwargs["creationflags"] = subprocess.CREATE_NEW_PROCESS_GROUP

        proc = subprocess.Popen(command, **kwargs)
        try:
            out, err = proc.communicate(timeout=timeout)
        except subprocess.TimeoutExpired:
            _kill_tree(proc)
            out, err = proc.communicate()  # drain pipes after the tree is dead
            return 124, out or "", (err or "") + f"\n[timed out after {timeout}s]"
        return proc.returncode, out, err

    def close(self) -> None:
        """No persistent resources to release (kept for the Sandbox protocol)."""
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _subprocess_runner(argv: list[str], timeout: int) -> tuple[int, str, str]:
    """Default ``CommandRunner``: run an argv list, capture output, honor timeout.

    Args:
        argv: Program and arguments, passed to ``subprocess.run`` as a list
            (no shell involved).
        timeout: Seconds to wait for the command to finish.

    Returns:
        ``(exit_code, stdout, stderr)``. On timeout the exit code is 124, the
        output is whatever had been captured, and ``"\\n[timed out]"`` is
        appended to stderr.
    """
    try:
        proc = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            # Undecodable output bytes are replaced rather than raising
            # UnicodeDecodeError out of the runner.
            errors="replace",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as exc:
        out = exc.stdout or ""
        err = exc.stderr or ""
        # The partial output attached to the exception can still be bytes.
        if isinstance(out, bytes):
            out = out.decode("utf-8", "replace")
        if isinstance(err, bytes):
            err = err.decode("utf-8", "replace")
        return 124, out, err + "\n[timed out]"
    return proc.returncode, proc.stdout, proc.stderr
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class DockerSandbox:
    """Runs commands inside one long-lived container per project (design §10.3).

    The host working directory is bind-mounted at ``workdir`` inside the
    container; the container starts lazily on the first ``run`` and is removed
    by ``close``. By default it is started with ``--network none`` and bounded
    memory/CPU/PIDs, so of the host filesystem a command sees only the
    bind-mounted project directory.

    The Docker CLI is invoked through an injectable ``runner`` so the argv
    construction and lifecycle are unit-testable without a Docker daemon.
    """

    def __init__(
        self,
        working_dir: str = ".",
        *,
        image: str = "python:3.12-slim",
        network: str = "none",
        memory: str = "512m",
        cpus: str = "1.0",
        pids_limit: int = 256,
        workdir: str = "/workspace",
        container_name: str | None = None,
        runner: CommandRunner | None = None,
    ) -> None:
        """Store the container configuration; nothing is started here.

        Args:
            working_dir: Host directory to bind-mount (resolved to an absolute path).
            image: Image passed to ``docker run``.
            network: Value for ``--network``.
            memory: Value for ``--memory``.
            cpus: Value for ``--cpus``.
            pids_limit: Value for ``--pids-limit``.
            workdir: Mount point and working directory inside the container.
            container_name: Container name; a random ``agentkernel-<hex>``
                name is generated when omitted.
            runner: Callable used to execute Docker argv lists; defaults to
                ``_subprocess_runner``.
        """
        self._host_dir = str(Path(working_dir).resolve())
        self._image = image
        self._network = network
        self._memory = memory
        self._cpus = cpus
        self._pids_limit = pids_limit
        self._workdir = workdir
        self._name = container_name or f"agentkernel-{uuid.uuid4().hex[:12]}"
        self._run = runner or _subprocess_runner
        self._started = False

    # --- docker argv construction (pure; the unit-test surface) ------------

    def _start_args(self) -> list[str]:
        """Build the ``docker run`` argv that starts the idle container."""
        return [
            "docker", "run", "-d", "--rm", "--name", self._name,
            "--network", self._network,
            "--memory", self._memory,
            "--cpus", self._cpus,
            "--pids-limit", str(self._pids_limit),
            "--security-opt", "no-new-privileges",
            "-v", f"{self._host_dir}:{self._workdir}",
            "-w", self._workdir,
            self._image, "sleep", "infinity",
        ]

    def _exec_args(self, command: str, timeout: int) -> list[str]:
        """Build the ``docker exec`` argv that runs ``command`` in the container."""
        # Wrap in the container's `timeout` so the in-container process is
        # actually killed, not just the host-side `docker exec`.
        inner = f"timeout {timeout} sh -c {shlex.quote(command)}"
        return ["docker", "exec", "-w", self._workdir, self._name, "sh", "-c", inner]

    # --- lifecycle ---------------------------------------------------------

    def start(self) -> None:
        """Start the container if it is not already running.

        Raises:
            SandboxError: If ``docker run`` exits with a non-zero status.
        """
        if self._started:
            return
        code, out, err = self._run(self._start_args(), 120)
        if code != 0:
            raise SandboxError(f"could not start container: {(err or out).strip()}")
        self._started = True

    def run(self, command: str, *, cwd: str, timeout: int) -> tuple[int, str, str]:
        """Run ``command`` inside the container, starting it first if needed.

        NOTE(review): ``cwd`` is accepted but not used here; the command always
        runs in ``workdir``. Presumably it exists to match ``LocalSandbox.run``
        — confirm whether host sub-directories should be mapped.

        Args:
            command: Shell command line to execute inside the container.
            cwd: Unused (see note above).
            timeout: Seconds allowed for the in-container command.

        Returns:
            ``(exit_code, stdout, stderr)`` from the runner, or
            ``(127, "", "docker sandbox unavailable: ...")`` when the container
            cannot be started or the Docker CLI cannot be executed.
        """
        try:
            self.start()
        except (SandboxError, OSError) as exc:
            # Surface as a non-zero result so bash turns it into an error result
            # the model can react to (design §8.3) rather than crashing the loop.
            return 127, "", f"docker sandbox unavailable: {exc}"
        try:
            # Allow a little host-side slack beyond the in-container timeout.
            return self._run(self._exec_args(command, timeout), timeout + 5)
        except OSError as exc:
            # Same contract as a failed start: the Docker CLI going missing
            # mid-session becomes an error result, not an exception.
            return 127, "", f"docker sandbox unavailable: {exc}"

    def close(self) -> None:
        """Force-remove the container if it was started; otherwise do nothing."""
        if not self._started:
            return
        self._run(["docker", "rm", "-f", self._name], 30)
        self._started = False
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def make_sandbox(
    sandbox: str, working_dir: str, *, image: str = "python:3.12-slim", network: str = "none"
):
    """Return the sandbox selected by ``sandbox``.

    ``"docker"`` yields a ``DockerSandbox`` for ``working_dir`` using ``image``
    and ``network``; every other value yields a ``LocalSandbox``.
    """
    if sandbox != "docker":
        return LocalSandbox()
    return DockerSandbox(working_dir, image=image, network=network)
|
agentkernel/budget.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Run-level budget guardrails (design §11 extension).
|
|
2
|
+
|
|
3
|
+
A ``BudgetGuard`` tracks cumulative usage for one ``Agent.run`` call and returns a
|
|
4
|
+
stop reason if the configured cost or token ceiling is exceeded. It is re-set at
|
|
5
|
+
the start of every run so the guard is per-run, not global across a REPL session.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
|
|
12
|
+
from agentkernel.telemetry import DEFAULT_PRICES, Price, estimate_cost
|
|
13
|
+
from agentkernel.types import Usage
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class BudgetGuard:
    """Guardrail that accumulates usage and reports when a ceiling is passed.

    Usage is added with ``add`` and checked with ``exceeded``, which compares
    the accumulated input tokens against ``max_input_tokens`` and the
    estimated cost against ``max_cost_usd``. A limit of ``None`` disables
    that check.
    """

    # Cost ceiling in USD; None means no cost limit.
    max_cost_usd: float | None = None
    # Ceiling on accumulated input tokens; None means no token limit.
    max_input_tokens: int | None = None
    # Model name handed to estimate_cost together with ``prices``.
    model: str = "unknown"
    # Per-instance copy of the default price table.
    prices: dict[str, Price] = field(default_factory=lambda: DEFAULT_PRICES.copy())
    # Running totals since construction or the last reset().
    _total: Usage = field(default_factory=Usage)

    def reset(self) -> None:
        """Discard the accumulated usage and start again from zero."""
        self._total = Usage()

    def add(self, usage: Usage) -> None:
        """Add the four token counters of ``usage`` to the running totals."""
        totals = self._total
        for counter in (
            "input_tokens",
            "output_tokens",
            "cache_read_tokens",
            "cache_write_tokens",
        ):
            setattr(totals, counter, getattr(totals, counter) + getattr(usage, counter))

    def exceeded(self) -> tuple[bool, str]:
        """Return ``(True, reason)`` if a budget has been exceeded, else ``(False, "")``."""
        token_cap = self.max_input_tokens
        if token_cap is not None:
            used = self._total.input_tokens
            if used > token_cap:
                return True, f"input_tokens {used:n} > limit {token_cap:n}"

        cost_cap = self.max_cost_usd
        if cost_cap is None:
            return False, ""

        # estimate_cost may return None; that is treated as within budget.
        spent = estimate_cost(self.model, self._total, self.prices)
        if spent is not None and spent > cost_cap:
            return True, f"cost ${spent:.6f} > limit ${cost_cap:.2f}"
        return False, ""

    @property
    def total_usage(self) -> Usage:
        """A fresh ``Usage`` holding the accumulated counters (not the internal object)."""
        totals = self._total
        snapshot = {
            "input_tokens": totals.input_tokens,
            "output_tokens": totals.output_tokens,
            "cache_read_tokens": totals.cache_read_tokens,
            "cache_write_tokens": totals.cache_write_tokens,
        }
        return Usage(**snapshot)

    @property
    def total_cost(self) -> float | None:
        """Estimated cost of the accumulated usage, as returned by ``estimate_cost``."""
        return estimate_cost(self.model, self._total, self.prices)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Filesystem checkpoints (design §18.1).
|
|
2
|
+
|
|
3
|
+
When enabled, the builtin file tools record a file's contents *before* they first
|
|
4
|
+
modify it this session. The ``rollback`` tool then restores every recorded file
|
|
5
|
+
to that original state — undoing the agent's edits in one step, including files it
|
|
6
|
+
created (which are deleted on rollback). This makes a destructive run reversible
|
|
7
|
+
without trusting the model to clean up after itself.
|
|
8
|
+
|
|
9
|
+
Backups are held per session (in memory); the first time a path is touched its
|
|
10
|
+
original bytes are captured, so repeated edits to the same file still roll back to
|
|
11
|
+
the pre-run state, not the previous edit.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Checkpointer:
    """Records pre-modification file state and restores it on rollback."""

    def __init__(self) -> None:
        # path -> original bytes, or None if the file did not exist yet.
        self._original: dict[Path, bytes | None] = {}

    def record(self, path: Path) -> None:
        """Capture ``path``'s current state, once, before it is first modified.

        Paths are keyed by their resolved form. A path that is not a regular
        file at this point is recorded as "did not exist".
        """
        key = path.resolve()
        if key in self._original:
            return  # already captured the pre-run state; keep the earliest
        self._original[key] = key.read_bytes() if key.is_file() else None

    def pending(self) -> int:
        """How many files have a recorded checkpoint."""
        return len(self._original)

    def rollback(self) -> int:
        """Restore every recorded file to its captured state.

        Files that did not exist when recorded are deleted; all others get
        their original bytes back. The recorded checkpoints are cleared
        afterwards.

        Returns:
            The number of recorded paths that were processed.
        """
        restored = 0
        for path, content in self._original.items():
            if content is None:
                # The file did not exist at checkpoint time → remove it.
                if path.is_file():
                    path.unlink()
            else:
                # The containing directory may have been deleted since the
                # checkpoint; recreate it so the restore cannot fail on a
                # missing parent.
                path.parent.mkdir(parents=True, exist_ok=True)
                path.write_bytes(content)
            restored += 1
        self._original.clear()
        return restored
|