llmcode-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_code/__init__.py +2 -0
- llm_code/analysis/__init__.py +6 -0
- llm_code/analysis/cache.py +33 -0
- llm_code/analysis/engine.py +256 -0
- llm_code/analysis/go_rules.py +114 -0
- llm_code/analysis/js_rules.py +84 -0
- llm_code/analysis/python_rules.py +311 -0
- llm_code/analysis/rules.py +140 -0
- llm_code/analysis/rust_rules.py +108 -0
- llm_code/analysis/universal_rules.py +111 -0
- llm_code/api/__init__.py +0 -0
- llm_code/api/client.py +90 -0
- llm_code/api/errors.py +73 -0
- llm_code/api/openai_compat.py +390 -0
- llm_code/api/provider.py +35 -0
- llm_code/api/sse.py +52 -0
- llm_code/api/types.py +140 -0
- llm_code/cli/__init__.py +0 -0
- llm_code/cli/commands.py +70 -0
- llm_code/cli/image.py +122 -0
- llm_code/cli/render.py +214 -0
- llm_code/cli/status_line.py +79 -0
- llm_code/cli/streaming.py +92 -0
- llm_code/cli/tui_main.py +220 -0
- llm_code/computer_use/__init__.py +11 -0
- llm_code/computer_use/app_detect.py +49 -0
- llm_code/computer_use/app_tier.py +57 -0
- llm_code/computer_use/coordinator.py +99 -0
- llm_code/computer_use/input_control.py +71 -0
- llm_code/computer_use/screenshot.py +93 -0
- llm_code/cron/__init__.py +13 -0
- llm_code/cron/parser.py +145 -0
- llm_code/cron/scheduler.py +135 -0
- llm_code/cron/storage.py +126 -0
- llm_code/enterprise/__init__.py +1 -0
- llm_code/enterprise/audit.py +59 -0
- llm_code/enterprise/auth.py +26 -0
- llm_code/enterprise/oidc.py +95 -0
- llm_code/enterprise/rbac.py +65 -0
- llm_code/harness/__init__.py +5 -0
- llm_code/harness/config.py +33 -0
- llm_code/harness/engine.py +129 -0
- llm_code/harness/guides.py +41 -0
- llm_code/harness/sensors.py +68 -0
- llm_code/harness/templates.py +84 -0
- llm_code/hida/__init__.py +1 -0
- llm_code/hida/classifier.py +187 -0
- llm_code/hida/engine.py +49 -0
- llm_code/hida/profiles.py +95 -0
- llm_code/hida/types.py +28 -0
- llm_code/ide/__init__.py +1 -0
- llm_code/ide/bridge.py +80 -0
- llm_code/ide/detector.py +76 -0
- llm_code/ide/server.py +169 -0
- llm_code/logging.py +29 -0
- llm_code/lsp/__init__.py +0 -0
- llm_code/lsp/client.py +298 -0
- llm_code/lsp/detector.py +42 -0
- llm_code/lsp/manager.py +56 -0
- llm_code/lsp/tools.py +288 -0
- llm_code/marketplace/__init__.py +0 -0
- llm_code/marketplace/builtin_registry.py +102 -0
- llm_code/marketplace/installer.py +162 -0
- llm_code/marketplace/plugin.py +78 -0
- llm_code/marketplace/registry.py +360 -0
- llm_code/mcp/__init__.py +0 -0
- llm_code/mcp/bridge.py +87 -0
- llm_code/mcp/client.py +117 -0
- llm_code/mcp/health.py +120 -0
- llm_code/mcp/manager.py +214 -0
- llm_code/mcp/oauth.py +219 -0
- llm_code/mcp/transport.py +254 -0
- llm_code/mcp/types.py +53 -0
- llm_code/remote/__init__.py +0 -0
- llm_code/remote/client.py +136 -0
- llm_code/remote/protocol.py +22 -0
- llm_code/remote/server.py +275 -0
- llm_code/remote/ssh_proxy.py +56 -0
- llm_code/runtime/__init__.py +0 -0
- llm_code/runtime/auto_commit.py +56 -0
- llm_code/runtime/auto_diagnose.py +62 -0
- llm_code/runtime/checkpoint.py +70 -0
- llm_code/runtime/checkpoint_recovery.py +142 -0
- llm_code/runtime/compaction.py +35 -0
- llm_code/runtime/compressor.py +415 -0
- llm_code/runtime/config.py +533 -0
- llm_code/runtime/context.py +49 -0
- llm_code/runtime/conversation.py +921 -0
- llm_code/runtime/cost_tracker.py +126 -0
- llm_code/runtime/dream.py +127 -0
- llm_code/runtime/file_protection.py +150 -0
- llm_code/runtime/hardware.py +85 -0
- llm_code/runtime/hooks.py +223 -0
- llm_code/runtime/indexer.py +230 -0
- llm_code/runtime/knowledge_compiler.py +232 -0
- llm_code/runtime/memory.py +132 -0
- llm_code/runtime/memory_layers.py +467 -0
- llm_code/runtime/memory_lint.py +252 -0
- llm_code/runtime/model_aliases.py +37 -0
- llm_code/runtime/ollama.py +93 -0
- llm_code/runtime/overlay.py +124 -0
- llm_code/runtime/permissions.py +200 -0
- llm_code/runtime/plan.py +45 -0
- llm_code/runtime/prompt.py +238 -0
- llm_code/runtime/repo_map.py +174 -0
- llm_code/runtime/sandbox.py +116 -0
- llm_code/runtime/session.py +268 -0
- llm_code/runtime/skill_resolver.py +61 -0
- llm_code/runtime/skills.py +133 -0
- llm_code/runtime/speculative.py +75 -0
- llm_code/runtime/streaming_executor.py +216 -0
- llm_code/runtime/telemetry.py +196 -0
- llm_code/runtime/token_budget.py +26 -0
- llm_code/runtime/vcr.py +142 -0
- llm_code/runtime/vision.py +102 -0
- llm_code/swarm/__init__.py +1 -0
- llm_code/swarm/backend_subprocess.py +108 -0
- llm_code/swarm/backend_tmux.py +103 -0
- llm_code/swarm/backend_worktree.py +306 -0
- llm_code/swarm/checkpoint.py +74 -0
- llm_code/swarm/coordinator.py +236 -0
- llm_code/swarm/mailbox.py +88 -0
- llm_code/swarm/manager.py +202 -0
- llm_code/swarm/memory_sync.py +80 -0
- llm_code/swarm/recovery.py +21 -0
- llm_code/swarm/team.py +67 -0
- llm_code/swarm/types.py +31 -0
- llm_code/task/__init__.py +16 -0
- llm_code/task/diagnostics.py +93 -0
- llm_code/task/manager.py +162 -0
- llm_code/task/types.py +112 -0
- llm_code/task/verifier.py +104 -0
- llm_code/tools/__init__.py +0 -0
- llm_code/tools/agent.py +145 -0
- llm_code/tools/agent_roles.py +82 -0
- llm_code/tools/base.py +94 -0
- llm_code/tools/bash.py +565 -0
- llm_code/tools/computer_use_tools.py +278 -0
- llm_code/tools/coordinator_tool.py +75 -0
- llm_code/tools/cron_create.py +90 -0
- llm_code/tools/cron_delete.py +49 -0
- llm_code/tools/cron_list.py +51 -0
- llm_code/tools/deferred.py +92 -0
- llm_code/tools/dump.py +116 -0
- llm_code/tools/edit_file.py +282 -0
- llm_code/tools/git_tools.py +531 -0
- llm_code/tools/glob_search.py +112 -0
- llm_code/tools/grep_search.py +144 -0
- llm_code/tools/ide_diagnostics.py +59 -0
- llm_code/tools/ide_open.py +58 -0
- llm_code/tools/ide_selection.py +52 -0
- llm_code/tools/memory_tools.py +138 -0
- llm_code/tools/multi_edit.py +143 -0
- llm_code/tools/notebook_edit.py +107 -0
- llm_code/tools/notebook_read.py +81 -0
- llm_code/tools/parsing.py +63 -0
- llm_code/tools/read_file.py +154 -0
- llm_code/tools/registry.py +58 -0
- llm_code/tools/search_backends/__init__.py +56 -0
- llm_code/tools/search_backends/brave.py +56 -0
- llm_code/tools/search_backends/duckduckgo.py +129 -0
- llm_code/tools/search_backends/searxng.py +71 -0
- llm_code/tools/search_backends/tavily.py +73 -0
- llm_code/tools/swarm_create.py +109 -0
- llm_code/tools/swarm_delete.py +95 -0
- llm_code/tools/swarm_list.py +44 -0
- llm_code/tools/swarm_message.py +109 -0
- llm_code/tools/task_close.py +79 -0
- llm_code/tools/task_plan.py +79 -0
- llm_code/tools/task_verify.py +90 -0
- llm_code/tools/tool_search.py +65 -0
- llm_code/tools/web_common.py +258 -0
- llm_code/tools/web_fetch.py +223 -0
- llm_code/tools/web_search.py +280 -0
- llm_code/tools/write_file.py +118 -0
- llm_code/tui/__init__.py +1 -0
- llm_code/tui/app.py +2432 -0
- llm_code/tui/chat_view.py +82 -0
- llm_code/tui/chat_widgets.py +309 -0
- llm_code/tui/header_bar.py +46 -0
- llm_code/tui/input_bar.py +349 -0
- llm_code/tui/keybindings.py +142 -0
- llm_code/tui/marketplace.py +210 -0
- llm_code/tui/status_bar.py +72 -0
- llm_code/tui/theme.py +96 -0
- llm_code/utils/__init__.py +0 -0
- llm_code/utils/diff.py +111 -0
- llm_code/utils/errors.py +70 -0
- llm_code/utils/hyperlink.py +73 -0
- llm_code/utils/notebook.py +179 -0
- llm_code/utils/search.py +69 -0
- llm_code/utils/text_normalize.py +28 -0
- llm_code/utils/version_check.py +62 -0
- llm_code/vim/__init__.py +4 -0
- llm_code/vim/engine.py +51 -0
- llm_code/vim/motions.py +172 -0
- llm_code/vim/operators.py +183 -0
- llm_code/vim/text_objects.py +139 -0
- llm_code/vim/transitions.py +279 -0
- llm_code/vim/types.py +68 -0
- llm_code/voice/__init__.py +1 -0
- llm_code/voice/languages.py +43 -0
- llm_code/voice/recorder.py +136 -0
- llm_code/voice/stt.py +36 -0
- llm_code/voice/stt_anthropic.py +66 -0
- llm_code/voice/stt_google.py +32 -0
- llm_code/voice/stt_whisper.py +52 -0
- llmcode_cli-1.0.0.dist-info/METADATA +524 -0
- llmcode_cli-1.0.0.dist-info/RECORD +212 -0
- llmcode_cli-1.0.0.dist-info/WHEEL +4 -0
- llmcode_cli-1.0.0.dist-info/entry_points.txt +2 -0
- llmcode_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
llm_code/swarm/types.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Frozen dataclasses for swarm member state and messages."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from enum import Enum
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SwarmStatus(Enum):
    """Lifecycle states for a swarm member process."""

    STARTING = "starting"  # launched, not yet confirmed running
    RUNNING = "running"    # actively working
    STOPPED = "stopped"    # exited or was stopped on request
    FAILED = "failed"      # exited abnormally
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class SwarmMember:
    """Immutable snapshot of a single swarm member's state."""

    id: str  # unique member identifier
    role: str  # role label assigned at spawn time
    task: str  # the task text delegated to this member
    backend: str  # "tmux" | "subprocess"
    pid: int | None  # OS process id; None when not available
    status: SwarmStatus  # current lifecycle state
    model: str = ""  # optional model override; empty means default
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class SwarmMessage:
    """Immutable message passed between swarm members via the mailbox."""

    from_id: str  # sender member id
    to_id: str  # member id, or "*" for broadcast
    text: str  # message body
    timestamp: str  # ISO-8601
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Structured task lifecycle management."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from llm_code.task.types import (
|
|
5
|
+
CheckResult,
|
|
6
|
+
TaskState,
|
|
7
|
+
TaskStatus,
|
|
8
|
+
VerifyResult,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"CheckResult",
|
|
13
|
+
"TaskState",
|
|
14
|
+
"TaskStatus",
|
|
15
|
+
"VerifyResult",
|
|
16
|
+
]
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Diagnostics engine: analyze verification failures and recommend actions."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import dataclasses
|
|
5
|
+
import json
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from llm_code.task.types import TaskState, VerifyResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclasses.dataclass(frozen=True)
class DiagnosticReport:
    """Immutable result of analyzing a task's verification outcome."""

    task_id: str  # id of the analyzed task
    failed_checks: tuple[str, ...]  # names of checks that did not pass
    recommendation: str  # "continue" | "replan" | "escalate"
    summary: str  # human-readable explanation of the recommendation
    report_path: str  # on-disk JSON report; "" when nothing was written
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DiagnosticsEngine:
    """Analyze verify failures and recommend next action.

    Failed analyses are also persisted as JSON reports under the
    directory supplied at construction time.
    """

    def __init__(self, diagnostics_dir: Path) -> None:
        self._dir = diagnostics_dir
        self._dir.mkdir(parents=True, exist_ok=True)

    def analyze(self, task: TaskState, verify_result: VerifyResult) -> DiagnosticReport:
        """Analyze a VerifyResult and return a DiagnosticReport with recommendation."""
        if verify_result.all_passed:
            # Nothing failed, so no report file is written for this round.
            return DiagnosticReport(
                task_id=task.id,
                failed_checks=(),
                recommendation="continue",
                summary="All checks passed.",
                report_path="",
            )

        failed_names = tuple(
            check.check_name for check in verify_result.checks if not check.passed
        )
        num_checks = len(verify_result.checks)
        num_failed = len(failed_names)

        # How many earlier verification rounds already failed for this task.
        earlier_failures = sum(
            1 for prior in task.verify_results if not prior.all_passed
        )

        recommendation, summary = self._recommend(
            failed_names, num_failed, num_checks, earlier_failures
        )

        # Persist the full report to disk for later inspection.
        payload = {
            "task_id": task.id,
            "task_title": task.title,
            "failed_checks": list(failed_names),
            "recommendation": recommendation,
            "summary": summary,
            "check_details": [
                {"name": check.check_name, "passed": check.passed, "output": check.output}
                for check in verify_result.checks
            ],
            "prior_failure_count": earlier_failures,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
        stamp = datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')
        out_path = self._dir / f"{task.id}-{stamp}.json"
        out_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")

        return DiagnosticReport(
            task_id=task.id,
            failed_checks=failed_names,
            recommendation=recommendation,
            summary=summary,
            report_path=str(out_path),
        )

    def _recommend(
        self,
        failed_names: tuple[str, ...],
        num_failed: int,
        num_checks: int,
        earlier_failures: int,
    ) -> tuple[str, str]:
        """Map failure counts and history onto a (recommendation, summary) pair."""
        joined = ', '.join(failed_names)
        if earlier_failures >= 2:
            # Repeated failures across rounds: a human should look at this.
            return "escalate", (
                f"Task has failed verification {earlier_failures + 1} times. "
                f"Current failures: {joined}. Recommend escalation."
            )
        if num_failed == num_checks:
            # A total wipeout is also beyond automatic recovery.
            return "escalate", (
                f"All {num_checks} checks failed ({joined}). "
                "Recommend escalation."
            )
        # Partial failure: worth another planning pass.
        return "replan", (
            f"{num_failed}/{num_checks} checks failed ({joined}). "
            "Recommend replanning the failing areas."
        )
|
llm_code/task/manager.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Task lifecycle manager with state machine transitions and disk persistence."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import dataclasses
|
|
5
|
+
import json
|
|
6
|
+
import uuid
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from llm_code.task.types import TaskState, TaskStatus, VerifyResult, _now_iso
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Valid transitions: from_status -> set of allowed to_statuses
_TRANSITIONS: dict[TaskStatus, frozenset[TaskStatus]] = {
    TaskStatus.PLAN: frozenset({TaskStatus.DO, TaskStatus.BLOCKED}),
    TaskStatus.DO: frozenset({TaskStatus.VERIFY, TaskStatus.BLOCKED}),
    # VERIFY may loop back to DO when checks fail.
    TaskStatus.VERIFY: frozenset({TaskStatus.CLOSE, TaskStatus.DO, TaskStatus.BLOCKED}),
    TaskStatus.CLOSE: frozenset({TaskStatus.DONE, TaskStatus.BLOCKED}),
    TaskStatus.DONE: frozenset(),  # terminal
    # BLOCKED tasks may resume at any non-terminal working phase.
    TaskStatus.BLOCKED: frozenset({TaskStatus.PLAN, TaskStatus.DO, TaskStatus.VERIFY}),
}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TaskLifecycleManager:
    """Manages task creation, state transitions, and persistence.

    Each task is stored as one JSON file (named after its id) inside
    the directory supplied at construction time.
    """

    def __init__(self, task_dir: Path) -> None:
        self._task_dir = task_dir
        self._task_dir.mkdir(parents=True, exist_ok=True)

    def create_task(
        self,
        title: str,
        plan: str = "",
        goals: tuple[str, ...] = (),
        session_id: str = "",
    ) -> TaskState:
        """Create a new task in PLAN status and persist to disk."""
        timestamp = _now_iso()
        new_task = TaskState(
            id=f"task-{uuid.uuid4().hex[:8]}",
            title=title,
            status=TaskStatus.PLAN,
            plan=plan,
            goals=goals,
            created_at=timestamp,
            updated_at=timestamp,
            session_id=session_id,
        )
        self._save(new_task)
        return new_task

    def transition(self, task_id: str, to_status: TaskStatus) -> TaskState:
        """Transition a task to a new status, validating the state machine."""
        current = self._load(task_id)
        if current is None:
            raise KeyError(f"Task not found: {task_id}")

        allowed = _TRANSITIONS.get(current.status, frozenset())
        if to_status not in allowed:
            raise ValueError(
                f"Invalid transition: {current.status.value} -> {to_status.value}. "
                f"Allowed: {', '.join(s.value for s in allowed)}"
            )

        moved = dataclasses.replace(current, status=to_status, updated_at=_now_iso())
        self._save(moved)
        return moved

    def get_task(self, task_id: str) -> TaskState | None:
        """Get a task by ID, or None if not found."""
        return self._load(task_id)

    def list_tasks(
        self,
        status: TaskStatus | None = None,
        exclude_done: bool = False,
    ) -> tuple[TaskState, ...]:
        """List all tasks, optionally filtered by status."""
        found: list[TaskState] = []
        for entry in sorted(self._task_dir.glob("task-*.json")):
            try:
                candidate = TaskState.from_dict(
                    json.loads(entry.read_text(encoding="utf-8"))
                )
            except (json.JSONDecodeError, KeyError, ValueError):
                # Skip unreadable or malformed task files.
                continue
            if status is not None and candidate.status != status:
                continue
            if exclude_done and candidate.status == TaskStatus.DONE:
                continue
            found.append(candidate)
        return tuple(found)

    def update_task(self, task_id: str, **kwargs) -> TaskState:
        """Update arbitrary fields on a task (immutable replace)."""
        current = self._load(task_id)
        if current is None:
            raise KeyError(f"Task not found: {task_id}")
        # Frozen dataclass fields hold tuples, so coerce any list values.
        fields = {
            key: tuple(value) if isinstance(value, list) else value
            for key, value in kwargs.items()
        }
        fields["updated_at"] = _now_iso()
        refreshed = dataclasses.replace(current, **fields)
        self._save(refreshed)
        return refreshed

    def append_verify_result(self, task_id: str, result: VerifyResult) -> TaskState:
        """Append a VerifyResult to a task's verify_results tuple."""
        current = self._load(task_id)
        if current is None:
            raise KeyError(f"Task not found: {task_id}")
        refreshed = dataclasses.replace(
            current,
            verify_results=current.verify_results + (result,),
            updated_at=_now_iso(),
        )
        self._save(refreshed)
        return refreshed

    def _save(self, task: TaskState) -> None:
        """Write one task to its JSON file."""
        (self._task_dir / f"{task.id}.json").write_text(
            json.dumps(task.to_dict(), indent=2), encoding="utf-8"
        )

    def _load(self, task_id: str) -> TaskState | None:
        """Read one task from disk; corrupt or missing files yield None."""
        path = self._task_dir / f"{task_id}.json"
        if not path.exists():
            return None
        try:
            return TaskState.from_dict(json.loads(path.read_text(encoding="utf-8")))
        except (json.JSONDecodeError, KeyError, ValueError):
            return None
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def build_incomplete_tasks_prompt(manager: TaskLifecycleManager) -> str:
    """Build a system prompt section listing incomplete tasks from prior sessions.

    Returns "" when there are no incomplete tasks, so callers can skip
    the section entirely.
    """
    pending = manager.list_tasks(exclude_done=True)
    if not pending:
        return ""

    sections: list[str] = [
        "## Incomplete Tasks (from prior sessions)",
        "",
        "The following tasks are still in progress. Resume or address them:",
        "",
    ]
    for item in pending:
        sections.append(f"- **{item.id}** [{item.status.value}]: {item.title}")
        if item.plan:
            # Keep the preview short and on a single line.
            preview = item.plan[:200].replace("\n", " ")
            sections.append(f" Plan: {preview}")
        if item.goals:
            sections.append(f" Goals: {', '.join(item.goals)}")
        if item.files_modified:
            sections.append(f" Files: {', '.join(item.files_modified)}")
        sections.append("")

    return "\n".join(sections)
|
llm_code/task/types.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Frozen dataclasses for the task lifecycle."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import dataclasses
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from enum import Enum
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TaskStatus(Enum):
    """Phases of the task lifecycle state machine."""

    PLAN = "plan"  # planning what to do
    DO = "do"  # executing the plan
    VERIFY = "verify"  # running verification checks
    CLOSE = "close"  # wrapping up after verification
    DONE = "done"  # terminal: completed
    BLOCKED = "blocked"  # paused; may resume at an earlier phase
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclasses.dataclass(frozen=True)
class CheckResult:
    """Outcome of a single automated verification check."""

    check_name: str  # e.g. "pytest", "ruff", "file_exists"
    passed: bool  # True when the check succeeded
    output: str = ""  # captured tool output (may be truncated by the producer)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclasses.dataclass(frozen=True)
class VerifyResult:
    """Aggregated outcome of one verification round for a task."""

    task_id: str  # id of the verified task
    all_passed: bool  # True only when every check passed
    checks: tuple[CheckResult, ...] = ()  # individual check outcomes
    llm_judgment: str = ""  # optional free-text LLM assessment
    recommended_action: str = "continue"  # "continue" | "replan" | "escalate"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _now_iso() -> str:
|
|
35
|
+
return datetime.now(timezone.utc).isoformat()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclasses.dataclass(frozen=True)
class TaskState:
    """Immutable snapshot of one task, including its verification history."""

    id: str
    title: str
    status: TaskStatus = TaskStatus.PLAN
    plan: str = ""
    goals: tuple[str, ...] = ()
    files_modified: tuple[str, ...] = ()
    verify_results: tuple[VerifyResult, ...] = ()
    diagnostic_path: str = ""
    created_at: str = dataclasses.field(default_factory=_now_iso)
    updated_at: str = dataclasses.field(default_factory=_now_iso)
    session_id: str = ""

    def to_dict(self) -> dict:
        """Serialize to a JSON-compatible dict."""
        serialized_results = []
        for vr in self.verify_results:
            serialized_results.append(
                {
                    "task_id": vr.task_id,
                    "all_passed": vr.all_passed,
                    "checks": [
                        {"check_name": c.check_name, "passed": c.passed, "output": c.output}
                        for c in vr.checks
                    ],
                    "llm_judgment": vr.llm_judgment,
                    "recommended_action": vr.recommended_action,
                }
            )
        return {
            "id": self.id,
            "title": self.title,
            "status": self.status.value,
            "plan": self.plan,
            "goals": list(self.goals),
            "files_modified": list(self.files_modified),
            "verify_results": serialized_results,
            "diagnostic_path": self.diagnostic_path,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "session_id": self.session_id,
        }

    @classmethod
    def from_dict(cls, data: dict) -> TaskState:
        """Deserialize from a dict (inverse of to_dict)."""
        parsed_results: list[VerifyResult] = []
        for raw in data.get("verify_results", []):
            parsed_checks = tuple(
                CheckResult(
                    check_name=entry["check_name"],
                    passed=entry["passed"],
                    output=entry.get("output", ""),
                )
                for entry in raw.get("checks", [])
            )
            parsed_results.append(
                VerifyResult(
                    task_id=raw["task_id"],
                    all_passed=raw["all_passed"],
                    checks=parsed_checks,
                    llm_judgment=raw.get("llm_judgment", ""),
                    recommended_action=raw.get("recommended_action", "continue"),
                )
            )
        return cls(
            id=data["id"],
            title=data["title"],
            status=TaskStatus(data["status"]),
            plan=data.get("plan", ""),
            goals=tuple(data.get("goals", [])),
            files_modified=tuple(data.get("files_modified", [])),
            verify_results=tuple(parsed_results),
            diagnostic_path=data.get("diagnostic_path", ""),
            created_at=data.get("created_at", _now_iso()),
            updated_at=data.get("updated_at", _now_iso()),
            session_id=data.get("session_id", ""),
        )
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Verifier: run automatic checks (pytest, ruff, file_exists) for a task."""
|
|
2
|
+
from __future__ import annotations

import subprocess
import sys
from pathlib import Path

from llm_code.task.types import CheckResult, TaskState, VerifyResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Verifier:
    """Runs automated verification checks against a task's output.

    Each check either shells out to a tool (pytest, ruff) or inspects
    the filesystem; verify() aggregates the individual CheckResults
    into a single VerifyResult.
    """

    def __init__(self, cwd: Path, timeout: int = 120) -> None:
        # cwd: project directory the checks run in.
        # timeout: per-tool wall-clock limit in seconds.
        self._cwd = cwd
        self._timeout = timeout

    def verify(self, task: TaskState) -> VerifyResult:
        """Run all applicable checks for a task and return a VerifyResult."""
        checks: list[CheckResult] = []

        # Always run pytest and ruff
        checks.append(self.run_check_pytest())
        checks.append(self.run_check_ruff())

        # Check that modified files exist
        if task.files_modified:
            checks.append(self.run_check_files_exist(task.files_modified))

        all_passed = all(c.passed for c in checks)

        return VerifyResult(
            task_id=task.id,
            all_passed=all_passed,
            checks=tuple(checks),
            llm_judgment="",  # filled by LLM in a separate step
            recommended_action="continue" if all_passed else "replan",
        )

    def _run_tool(self, check_name: str, cmd: list[str], missing_msg: str) -> CheckResult:
        """Run *cmd* in the task cwd and convert the outcome to a CheckResult.

        Handles the three outcomes shared by every tool check: normal
        exit (pass/fail by return code), timeout, and a missing
        executable.
        """
        try:
            result = subprocess.run(
                cmd,
                cwd=str(self._cwd),
                capture_output=True,
                text=True,
                timeout=self._timeout,
            )
        except subprocess.TimeoutExpired:
            return CheckResult(
                check_name=check_name,
                passed=False,
                output=f"Timeout after {self._timeout}s",
            )
        except FileNotFoundError:
            return CheckResult(check_name=check_name, passed=False, output=missing_msg)
        return CheckResult(
            check_name=check_name,
            passed=result.returncode == 0,
            # Truncate the combined output so persisted results stay small.
            output=(result.stdout + result.stderr).strip()[:2000],
        )

    def run_check_pytest(self) -> CheckResult:
        """Run pytest and return a CheckResult."""
        # Use the current interpreter (sys.executable) rather than whatever
        # "python" happens to be on PATH, so the check runs in the same
        # environment as this process and works where "python" is absent.
        return self._run_tool(
            "pytest",
            [sys.executable, "-m", "pytest", "--tb=short", "-q"],
            "pytest not found in PATH",
        )

    def run_check_ruff(self) -> CheckResult:
        """Run ruff check and return a CheckResult."""
        return self._run_tool("ruff", ["ruff", "check", "."], "ruff not found in PATH")

    def run_check_files_exist(self, files: tuple[str, ...]) -> CheckResult:
        """Check that all modified files exist on disk (relative to cwd)."""
        missing = [f for f in files if not (self._cwd / f).exists()]
        if missing:
            return CheckResult(
                check_name="file_exists",
                passed=False,
                output=f"Missing files: {', '.join(missing)}",
            )
        return CheckResult(check_name="file_exists", passed=True, output="All files present")
|
|
File without changes
|
llm_code/tools/agent.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""AgentTool — spawns a sub-agent runtime to handle a delegated sub-task."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
import concurrent.futures
|
|
6
|
+
import inspect
|
|
7
|
+
from typing import Callable
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
from llm_code.api.types import StreamTextDelta
|
|
12
|
+
from llm_code.tools.agent_roles import BUILT_IN_ROLES, AgentRole
|
|
13
|
+
from llm_code.tools.base import PermissionLevel, Tool, ToolResult
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class AgentInput(BaseModel):
    """Validated input payload for AgentTool."""

    task: str  # task description for the sub-agent
    model: str = ""  # optional model override; "" means inherit
    role: str = ""  # optional built-in role name; "" means unrestricted
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AgentTool(Tool):
    """Spawn a sub-agent to handle a sub-task, up to max_depth levels deep."""

    def __init__(
        self,
        runtime_factory: Callable,
        max_depth: int = 3,
        current_depth: int = 0,
    ) -> None:
        # runtime_factory builds a fresh sub-agent runtime; it is invoked
        # with (model) or (model, role=...) depending on its signature
        # (see _call_factory).
        self._runtime_factory = runtime_factory
        self._max_depth = max_depth          # hard cap on nesting depth
        self._current_depth = current_depth  # depth of THIS agent in the tree

    # ------------------------------------------------------------------
    # Tool interface
    # ------------------------------------------------------------------

    @property
    def name(self) -> str:
        return "agent"

    @property
    def description(self) -> str:
        return "Spawn a sub-agent to handle a sub-task"

    @property
    def input_schema(self) -> dict:
        # JSON schema advertised to the model; mirrors AgentInput.
        return {
            "type": "object",
            "properties": {
                "task": {
                    "type": "string",
                    "description": "Task for the sub-agent",
                },
                "model": {
                    "type": "string",
                    "description": "Optional model override",
                },
                "role": {
                    "type": "string",
                    "enum": ["explore", "plan", "verify"],
                    "description": "Built-in agent role with restricted tools",
                },
            },
            "required": ["task"],
        }

    @property
    def required_permission(self) -> PermissionLevel:
        return PermissionLevel.FULL_ACCESS

    def is_concurrency_safe(self, args: dict) -> bool:
        # Each sub-agent has its own session
        return True

    # ------------------------------------------------------------------
    # Execution
    # ------------------------------------------------------------------

    def execute(self, args: dict) -> ToolResult:
        """Run a sub-agent synchronously and return its collected text output.

        Returns an error ToolResult when the nesting cap is reached or
        an unknown role is requested.
        """
        if self._current_depth >= self._max_depth:
            return ToolResult(
                output=f"Max agent depth reached ({self._max_depth})",
                is_error=True,
            )

        task: str = args.get("task", "")
        model: str | None = args.get("model") or None
        role_name: str = args.get("role", "")

        # Resolve role
        role: AgentRole | None = None
        if role_name:
            role = BUILT_IN_ROLES.get(role_name)
            if role is None:
                return ToolResult(
                    output=f"Unknown role: '{role_name}'. Valid roles: {list(BUILT_IN_ROLES)}",
                    is_error=True,
                )

        # Sync wrapper: works whether or not an event loop is running
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None

        if loop and loop.is_running():
            # Already inside an event loop (called from async code): run the
            # sub-agent on a fresh loop in a worker thread, since
            # asyncio.run() cannot be nested on the current thread.
            with concurrent.futures.ThreadPoolExecutor() as pool:
                result = pool.submit(
                    asyncio.run, self._execute_async(task, model, role)
                ).result()
        else:
            result = asyncio.run(self._execute_async(task, model, role))

        return result

    def _call_factory(self, model: str | None, role: AgentRole | None):
        """Call runtime_factory, passing role= only if the factory accepts it."""
        try:
            sig = inspect.signature(self._runtime_factory)
            params = sig.parameters
            # Accept role= either explicitly or via a **kwargs catch-all.
            accepts_role = (
                "role" in params
                or any(
                    p.kind == inspect.Parameter.VAR_KEYWORD
                    for p in params.values()
                )
            )
        except (ValueError, TypeError):
            # Some callables (e.g. builtins) expose no signature; be conservative.
            accepts_role = False

        if accepts_role:
            return self._runtime_factory(model, role=role)
        return self._runtime_factory(model)

    async def _execute_async(
        self, task: str, model: str | None, role: AgentRole | None
    ) -> ToolResult:
        """Drive one sub-agent turn, collecting streamed text into the result."""
        runtime = self._call_factory(model, role)
        collected: list[str] = []
        # Only text deltas are collected; other stream event types are ignored.
        async for event in runtime.run_turn(task):
            if isinstance(event, StreamTextDelta):
                collected.append(event.text)
        return ToolResult(output="".join(collected) or "(no output)")
|