bareagent-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bareagent/__init__.py +10 -0
- bareagent/concurrency/__init__.py +6 -0
- bareagent/concurrency/background.py +97 -0
- bareagent/concurrency/notification.py +61 -0
- bareagent/concurrency/scheduler.py +136 -0
- bareagent/config.toml +299 -0
- bareagent/core/__init__.py +1 -0
- bareagent/core/config_paths.py +49 -0
- bareagent/core/context.py +127 -0
- bareagent/core/fileutil.py +103 -0
- bareagent/core/goal.py +214 -0
- bareagent/core/handlers/__init__.py +1 -0
- bareagent/core/handlers/bash.py +79 -0
- bareagent/core/handlers/file_edit.py +47 -0
- bareagent/core/handlers/file_read.py +270 -0
- bareagent/core/handlers/file_write.py +34 -0
- bareagent/core/handlers/glob_search.py +30 -0
- bareagent/core/handlers/goal.py +60 -0
- bareagent/core/handlers/grep_search.py +52 -0
- bareagent/core/handlers/memory.py +71 -0
- bareagent/core/handlers/plan.py +106 -0
- bareagent/core/handlers/search_utils.py +77 -0
- bareagent/core/handlers/skill.py +87 -0
- bareagent/core/handlers/subagent_send.py +70 -0
- bareagent/core/handlers/web_fetch.py +126 -0
- bareagent/core/handlers/web_search.py +165 -0
- bareagent/core/handlers/workflow.py +190 -0
- bareagent/core/loop.py +535 -0
- bareagent/core/retry.py +131 -0
- bareagent/core/sandbox.py +27 -0
- bareagent/core/schema.py +21 -0
- bareagent/core/tools.py +779 -0
- bareagent/core/workflow.py +517 -0
- bareagent/core/workflow_registry.py +219 -0
- bareagent/debug/__init__.py +0 -0
- bareagent/debug/interaction_log.py +263 -0
- bareagent/debug/viewer.html +1750 -0
- bareagent/debug/web_viewer.py +157 -0
- bareagent/hooks/__init__.py +32 -0
- bareagent/hooks/config.py +118 -0
- bareagent/hooks/engine.py +197 -0
- bareagent/hooks/errors.py +14 -0
- bareagent/hooks/events.py +22 -0
- bareagent/lsp/__init__.py +63 -0
- bareagent/lsp/config.py +134 -0
- bareagent/lsp/coord.py +118 -0
- bareagent/lsp/diagnostics.py +240 -0
- bareagent/lsp/errors.py +24 -0
- bareagent/lsp/manager.py +866 -0
- bareagent/lsp/tools.py +629 -0
- bareagent/lsp/workspace_edit.py +305 -0
- bareagent/main.py +4205 -0
- bareagent/mcp/__init__.py +69 -0
- bareagent/mcp/_sse.py +69 -0
- bareagent/mcp/client.py +341 -0
- bareagent/mcp/config.py +169 -0
- bareagent/mcp/errors.py +32 -0
- bareagent/mcp/manager.py +318 -0
- bareagent/mcp/protocol.py +187 -0
- bareagent/mcp/registry.py +557 -0
- bareagent/mcp/transport/__init__.py +15 -0
- bareagent/mcp/transport/base.py +149 -0
- bareagent/mcp/transport/http_legacy.py +192 -0
- bareagent/mcp/transport/http_streamable.py +217 -0
- bareagent/mcp/transport/stdio.py +202 -0
- bareagent/memory/__init__.py +1 -0
- bareagent/memory/compact.py +203 -0
- bareagent/memory/conversation_io.py +226 -0
- bareagent/memory/embedding.py +194 -0
- bareagent/memory/persistent.py +515 -0
- bareagent/memory/token_counter.py +67 -0
- bareagent/memory/token_tracker.py +262 -0
- bareagent/memory/transcript.py +100 -0
- bareagent/permission/__init__.py +1 -0
- bareagent/permission/guard.py +329 -0
- bareagent/permission/rules.py +19 -0
- bareagent/planning/__init__.py +19 -0
- bareagent/planning/agent_types.py +169 -0
- bareagent/planning/skill_gen.py +141 -0
- bareagent/planning/skill_store.py +173 -0
- bareagent/planning/skills.py +146 -0
- bareagent/planning/subagent.py +355 -0
- bareagent/planning/subagent_registry.py +77 -0
- bareagent/planning/tasks.py +348 -0
- bareagent/planning/todo.py +153 -0
- bareagent/planning/worktree.py +122 -0
- bareagent/provider/__init__.py +1 -0
- bareagent/provider/anthropic.py +348 -0
- bareagent/provider/base.py +136 -0
- bareagent/provider/factory.py +130 -0
- bareagent/provider/openai.py +881 -0
- bareagent/provider/presets.py +72 -0
- bareagent/provider/setup.py +356 -0
- bareagent/skills/.gitkeep +1 -0
- bareagent/skills/code-review/SKILL.md +68 -0
- bareagent/skills/git/SKILL.md +68 -0
- bareagent/skills/test/SKILL.md +70 -0
- bareagent/team/__init__.py +17 -0
- bareagent/team/autonomous.py +193 -0
- bareagent/team/mailbox.py +239 -0
- bareagent/team/manager.py +155 -0
- bareagent/team/protocols.py +129 -0
- bareagent/tracing/__init__.py +12 -0
- bareagent/tracing/_api.py +92 -0
- bareagent/tracing/_proxy.py +60 -0
- bareagent/tracing/composite.py +115 -0
- bareagent/tracing/json_file.py +115 -0
- bareagent/tracing/langfuse.py +139 -0
- bareagent/tracing/otel.py +107 -0
- bareagent/tracing/setup.py +85 -0
- bareagent/ui/__init__.py +24 -0
- bareagent/ui/console.py +167 -0
- bareagent/ui/prompt.py +78 -0
- bareagent/ui/protocol.py +24 -0
- bareagent/ui/stream.py +66 -0
- bareagent/ui/theme.py +240 -0
- bareagent_cli-0.1.0.dist-info/METADATA +331 -0
- bareagent_cli-0.1.0.dist-info/RECORD +121 -0
- bareagent_cli-0.1.0.dist-info/WHEEL +4 -0
- bareagent_cli-0.1.0.dist-info/entry_points.txt +2 -0
- bareagent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
from datetime import date
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
BASE_SYSTEM_PROMPT = "You are BareAgent, a terminal-based coding assistant."

# Extra system-context text that is only injected while the permission mode is
# PLAN (see ``main.py:_refresh_plan_directive``). It explains the plan-mode
# workflow to the model: research with read-only tools, then hand the plan over
# through ``exit_plan_mode`` instead of trying write tools that PLAN blocks.
# The directive is three newline-separated paragraphs.
PLAN_MODE_DIRECTIVE = "\n".join(
    (
        # 1. What is allowed while planning.
        "You are in PLAN mode. Investigate and design only -- do NOT modify files, "
        "run state-changing commands, or perform other side effects. Use the "
        "read-only tools (read_file, glob, grep, web_fetch, web_search, load_skill) "
        "to research the task thoroughly.",
        # 2. How to leave plan mode.
        "When your implementation plan is ready, call the exit_plan_mode tool with "
        "the full plan as markdown to present it for approval. That tool is the only "
        "way to leave plan mode -- do not ask for approval in plain prose.",
        # 3. What happens after the user answers.
        "If the user approves, the permission mode switches and you continue with the "
        "implementation in this same conversation. If the user rejects, you stay in "
        "PLAN mode: revise the plan using their feedback and call exit_plan_mode again.",
    )
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _normalize_workspace(workspace: Path) -> Path:
    """Return *workspace* with ``~`` expanded and resolved to an absolute path."""
    expanded = workspace.expanduser()
    return expanded.resolve()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _run_git_command(workspace: Path, *args: str) -> str:
    """Run ``git <args>`` inside *workspace* and return its stripped stdout.

    Output is decoded as UTF-8 with undecodable bytes replaced. Because the
    call uses ``check=True`` and ``timeout=5``, a non-zero exit raises
    ``subprocess.CalledProcessError`` and a run longer than five seconds
    raises ``subprocess.TimeoutExpired``; failing to launch ``git`` at all
    surfaces as ``OSError``.
    """
    git_argv = ["git"]
    git_argv.extend(args)
    finished = subprocess.run(
        git_argv,
        cwd=workspace,
        timeout=5,
        check=True,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
    stdout_text = finished.stdout
    return stdout_text.strip()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@lru_cache(maxsize=1)
def _get_system_context_cached(workspace: Path) -> str:
    """Describe the git state of *workspace* as a short multi-line string.

    The result has the shape::

        Git branch: <name | detached | unknown>
        Recent commits:
        <up to three one-line commits | No commits found. | Unavailable.>

    ``lru_cache(maxsize=1)`` keeps only the most recently requested
    workspace, so repeated calls for that workspace do not re-run git.
    """
    git_failures = (OSError, subprocess.SubprocessError)

    try:
        branch = _run_git_command(workspace, "branch", "--show-current")
    except git_failures:
        branch = "unknown"
    else:
        # An empty answer means HEAD is not on a named branch.
        if not branch:
            branch = "detached"

    try:
        log_text = _run_git_command(workspace, "log", "--oneline", "-3")
    except git_failures:
        # If the branch lookup worked, a failing ``git log`` is reported as an
        # empty history; if both failed, git itself is treated as unavailable.
        if branch == "unknown":
            log_text = "Unavailable."
        else:
            log_text = "No commits found."
    else:
        log_text = log_text or "No commits found."

    return f"Git branch: {branch}\nRecent commits:\n{log_text}"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def get_system_context(workspace: Path | None = None) -> str:
    """Return the git summary for *workspace* followed by today's date.

    Falls back to the current working directory when no workspace is given.
    The git part comes from the cached helper, so git is not re-run on every
    call; the ``Date:`` line is computed fresh each time.
    """
    target = workspace if workspace else Path.cwd()
    git_summary = _get_system_context_cached(_normalize_workspace(target))
    today = date.today().isoformat()
    return "\n".join((git_summary, f"Date: {today}"))
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _read_context_file(path: Path) -> str:
    """Return the stripped UTF-8 text of *path*, or ``""`` if it is unusable.

    Context files are optional, so every way of failing to obtain their text
    is reported as "no content" rather than as an error:

    - ``OSError``: the file is missing, unreadable, or is a directory.
    - ``UnicodeDecodeError``: the file exists but is not valid UTF-8. This is
      a ``ValueError`` subclass, not an ``OSError``, so it has to be listed
      explicitly; otherwise one badly encoded file would abort the caller.
    """
    try:
        return path.read_text(encoding="utf-8").strip()
    except (OSError, UnicodeDecodeError):
        return ""
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def get_user_context(workspace: Path) -> str:
    """Collect the user's BAREAGENT.md instructions.

    Two locations are consulted, in this order: the global file at
    ``~/.bareagent/BAREAGENT.md`` and the one in the workspace root. Each file
    that yields non-empty text becomes a ``# From <path>`` section; sections
    are separated by a blank line. Returns ``""`` when neither has content.
    """
    root = _normalize_workspace(workspace)
    candidates = (
        Path.home().joinpath(".bareagent", "BAREAGENT.md"),
        root.joinpath("BAREAGENT.md"),
    )
    loaded = ((candidate, _read_context_file(candidate)) for candidate in candidates)
    return "\n\n".join(f"# From {candidate}\n{text}" for candidate, text in loaded if text)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def assemble_system_prompt(
    workspace: Path,
    skill_summary: str = "",
    nag_reminder: str = "",
    memory_context: str = "",
) -> str:
    """Build the complete system prompt for *workspace*.

    Always present, in order: the base prompt, a ``Workspace:`` line, and the
    git/date context. Then, each only when non-blank: user instructions
    (wrapped in ``<user-instructions>``), the memory context (unwrapped), the
    skill summary (``<skill-summary>``) and the nag reminder
    (``<nag-reminder>``). Sections are joined by a blank line.
    """
    root = _normalize_workspace(workspace)
    parts = [
        BASE_SYSTEM_PROMPT,
        f"Workspace: {root}",
        get_system_context(root),
    ]

    user_text = get_user_context(root)
    memory_text = memory_context.strip()
    skills_text = skill_summary.strip()
    nag_text = nag_reminder.strip()

    # Blank inputs collapse to "" here and are filtered out below.
    optional_parts = [
        f"<user-instructions>\n{user_text}\n</user-instructions>" if user_text else "",
        memory_text,
        f"<skill-summary>\n{skills_text}\n</skill-summary>" if skills_text else "",
        f"<nag-reminder>\n{nag_text}\n</nag-reminder>" if nag_text else "",
    ]
    parts.extend(piece for piece in optional_parts if piece)
    return "\n\n".join(parts)
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""Shared file-system and small utilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import secrets
|
|
8
|
+
import string
|
|
9
|
+
import tempfile
|
|
10
|
+
from datetime import UTC, datetime
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
_ID_ALPHABET = string.ascii_letters + string.digits
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def stringify(value: Any) -> str:
    """Render *value* as text for tool output or serialization.

    ``None`` becomes the empty string and strings pass through untouched.
    Anything else is JSON-encoded without ASCII escaping; objects JSON cannot
    encode natively fall back to their ``str()`` form.
    """
    if value is None:
        return ""
    if not isinstance(value, str):
        return json.dumps(value, ensure_ascii=False, default=str)
    return value
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def generate_random_id(length: int = 8) -> str:
    """Return *length* characters drawn with ``secrets`` from the ID alphabet."""
    picks = [secrets.choice(_ID_ALPHABET) for _ in range(length)]
    return "".join(picks)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def is_tool_result_message(msg: dict[str, Any]) -> bool:
    """Tell whether *msg* carries at least one ``tool_result`` content block.

    Only list-shaped ``content`` is inspected; plain-string or missing
    content is never a tool result. Non-dict list entries are ignored.
    """
    blocks = msg.get("content")
    if not isinstance(blocks, list):
        return False
    for item in blocks:
        if isinstance(item, dict) and item.get("type") == "tool_result":
            return True
    return False
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def atomic_write_json(file_path: Path, payload: Any) -> None:
    """Write *payload* to *file_path* as indented JSON without partial results.

    The JSON is first written to a temporary ``.tmp`` file created in the
    destination directory (which is created if needed), then moved over
    *file_path* with ``os.replace``. If anything goes wrong, the temporary
    file is removed and the original exception is re-raised, leaving any
    existing *file_path* untouched.
    """
    target_dir = file_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    handle, scratch_name = tempfile.mkstemp(dir=str(target_dir), suffix=".tmp")
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as stream:
            json.dump(payload, stream, ensure_ascii=False, indent=2)
        os.replace(scratch_name, str(file_path))
    except BaseException:
        # BaseException on purpose: the scratch file is also cleaned up on
        # KeyboardInterrupt/SystemExit. Cleanup itself is best effort.
        try:
            os.unlink(scratch_name)
        except OSError:
            pass
        raise
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def atomic_write_text(file_path: Path, text: str) -> None:
    """Write *text* to *file_path* as UTF-8 without leaving partial results.

    Plain-text sibling of :func:`atomic_write_json`, meant for persistent
    Markdown state such as memory files. The text goes to a temporary
    ``.tmp`` file in the destination directory first and is then moved into
    place with ``os.replace``.

    The file is opened with ``newline="\\n"`` so no platform newline
    translation happens: line endings stay identical on Windows and POSIX,
    which keeps later ``str_replace`` / ``insert`` matching deterministic.
    """
    target_dir = file_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    handle, scratch_name = tempfile.mkstemp(dir=str(target_dir), suffix=".tmp")
    try:
        with os.fdopen(handle, "w", encoding="utf-8", newline="\n") as stream:
            stream.write(text)
        os.replace(scratch_name, str(file_path))
    except BaseException:
        # Remove the scratch file on any failure (including interrupts), then
        # let the original exception continue; cleanup is best effort.
        try:
            os.unlink(scratch_name)
        except OSError:
            pass
        raise
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def utc_timestamp_iso() -> str:
|
|
78
|
+
"""Return the current UTC time as an ISO-8601 string."""
|
|
79
|
+
return datetime.now(UTC).isoformat()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def optional_string(value: Any) -> str | None:
    """Return ``str(value)`` stripped of surrounding whitespace.

    ``None`` stays ``None``, and a value whose stripped text is empty is also
    reported as ``None``.
    """
    if value is None:
        return None
    text = str(value).strip()
    return text if text else None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def collect_tool_names(messages: list[dict[str, Any]]) -> dict[str, str]:
    """Build a mapping from tool_use id → tool name across all messages.

    Only messages whose ``content`` is a list are scanned, and within them
    only dict blocks of type ``tool_use``. Blocks without a usable id are
    skipped; a block without a name is recorded as ``"unknown"``. When an id
    occurs more than once, the last occurrence wins.
    """
    tool_name_by_id: dict[str, str] = {}
    for message in messages:
        content = message.get("content")
        if not isinstance(content, list):
            continue
        for block in content:
            if not isinstance(block, dict) or block.get("type") != "tool_use":
                continue
            raw_id = block.get("id")
            # An explicit ``"id": None`` must count as "no id". Passing it
            # through ``str()`` would create a bogus entry keyed "None".
            tool_id = "" if raw_id is None else str(raw_id)
            if not tool_id:
                continue
            raw_name = block.get("name")
            # Same reasoning for the name: ``None`` means the name is missing.
            tool_name_by_id[tool_id] = "unknown" if raw_name is None else str(raw_name)
    return tool_name_by_id
|
bareagent/core/goal.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Goal completion loop: drive turns until an evaluator judges a condition met.
|
|
2
|
+
|
|
3
|
+
Pure logic with no LLM / loop / REPL / SDK dependencies, so the loop driver,
|
|
4
|
+
verdict parsing, prompt construction, and command parsing are fully unit-testable
|
|
5
|
+
with injected callbacks (mirrors the ``src/core/retry.py`` and
|
|
6
|
+
``src/planning/skill_gen.py`` pure-module pattern).
|
|
7
|
+
|
|
8
|
+
Division of labor (see task 06-06-goal-completion-loop):
|
|
9
|
+
- This module owns the *control flow*: how prompts are sequenced, when to stop,
|
|
10
|
+
and why (``run_goal_loop``), plus the pure text/parse helpers.
|
|
11
|
+
- The REPL (``main.py``) owns the side-effecting parts: running the real
|
|
12
|
+
``agent_loop`` turn and the isolated evaluator LLM call, which it injects into
|
|
13
|
+
:func:`run_goal_loop` as the ``run_turn`` / ``evaluate`` callbacks.
|
|
14
|
+
|
|
15
|
+
The loop is *synchronous and non-persistent*: ``/goal <condition>`` blocks the
|
|
16
|
+
REPL, drives turns until the condition is met or ``max_turns`` is hit, then
|
|
17
|
+
returns. There is no cross-input goal state (persistence is out of scope), so the
|
|
18
|
+
driver only needs the condition and the turn budget.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
from collections.abc import Callable
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
from enum import Enum
|
|
26
|
+
|
|
27
|
+
# Turn budget used when ``/goal`` is given without ``--max-turns``.
DEFAULT_MAX_TURNS = 25

# Help text shown for a bare ``/goal``: one synopsis line followed by three
# indented explanatory lines.
GOAL_USAGE = "\n".join(
    (
        "Usage: /goal [--max-turns N] <completion condition>",
        " Drives the agent turn-after-turn until an independent evaluator judges "
        "the condition met (or the turn budget is exhausted).",
        " Example: /goal all tests in tests/test_goal.py pass and ruff is clean",
        " The evaluator judges only from the transcript, so state a check the agent "
        "can show evidence for (e.g. run pytest and include the exit code).",
    )
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class GoalOutcome(Enum):
    """Reason a ``/goal`` run came to an end.

    :func:`run_goal_loop` itself only ever returns ``MET`` or ``MAX_TURNS``.
    ``ABORTED`` is assigned by the caller: interrupts raised inside the
    injected callbacks propagate out of the loop, and the caller translates
    them into this value.
    """

    MET = "met"
    MAX_TURNS = "max_turns"
    ABORTED = "aborted"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass(slots=True)
class GoalState:
    """Mutable bookkeeping for a single ``/goal`` invocation."""

    # Completion condition the agent is working toward.
    condition: str
    # Upper bound on the number of agent turns the loop may run.
    max_turns: int = DEFAULT_MAX_TURNS
    # Turns started so far; incremented by the loop driver.
    turns_used: int = 0
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass(slots=True)
|
|
59
|
+
class Verdict:
|
|
60
|
+
"""An evaluator's judgement on whether the condition is satisfied.
|
|
61
|
+
|
|
62
|
+
``malformed`` flags a verdict that the evaluator failed to produce cleanly
|
|
63
|
+
(LLM error, no tool call, missing field). A malformed verdict is always
|
|
64
|
+
treated as *not met* so the loop falls through to its ``max_turns`` guard
|
|
65
|
+
instead of crashing or stopping early.
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
met: bool
|
|
69
|
+
reason: str = ""
|
|
70
|
+
malformed: bool = False
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass(slots=True)
class GoalCommand:
    """Outcome of parsing the text that follows ``/goal``."""

    # One of "run", "usage" or "error".
    action: str
    # Completion condition; only meaningful for the "run" action.
    condition: str = ""
    # Turn budget to use for the "run" action.
    max_turns: int = DEFAULT_MAX_TURNS
    # Usage or error text for the non-run actions.
    message: str = ""
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _coerce_bool(value: object) -> bool:
    """Interpret *value* as a boolean.

    Strings are true only when, ignoring case and surrounding whitespace,
    they read ``true``, ``1``, ``yes`` or ``y``. Every other value follows
    ordinary Python truthiness.
    """
    if isinstance(value, str):
        token = value.strip().lower()
        return token in ("true", "1", "yes", "y")
    # bool() returns real booleans unchanged, so they need no separate branch.
    return bool(value)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def parse_verdict(tool_input: dict | None) -> Verdict:
    """Turn the ``goal_verdict`` tool input into a :class:`Verdict`.

    Parsing is deliberately pessimistic: input that is not a dict, or whose
    ``met`` field is absent or ``None``, produces a malformed verdict with
    ``met=False``. A confused evaluator therefore can never be read as
    reporting success. The ``reason`` text is kept (stripped) whenever the
    input is a dict.
    """
    if not isinstance(tool_input, dict):
        return Verdict(met=False, reason="", malformed=True)

    raw_reason = tool_input.get("reason", "")
    reason = str(raw_reason or "").strip()

    # ``.get`` returns None both for a missing key and for an explicit None.
    raw_met = tool_input.get("met")
    if raw_met is None:
        return Verdict(met=False, reason=reason, malformed=True)
    return Verdict(met=_coerce_bool(raw_met), reason=reason)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def build_initial_prompt(condition: str) -> str:
    """Compose the first user message of a goal run.

    The message has three blank-line-separated parts: instructions to work
    autonomously, the stripped *condition* wrapped in ``<goal-condition>``
    tags, and a request to show verifiable evidence.
    """
    intro = (
        "Work autonomously toward the goal below until it is fully satisfied. "
        "After each step an independent evaluator checks whether the condition is "
        "met and tells you what is still missing."
    )
    goal_block = f"<goal-condition>\n{condition.strip()}\n</goal-condition>"
    closing = (
        "Make concrete progress now. When you believe the condition is met, run "
        "the relevant checks and include their output so it can be verified from "
        "this conversation."
    )
    return "\n\n".join((intro, goal_block, closing))
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def build_evaluator_prompt(condition: str) -> str:
    """Compose the user message appended to the transcript copy for the evaluator.

    Four blank-line-separated parts: the evaluator's role, the stripped
    *condition* in ``<goal-condition>`` tags, the evidence-only judging
    rule, and the instructions for calling the ``goal_verdict`` tool.
    """
    role = (
        "You are a strict goal-completion evaluator. The conversation above shows "
        "an agent working toward this completion condition:"
    )
    goal_block = f"<goal-condition>\n{condition.strip()}\n</goal-condition>"
    judging_rule = (
        "Judge ONLY from the conversation above whether the condition is now fully "
        "satisfied. Do not assume work that is not shown: if success is claimed but "
        "the supporting evidence (tool results, command output) is not present in "
        "the transcript, treat it as NOT met."
    )
    tool_instructions = "\n".join(
        (
            "Call the `goal_verdict` tool exactly once:",
            "- met=true only if the condition is fully and verifiably satisfied.",
            "- met=false otherwise, with a concrete `reason` naming what is still "
            "missing and what the agent should do next.",
            "Output nothing else.",
        )
    )
    return "\n\n".join((role, goal_block, judging_rule, tool_instructions))
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def build_continuation_prompt(reason: str) -> str:
    """Compose the follow-up user message sent while the goal is still unmet.

    The evaluator's *reason*, when non-blank, is inserted between the fixed
    opening and closing sentences.
    """
    feedback = (reason or "").strip()
    sentences = ["The goal is not yet satisfied."]
    if feedback:
        sentences.append(f"Evaluator feedback: {feedback}")
    sentences.append("Keep working toward the goal.")
    return " ".join(sentences)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def parse_goal_command(rest: str, *, default_max_turns: int = DEFAULT_MAX_TURNS) -> GoalCommand:
    """Parse the text after ``/goal`` into a :class:`GoalCommand`.

    Accepted forms:

    - ``""`` -> ``usage`` (carries :data:`GOAL_USAGE`).
    - ``--max-turns N <condition>`` or ``--max-turns=N <condition>`` -> ``run``
      with the given budget.
    - ``<condition>`` -> ``run`` with *default_max_turns*.

    The flag is recognised only when the first whitespace-separated token is
    exactly ``--max-turns`` or starts with ``--max-turns=``. Any other leading
    text, even text that merely begins with those characters, is part of the
    condition. A missing value or condition, a non-integer value, or a value
    below 1 yields an ``error`` command with an explanatory message.

    Pure (no I/O), so it is directly unit-testable.
    """
    rest = (rest or "").strip()
    if not rest:
        return GoalCommand(action="usage", message=GOAL_USAGE)

    usage_error = GoalCommand(action="error", message="Usage: /goal [--max-turns N] <condition>")
    max_turns = default_max_turns
    condition = rest

    # ``rest`` is non-empty and stripped, so there is always a first token.
    head, *tail = rest.split(None, 1)
    remainder = tail[0] if tail else ""

    raw_value: str | None = None
    if head == "--max-turns":
        value_and_condition = remainder.split(None, 1)  # ["N", "<condition...>"]
        if len(value_and_condition) < 2:
            return usage_error
        raw_value, condition = value_and_condition
    elif head.startswith("--max-turns="):
        if not remainder:
            return usage_error
        raw_value = head.removeprefix("--max-turns=")
        condition = remainder

    if raw_value is not None:
        try:
            max_turns = int(raw_value)
        except ValueError:
            return GoalCommand(
                action="error",
                message=f"Invalid --max-turns value: {raw_value!r} (expected an integer).",
            )
        if max_turns < 1:
            return GoalCommand(action="error", message="--max-turns must be >= 1.")
        condition = condition.strip()

    if not condition:
        return GoalCommand(
            action="error", message="Provide a completion condition: /goal <condition>"
        )
    return GoalCommand(action="run", condition=condition, max_turns=max_turns)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def run_goal_loop(
    state: GoalState,
    *,
    run_turn: Callable[[str], None],
    evaluate: Callable[[], Verdict],
    on_progress: Callable[[str], None] | None = None,
) -> tuple[GoalOutcome, Verdict | None]:
    """Alternate agent turns and evaluations until the goal is met or the budget runs out.

    Callbacks:

    - ``run_turn(prompt)`` performs one real agent turn with ``prompt`` as the
      user message. It handles its own rollback and may raise (for example
      ``LLMCallError`` or ``KeyboardInterrupt``); this function does not catch
      anything, so the caller sees the exception and can report ``ABORTED``.
    - ``evaluate()`` runs the isolated evaluator and returns a
      :class:`Verdict`. Ordinary evaluator failures are expected to come back
      as a malformed, not-met verdict rather than as an exception.
    - ``on_progress(text)``, if given, receives a ``Goal turn i/N...`` line
      before each turn.

    ``state.turns_used`` is incremented in place at the start of every turn.

    Returns ``(outcome, last_verdict)``. ``last_verdict`` is ``None`` only
    when no turn ran at all, i.e. the budget was already exhausted on entry.
    """
    latest: Verdict | None = None
    next_prompt = build_initial_prompt(state.condition)

    while True:
        if state.turns_used >= state.max_turns:
            return GoalOutcome.MAX_TURNS, latest

        state.turns_used += 1
        if on_progress is not None:
            on_progress(f"Goal turn {state.turns_used}/{state.max_turns}...")

        run_turn(next_prompt)
        latest = evaluate()
        if latest.met:
            return GoalOutcome.MET, latest

        # Not met yet: feed the evaluator's reason into the next turn.
        next_prompt = build_continuation_prompt(latest.reason)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Tool handlers for BareAgent."""
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import subprocess
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def run_bash(
    command: str,
    timeout: int = 30,
    *,
    cwd: Path | None = None,
    raise_on_error: bool = False,
) -> str:
    """Run *command* in a shell and return its combined stdout/stderr.

    On Windows the command runs through ``powershell -NoProfile -Command``;
    elsewhere through ``bash -lc``. Output is decoded as UTF-8 with
    undecodable bytes replaced.

    A timeout or a non-zero exit code is reported as a message that includes
    whatever output was captured. That message is returned by default, or
    raised as ``RuntimeError`` when *raise_on_error* is true.
    """
    if os.name != "nt":
        argv = ["bash", "-lc", command]
    else:
        # Windows PowerShell 5.1 on a Chinese locale emits stdout/stderr as
        # GBK (cp936) by default, while this function decodes UTF-8. Forcing
        # the console output encoding to UTF-8 keeps both sides aligned, so
        # Chinese cmdlet output does not decode into U+FFFD. The setter sits
        # in try/catch so an environment that rejects it still runs the
        # actual command.
        utf8_preamble = (
            "try { [Console]::OutputEncoding = [System.Text.Encoding]::UTF8 } "
            "catch {}; "
        )
        argv = ["powershell", "-NoProfile", "-Command", utf8_preamble + command]

    working_dir = str(cwd) if cwd is not None else None
    try:
        proc = subprocess.run(
            argv,
            cwd=working_dir,
            timeout=timeout,
            check=False,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except subprocess.TimeoutExpired as exc:
        failure = f"Error: command timed out after {timeout} seconds"
        partial = _join_output(exc.stdout, exc.stderr)
        if partial:
            failure = f"{failure}\n{partial}"
        if raise_on_error:
            raise RuntimeError(failure) from exc
        return failure

    combined = _join_output(proc.stdout, proc.stderr)
    if proc.returncode == 0:
        return combined

    failure = f"Command failed with exit code {proc.returncode}"
    if combined:
        failure = f"{failure}\n{combined}"
    if raise_on_error:
        raise RuntimeError(failure)
    return failure
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _join_output(stdout: str | bytes | None, stderr: str | bytes | None) -> str:
    """Merge captured stdout and stderr into one newline-separated string.

    Each stream may be text, raw bytes (decoded as UTF-8 with replacement)
    or ``None``. Trailing whitespace is trimmed per stream, and streams that
    end up empty are left out. stdout always comes first.
    """

    def _clean(stream: str | bytes | None) -> str:
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            stream = stream.decode("utf-8", errors="replace")
        return stream.rstrip()

    return "\n".join(chunk for chunk in map(_clean, (stdout, stderr)) if chunk)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from bareagent.core.sandbox import safe_path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def run_edit(
    file_path: str,
    old_text: str,
    new_text: str,
    *,
    workspace: Path,
    diagnostics_hook: Callable[[str, Any], str | None] | None = None,
) -> str:
    """Replace the first occurrence of *old_text* in a workspace file.

    The path is resolved through ``safe_path`` against *workspace*, and the
    file is read and written as UTF-8. Raises ``ValueError`` when *old_text*
    does not occur in the file. Returns ``"Edited <workspace-relative path>"``.

    ``diagnostics_hook`` is the optional auto-diagnostics callback supplied
    by ``get_handlers`` when LSP is active, with the signature
    ``(file_path, before) -> str | None``. It is called twice:

    - before the write, with ``before=None``, so it can take its own snapshot;
    - after the write, with that snapshot, so it can describe diagnostics
      introduced by the edit. A non-empty result is appended to the returned
      message.

    The hook returns ``None`` when LSP is unavailable or disabled.
    """
    target = safe_path(file_path, workspace)
    original = target.read_text(encoding="utf-8")
    if old_text not in original:
        raise ValueError("old_text not found in file")

    # Pre-edit diagnostics snapshot; the hook handles "LSP off" / "no route".
    snapshot = None
    if diagnostics_hook:
        snapshot = diagnostics_hook(str(target), None)

    target.write_text(original.replace(old_text, new_text, 1), encoding="utf-8")
    workspace_root = workspace.resolve(strict=False)
    summary = f"Edited {target.relative_to(workspace_root).as_posix()}"

    if diagnostics_hook is None:
        return summary
    extra = diagnostics_hook(str(target), snapshot)
    return f"{summary}{extra}" if extra else summary
|