pascal-agent 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pascal/__init__.py +3 -0
- pascal/__main__.py +880 -0
- pascal/actions.py +1066 -0
- pascal/capability.py +218 -0
- pascal/channels/__init__.py +0 -0
- pascal/channels/telegram.py +108 -0
- pascal/clipboard.py +38 -0
- pascal/config.py +134 -0
- pascal/daemon.py +211 -0
- pascal/desk.py +633 -0
- pascal/effect.py +155 -0
- pascal/eval/__init__.py +1 -0
- pascal/eval/smoke.py +213 -0
- pascal/llm/__init__.py +1 -0
- pascal/llm/anthropic.py +225 -0
- pascal/llm/codex.py +331 -0
- pascal/llm/openai.py +224 -0
- pascal/loop.py +1037 -0
- pascal/mcp.py +206 -0
- pascal/prompt.py +141 -0
- pascal/receipts.py +147 -0
- pascal/sandbox.py +287 -0
- pascal/scheduler.py +243 -0
- pascal/schemas.py +183 -0
- pascal/state.py +790 -0
- pascal/tools.py +672 -0
- pascal/trust.py +150 -0
- pascal/types.py +337 -0
- pascal/uia.py +316 -0
- pascal_agent-0.3.0.dist-info/METADATA +262 -0
- pascal_agent-0.3.0.dist-info/RECORD +33 -0
- pascal_agent-0.3.0.dist-info/WHEEL +4 -0
- pascal_agent-0.3.0.dist-info/entry_points.txt +2 -0
pascal/actions.py
ADDED
|
@@ -0,0 +1,1066 @@
|
|
|
1
|
+
"""Pascal action handlers -- extracted from loop.py for maintainability.
|
|
2
|
+
|
|
3
|
+
Each handler processes one action type and returns a result dict.
|
|
4
|
+
Called by the main loop's dispatcher in loop.py.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import os
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Any, Callable
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
from pascal.effect import classify_command, check_allowed, get_max_effect, escalation_message, effect_to_int
|
|
18
|
+
from pascal.mcp import MCPManager
|
|
19
|
+
from pascal.receipts import Ledger
|
|
20
|
+
from pascal.sandbox import SandboxManager
|
|
21
|
+
from pascal.state import PascalStore
|
|
22
|
+
from pascal.tools import (
|
|
23
|
+
execute_tool, TOOL_REGISTRY, TOOL_EFFECTS,
|
|
24
|
+
get_channel_bot, get_owner_chat_id,
|
|
25
|
+
)
|
|
26
|
+
from pascal.trust import scan_tool_input
|
|
27
|
+
from pascal.types import Message, Role, PlanNode, TaskPlan
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
# Delegate tool configs
# Maps a delegate tool name to a list of command-line tokens.
# NOTE(review): presumably the argv prefix used to launch that tool -- the
# consumer is not in this part of the file; confirm against _handle_delegate.
DELEGATE_TOOLS = {
    "claude-code": ["claude", "--print", "-p"],
    "codex": ["codex", "-q"],
}
# NOTE(review): presumably an upper bound in seconds for delegate runs -- confirm.
_DELEGATE_MAX_TIMEOUT = 600
# Number quoted in the "Cannot complete" message of _handle_complete_task.
_COMPLETE_TASK_FAILURE_WINDOW = 3
# How many recent history rows _handle_complete_task fetches for its failure-streak guard.
_COMPLETE_TASK_HISTORY_SCAN_LIMIT = 20
# Maximum number of entries accepted in a legacy flat "steps" plan (_handle_plan).
_PLAN_MAX_STEPS = 20
_OUTPUT_SOFT_LIMIT = 4000  # chars — show more context than old 2000 hard limit
_OUTPUT_HARD_LIMIT = 8000  # chars — absolute max to prevent context blowup

# Actions that cannot appear inside plan steps (enforced at validation + runtime)
PLAN_FORBIDDEN_ACTIONS = frozenset({"plan", "complete_task", "fail_task", "wait", "escalate"})
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _plan_step_effect_level(
    action: str, step: dict, max_effect: str | None, mcp_manager: MCPManager | None,
) -> str | None:
    """Compute the effect level of a plan step.

    Args:
        action: The step's action name (e.g. "execute", "delegate").
        step: The raw step dict; "code", "tool" and "command" are inspected.
        max_effect: Unused here; kept so the signature stays stable for callers.
        mcp_manager: Optional MCP manager consulted for tools that are not in
            the built-in TOOL_EFFECTS table.

    Returns:
        An effect-level string such as "E0".."E3", or None when the step has
        nothing to classify (non-execute/delegate actions, or an execute step
        with no code, tool or command).
    """
    if action == "delegate":
        return "E2"
    if action != "execute":
        return None

    # Follow _handle_execute's precedence (code, then tool, then command) so
    # the plan gate classifies exactly what would actually run. Python code is
    # treated as E2 there, so it must not slip through the gate unclassified.
    code = str(step.get("code") or "").strip()
    if code:
        return "E2"

    tool_name = str(step.get("tool") or "").strip()
    if tool_name:
        from pascal.tools import TOOL_EFFECTS
        tool_effect = TOOL_EFFECTS.get(tool_name)
        if tool_effect is not None:
            return tool_effect
        if mcp_manager is not None and mcp_manager.has_tool(tool_name):
            # First MCP spec with a matching name decides: side effects -> E3.
            spec = next(
                (s for s in mcp_manager.all_tool_specs() if s.name == tool_name),
                None,
            )
            return "E3" if (spec is not None and spec.side_effects) else "E0"
        # Unknown tool: default level used by _handle_execute as well.
        return "E1"

    command = str(step.get("command") or "").strip()
    if command:
        return classify_command(command)
    return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class ActionContext:
    """Bundles runtime dependencies for action handlers.

    Replaces the 9-parameter kwargs signature. Handlers access only what they need.
    Inspired by OpenAI Agents SDK RunContext / Claude Code tool context patterns.
    """
    # Persistent store handlers use for tasks, plans, history, memory, etc.
    store: PascalStore
    # LLM client; handed to completion verification when a task requires proof.
    llm: Any = None
    # Configured effect ceiling; handlers pass it to get_max_effect().
    max_effect: str | None = None
    # Sandbox used to run code/commands; handlers fall back to SandboxManager().
    sandbox: SandboxManager | None = None
    # Optional MCP manager used to resolve and call non-built-in tools.
    mcp_manager: MCPManager | None = None
    # When set, shell commands go through this callable instead of the sandbox.
    execute_command: Callable[[str], str] | None = None
    # Thoughts appended by the "think" handler.
    thought_buffer: list[str] = field(default_factory=list)
    # Optional receipt ledger; plan execution records each step in it.
    ledger: Ledger | None = None
    # When True, task completion skips proof verification.
    skip_verification: bool = False
    # Optional notifier; may be a coroutine function or a plain callable.
    notify_fn: Callable[[str], Any] | None = None

    async def notify(self, msg: str) -> None:
        """Send *msg* through ``notify_fn`` if one is configured.

        ``notify_fn`` is typed as returning ``Any``, so both async and plain
        synchronous callbacks are accepted: the result is awaited only when it
        is actually awaitable.
        """
        if self.notify_fn is None:
            return
        import inspect

        outcome = self.notify_fn(msg)
        if inspect.isawaitable(outcome):
            await outcome
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _truncate_output(text: str) -> str:
    """Cap *text* at the hard output limit.

    Text whose length is within either limit is returned unchanged; anything
    longer is cut at ``_OUTPUT_HARD_LIMIT`` characters and suffixed with a
    marker that states the original length.
    """
    total = len(text)
    if total <= _OUTPUT_SOFT_LIMIT or total <= _OUTPUT_HARD_LIMIT:
        return text
    head = text[:_OUTPUT_HARD_LIMIT]
    return head + f"\n... [truncated, {total} chars total]"
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
async def _handle_think(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Record the decision's thought (or, failing that, its reason).

    A non-empty thought is appended to ``ctx.thought_buffer``. The stripped
    text is always echoed back under the "thought" key, even when empty.
    """
    raw = decision.get("thought") or decision.get("reason") or ""
    text = str(raw).strip()
    if not text:
        return {"thought": text}
    ctx.thought_buffer.append(text)
    return {"thought": text}
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
async def _handle_plan(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Validate, store, and eagerly execute a plan for the active task.

    The decision supplies either ``plan_tree`` (tree format) or ``steps``
    (legacy flat list, wrapped into a single branch). When ``patch_node_id``
    is given, the new plan is grafted under that node of the stored plan
    instead of replacing it. Pending leaves are then executed in order until
    one fails or reports an ``unknown`` status.

    Returns:
        ``{"error": ...}`` for validation problems. Otherwise a dict with
        "plan_stored", "plan_completed", "executed" and "status_counts",
        plus "attachment" (from the most recent step that produced one) and,
        when any leaf failed, "failed_node" and "status" ("unknown" if a step
        had an uncertain side effect, else "error"). If there is nothing
        pending, a short dict with "leaves_total" is returned instead.
    """
    active = ctx.store.get_active_task()
    if not active:
        return {"error": "no active task to plan for"}
    task_id = active["id"]

    # Accept plan_tree (new tree format) or steps (legacy flat array)
    plan_tree_data = decision.get("plan_tree")
    steps = decision.get("steps") or []
    patch_node_id = decision.get("patch_node_id")  # for subtree repair

    if plan_tree_data:
        # New tree format: validate and store
        try:
            root = PlanNode.from_dict(plan_tree_data)
        except (KeyError, TypeError) as e:
            return {"error": f"invalid plan_tree: {e}"}
        plan = TaskPlan(root=root)
        errors = plan.validate()
        if errors:
            return {"error": f"plan validation failed: {'; '.join(errors[:5])}"}
    elif steps:
        # Legacy flat steps -> wrap into tree
        if len(steps) > _PLAN_MAX_STEPS:
            return {"error": f"plan has {len(steps)} steps, max is {_PLAN_MAX_STEPS}"}
        children = []
        for i, step in enumerate(steps):
            if not isinstance(step, dict):
                return {"error": f"step {i} must be a dict"}
            step_action = str(step.get("action") or "").strip()
            if not step_action or step_action not in VALID_ACTIONS or step_action in PLAN_FORBIDDEN_ACTIONS:
                return {"error": f"step {i}: action '{step_action}' is not allowed inside a plan"}
            children.append(PlanNode(
                id=f"s{i}",
                title=str(step.get("reason") or step.get("command") or step.get("tool") or f"step {i}"),
                kind="leaf",
                done_when=str(step.get("done_when") or step.get("expected_evidence") or "action completes successfully"),
                action=step,
            ))
        root = PlanNode(id="root", title=active["goal"][:80], kind="branch", children=children)
        plan = TaskPlan(root=root)
    else:
        return {"error": "plan requires 'plan_tree' or 'steps'"}

    if patch_node_id:
        # Subtree repair: graft the new plan into the stored one. A missing or
        # rootless stored plan is an error -- falling through here would
        # silently overwrite the stored plan with the patch fragment.
        existing_data = ctx.store.get_task_plan(task_id)
        existing_plan = TaskPlan.from_dict(existing_data) if existing_data else None
        if existing_plan is None or not existing_plan.root:
            return {"error": "patch_node_id specified but no existing plan"}
        target = existing_plan.root.find_node(patch_node_id)
        if not (target and plan.root):
            return {"error": f"patch target node '{patch_node_id}' not found in existing plan"}
        # Replace the target's children with new plan's root children
        target.kind = "branch"
        target.children = plan.root.children if plan.root.kind == "branch" else [plan.root]
        target.status = "pending"
        target.action = None
        target.last_error = None
        target.attempts = 0
        existing_plan.revision += 1
        plan = existing_plan
    else:
        plan.revision = plan.revision or 0

    # Store the plan
    ctx.store.set_task_plan(task_id, plan.to_dict())

    # Hybrid execution: compile pending leaves to steps and execute eagerly
    pending = plan.pending_leaves()
    if not pending:
        return {
            "plan_stored": True,
            "plan_completed": True,
            "leaves_total": len(plan.root.get_leaves()) if plan.root else 0,
        }

    plan_results: list[dict[str, Any]] = []
    for leaf in pending:
        leaf.status = "active"
        plan.active_node_id = leaf.id
        ctx.store.set_task_plan(task_id, plan.to_dict())

        step = leaf.action or {}
        step_action = str(step.get("action") or "").strip()
        if not step_action or step_action not in VALID_ACTIONS or step_action in PLAN_FORBIDDEN_ACTIONS:
            leaf.status = "failed"
            leaf.last_error = f"action '{step_action}' is not allowed inside a plan"
            plan_results.append({"node_id": leaf.id, "error": leaf.last_error})
            break

        # Effect level gate for plan steps (defense-in-depth)
        step_effect = _plan_step_effect_level(step_action, step, ctx.max_effect, ctx.mcp_manager)
        if step_effect is not None:
            allowed_max = get_max_effect(ctx.max_effect)
            if not check_allowed(step_effect, allowed_max):
                leaf.status = "failed"
                leaf.last_error = escalation_message(
                    str(step.get("command") or step.get("tool") or step_action),
                    step_effect, allowed_max,
                )
                plan_results.append({"node_id": leaf.id, "error": leaf.last_error})
                ctx.store.set_task_plan(task_id, plan.to_dict())
                break

        step_result = await execute_action(
            ctx.store, step, step_action, ctx=ctx,
        )
        # Appended exactly once per executed step so "executed" stays accurate;
        # the "unknown" detection below reads the status from this entry.
        plan_results.append({"node_id": leaf.id, "action": step_action, "result": step_result})
        result_is_dict = isinstance(step_result, dict)
        step_status = step_result.get("status", "ok") if result_is_dict else "ok"

        # Record in history
        step_reason = str(step.get("reason") or step.get("command") or step.get("tool") or "")
        ctx.store.record(
            f"{step_action}: {step_reason}" if step_reason else step_action,
            task_id=task_id,
            details={"decision": step, "result": step_result, "plan_node_id": leaf.id},
            action_status=step_status,
        )
        if ctx.ledger is not None:
            ctx.ledger.record_action(step_action, step, step_result)

        # Update node status based on result
        if result_is_dict and (step_result.get("error") or step_result.get("escalate")):
            leaf.status = "failed"
            leaf.attempts += 1
            leaf.last_error = str(step_result.get("error") or "escalated")[:200]
            ctx.store.set_task_plan(task_id, plan.to_dict())
            break  # Stop executing -- loop.py will handle replanning
        if step_status == "unknown":
            # Side effect uncertain -- do NOT mark as done, treat as failed.
            leaf.status = "failed"
            leaf.attempts += 1
            leaf.last_error = "Action returned unknown status (side effect uncertain). Needs verification."
            ctx.store.set_task_plan(task_id, plan.to_dict())
            break  # Stop and let loop.py handle readback/replanning
        leaf.status = "done"

    # Save final state
    plan.active_node_id = None
    ctx.store.set_task_plan(task_id, plan.to_dict())

    counts = plan.root.count_by_status() if plan.root else {}
    all_done = counts.get("pending", 0) == 0 and counts.get("failed", 0) == 0
    result: dict[str, Any] = {
        "plan_stored": True,
        "plan_completed": all_done,
        "executed": len(plan_results),
        "status_counts": counts,
    }
    # Propagate the attachment of the most recent step that produced one
    for pr in reversed(plan_results):
        sr: Any = pr.get("result", {})
        if isinstance(sr, dict) and sr.get("attachment"):
            result["attachment"] = sr["attachment"]
            break
    # Signal failure for loop.py plan-aware retry
    if counts.get("failed", 0) > 0:
        failed = plan.failed_leaves()
        result["failed_node"] = failed[0].to_dict() if failed else None
        # Preserve "unknown" status if any step had uncertain side effects,
        # so loop.py can tell it apart from a plain error.
        has_unknown = any(
            isinstance(pr.get("result"), dict) and pr["result"].get("status") == "unknown"
            for pr in plan_results
        )
        result["status"] = "unknown" if has_unknown else "error"
    return result
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
async def _handle_delegate_action(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Forward the decision to ``_handle_delegate`` and return its result unchanged."""
    outcome = await _handle_delegate(ctx, decision)
    return outcome
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
async def _handle_execute(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Run one ``execute`` decision: a Python snippet, a tool call, or a shell command.

    Exactly one mode is used, chosen in this order: ``code`` (run as a script
    in the sandbox), ``tool`` (built-in registry or MCP), then ``command``
    (via ``ctx.execute_command`` if set, otherwise the sandbox). Every mode is
    checked by the trust scanner and against the allowed effect level first.

    Returns:
        A result dict. Failures carry "error" and "status": "error" (plus
        "escalate": True when the effect level is too high); successes carry
        "output" and "status", and tool results also "tool", "error" and an
        optional "attachment".
    """
    # --- Code execution: run Python snippet in sandbox (smolagents pattern) ---
    code = str(decision.get("code") or "").strip()
    if code:
        scan_result = scan_tool_input("code", {"code": code})
        if scan_result.verdict == "block":
            return {"error": f"blocked by trust scanner: {scan_result.reason}", "status": "error"}
        # Python code is E2 (local write potential)
        allowed_max = get_max_effect(ctx.max_effect)
        if not check_allowed("E2", allowed_max):
            return {"error": escalation_message("python code", "E2", allowed_max), "status": "error", "escalate": True}
        sb = ctx.sandbox or SandboxManager()
        # Write code to a temp file to avoid shell quoting issues
        import tempfile as _tf
        script = _tf.NamedTemporaryFile(mode="w", suffix=".py", delete=False, encoding="utf-8")
        script_path = script.name
        try:
            # Writing happens inside the try so a failed write still removes
            # the file (delete=False means nothing else will clean it up).
            with script:
                script.write(code)
            sb_result = sb.run(f"python3 {script_path}", timeout=60)
        finally:
            try:
                os.unlink(script_path)
            except OSError:
                pass
        active = ctx.store.get_active_task()
        if active:
            ctx.store.update_task(active["id"], progress=f"Code: {code[:80]}")
        return {"output": _truncate_output(sb_result.output), "status": sb_result.status}

    # --- Tool call ---
    tool_name = str(decision.get("tool") or "").strip()
    if tool_name:
        tool_params = decision.get("tool_params") or {}
        if not isinstance(tool_params, dict):
            return {"error": "tool_params must be a dict", "status": "error"}
        scan = scan_tool_input(tool_name, tool_params)
        if scan.verdict == "block":
            return {"error": f"blocked by trust scanner: {scan.reason}", "status": "error"}

        mcp = ctx.mcp_manager
        mcp_has_tool = mcp is not None and mcp.has_tool(tool_name)

        tool_effect = TOOL_EFFECTS.get(tool_name, None)
        if tool_effect is None:
            if mcp_has_tool:
                specs = [s for s in mcp.all_tool_specs() if s.name == tool_name]
                tool_effect = "E3" if (specs and specs[0].side_effects) else "E0"
            else:
                tool_effect = "E1"
        allowed_max = get_max_effect(ctx.max_effect)
        if not check_allowed(tool_effect, allowed_max):
            return {
                "error": escalation_message(tool_name, tool_effect, allowed_max),
                "status": "error", "effect_level": tool_effect, "escalate": True,
            }

        # MCP wins when the tool is MCP-only, or when the call carries a "uid"
        # parameter even though a built-in tool of the same name exists.
        use_mcp = mcp_has_tool and (tool_name not in TOOL_REGISTRY or "uid" in tool_params)
        if use_mcp:
            tool_result = await mcp.call_tool(tool_name, tool_params)
        elif tool_name in TOOL_REGISTRY:
            tool_result = await execute_tool(tool_name, tool_params)
        else:
            return {"error": f"Unknown tool: {tool_name}", "status": "error"}

        active = ctx.store.get_active_task()
        if active:
            ctx.store.update_task(active["id"], progress=f"Tool: {tool_name}")
        result_dict = {
            "output": _truncate_output(tool_result.get("output") or ""),
            "status": "ok" if tool_result.get("ok") else "error",
            "error": tool_result.get("error", ""),
            "tool": tool_name,
        }
        if tool_result.get("attachment") is not None:
            result_dict["attachment"] = tool_result["attachment"]
        return result_dict

    # --- Shell command ---
    command = str(decision.get("command") or "").strip()
    if not command:
        # Carry an explicit status like every other error return here, so
        # callers that default a missing status to "ok" do not log a success.
        return {"error": "no command or tool provided", "status": "error"}
    llm_effect = str(decision.get("effect_level") or "")
    cmd_level = classify_command(command, llm_assessment=llm_effect)
    allowed_max = get_max_effect(ctx.max_effect)
    if not check_allowed(cmd_level, allowed_max):
        return {
            "error": escalation_message(command, cmd_level, allowed_max),
            "status": "error", "effect_level": cmd_level, "escalate": True,
        }
    scan_result = scan_tool_input("shell", {"command": command})
    if scan_result.verdict == "block":
        return {"error": f"blocked by trust scanner: {scan_result.reason}", "status": "error"}
    if scan_result.verdict == "warn":
        logger.warning("Trust scanner warning: %s", scan_result.reason)
    if ctx.execute_command:
        try:
            output = ctx.execute_command(command)
            status = "ok" if output is not None else "error"
        except Exception as exc:
            # The command may have partially run, so the outcome is reported
            # as uncertain rather than as a clean failure.
            output = str(exc)
            status = "unknown"
    else:
        sb = ctx.sandbox or SandboxManager()
        sb_result = sb.run(command, timeout=60)
        output = sb_result.output
        status = sb_result.status
    active = ctx.store.get_active_task()
    if active:
        ctx.store.update_task(active["id"], progress=f"Ran: {command[:100]}")
    safe_output = "" if output is None else str(output)
    return {"output": _truncate_output(safe_output), "status": status}
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
async def _handle_create_task(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Create an agent-sourced task from the decision's goal.

    Unrecognised priorities fall back to "normal". When the estimated effect
    is level 2 or higher the new task is immediately marked blocked and an
    approval request is sent through ``ctx.notify``.
    """
    goal = str(decision.get("goal") or "").strip()
    if not goal:
        return {"error": "goal is required for create_task"}

    requested = str(decision.get("priority") or "normal").strip()
    priority = requested if requested in ("urgent", "normal", "low") else "normal"
    new_id = ctx.store.add_task(goal, priority=priority, source="agent")

    estimated_effect = str(decision.get("estimated_effect") or "E1").strip()
    needs_approval = effect_to_int(estimated_effect) >= 2
    if needs_approval:
        ctx.store.update_task(
            new_id,
            status="blocked",
            progress=f"Awaiting approval (agent-initiated, {estimated_effect})",
        )
        await ctx.notify(f"📋 제안: {goal}\n효과 수준 {estimated_effect} -- 승인이 필요합니다.")
    return {"created": new_id, "goal": goal, "source": "agent"}
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
async def _handle_pick_task(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Make the task named by ``task_id`` the active one.

    Returns early if it is already active; rejects unknown ids and tasks in a
    terminal state ("done"/"failed"). Any other currently active task is
    paused first, and a start notification is sent after activation.
    """
    task_id = str(decision.get("task_id") or "").strip()
    if not task_id:
        return {"error": "task_id is required -- choose which task to activate"}

    current = ctx.store.get_active_task()
    if current and current["id"] == task_id:
        return {"activated": task_id, "already_active": True}

    db = ctx.store.connection
    target = db.execute(
        "SELECT id, status FROM tasks WHERE id = ?", (task_id,),
    ).fetchone()
    if target is None:
        return {"error": f"task {task_id} not found"}
    if target["status"] in ("done", "failed"):
        return {"error": f"task {task_id} is '{target['status']}' (terminal) -- pick a pending or paused task"}

    # Reaching this point with an active task means it is a different one.
    if current:
        ctx.store.pause_active_task("switching to another task")
    ctx.store.activate_task(task_id)

    goal_row = db.execute("SELECT goal FROM tasks WHERE id = ?", (task_id,)).fetchone()
    label = goal_row["goal"] if goal_row else task_id
    await ctx.notify(f"▶ 시작: {label}")
    return {"activated": task_id}
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
async def _handle_create_subtask(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Add an agent-sourced subtask under the currently active task.

    Requires an active task and a non-empty ``subtask_goal``; an unrecognised
    ``subtask_priority`` falls back to "normal".
    """
    parent = ctx.store.get_active_task()
    if not parent:
        return {"error": "no active task to create subtask under"}

    goal = str(decision.get("subtask_goal") or "").strip()
    if not goal:
        return {"error": "subtask_goal is required"}

    requested = str(decision.get("subtask_priority") or "normal").strip()
    priority = requested if requested in ("urgent", "normal", "low") else "normal"

    parent_id = parent["id"]
    subtask_id = ctx.store.add_task(
        goal, priority=priority, source="agent", parent_id=parent_id,
    )
    return {"created": subtask_id, "parent": parent_id, "goal": goal}
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
async def _handle_handle_notification(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Mark a notification handled and optionally send a reply to its sender.

    The reply (``reply_text``) is dropped when it is the ``NO_REPLY`` sentinel,
    when E2 exceeds the allowed effect level, or when the trust scanner blocks
    it. Delivery is attempted through the channel bot first and then through
    the Slack/Discord webhook configured in the environment.

    Returns:
        ``{"error": ...}`` if ``notification_id`` is missing, otherwise
        ``{"handled", "response", "reply_sent"}``.
    """
    notif_id = str(decision.get("notification_id") or "").strip()
    if not notif_id:
        return {"error": "notification_id is required"}
    response = str(decision.get("response") or "").strip()
    reply_text = str(decision.get("reply_text") or "").strip()
    notif = ctx.store.connection.execute(
        "SELECT source, message, metadata FROM notifications WHERE id = ?", (notif_id,),
    ).fetchone()

    # "NO_REPLY" means "send nothing". Resolve it once, up front, so that no
    # delivery path (bot or webhook) can post the sentinel text itself.
    if reply_text.upper() == "NO_REPLY":
        reply_text = ""

    # Reply to a notification is E2 (responding to human message, not unsolicited outbound).
    # channel_reply tool (unsolicited) stays E3.
    if reply_text:
        allowed_max = get_max_effect(ctx.max_effect)
        if not check_allowed("E2", allowed_max):
            reply_text = ""
            logger.warning("Reply suppressed: E2 exceeds max_effect %s", allowed_max)
    if reply_text:
        from pascal.trust import scan as _trust_scan
        reply_scan = _trust_scan(reply_text)
        if reply_scan.verdict == "block":
            reply_text = ""
            logger.warning("Reply blocked by trust scanner: %s", reply_scan.reason)

    reply_sent = False
    if reply_text and notif:
        bot = get_channel_bot()
        if bot is not None:
            try:
                meta = json.loads(notif["metadata"]) if notif["metadata"] else {}
            except (ValueError, TypeError):
                meta = {}
            if not isinstance(meta, dict):
                meta = {}
            raw_chat_id = meta.get("chat_id")
            if raw_chat_id is None:
                raw_chat_id = get_owner_chat_id()
            try:
                chat_id = int(raw_chat_id or 0)
            except (TypeError, ValueError):
                # Malformed chat id in metadata: skip the bot, try the webhook.
                chat_id = 0
            if chat_id:
                try:
                    await bot.send_message(chat_id=chat_id, text=reply_text)
                    reply_sent = True
                except Exception as e:
                    logger.warning("Channel reply failed: %s", e)
        if not reply_sent:
            source = notif["source"] or ""
            # Only use webhook URLs from trusted sources (env vars), never from metadata
            reply_url = ""
            if "slack" in source:
                reply_url = os.environ.get("PASCAL_SLACK_WEBHOOK", "")
            elif "discord" in source:
                reply_url = os.environ.get("PASCAL_DISCORD_WEBHOOK", "")
            if reply_url:
                from pascal.scheduler import _send_webhook
                reply_sent = _send_webhook(reply_url, reply_text)

    ctx.store.handle_notification(notif_id)
    if notif and reply_sent:
        ctx.store.push_conversation_turn(notif["source"], "assistant", reply_text)
    return {"handled": notif_id, "response": response, "reply_sent": reply_sent}
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
async def _handle_dismiss_notification(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Dismiss the notification named by ``notification_id`` in the store."""
    target_id = str(decision.get("notification_id") or "").strip()
    if target_id:
        ctx.store.dismiss_notification(target_id)
        return {"dismissed": target_id}
    return {"error": "notification_id is required"}
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
async def _handle_pause_task(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Pause the active task, using ``pause_reason`` or "interrupted" as the reason.

    The store's return value is passed back under the "paused" key.
    """
    why = str(decision.get("pause_reason") or "interrupted")
    return {"paused": ctx.store.pause_active_task(why)}
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
async def _handle_complete_task(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Mark the active task done, subject to a failure-streak guard and optional verification.

    Completion is refused (with "retry": True) when the recent execute history
    for the task shows a failure streak, or when the task declares
    ``proof_required`` and LLM verification does not pass. On success the task
    is stored as done, a procedure is auto-memorized, and a notification is sent.
    """
    active = ctx.store.get_active_task()
    if not active:
        return {"error": "no active task"}

    task_id = active["id"]
    summary = str(decision.get("summary") or "completed")

    # Guard: refuse completion right after a streak of failed executes.
    guard_history = ctx.store.get_recent_history(limit=_COMPLETE_TASK_HISTORY_SCAN_LIMIT)
    if _recent_execute_failure_streak(guard_history, task_id):
        reason = (
            f"Cannot complete: the last {_COMPLETE_TASK_FAILURE_WINDOW} recent execute "
            "actions for this task failed or returned empty output."
        )
        ctx.store.update_task(task_id, progress=f"Completion blocked: {reason}")
        return {"error": reason, "retry": True}

    history = ctx.store.get_recent_history(limit=5)

    proof_required = None
    raw_proof = active.get("proof_required")
    if raw_proof:
        try:
            proof_required = json.loads(active["proof_required"])
        except (json.JSONDecodeError, TypeError):
            # Unparseable proof spec is treated as "no proof required".
            pass

    # Verification: only run when proof_required is explicitly set on the task.
    # For normal tasks, trust the LLM's completion claim -- the action history
    # is already recorded for audit. This saves 1 LLM call per task completion.
    must_verify = bool(proof_required) and ctx.llm is not None and not ctx.skip_verification
    if must_verify:
        passed, reason = await _verify_completion(
            ctx.llm, active["goal"], summary, history, proof_required=proof_required,
        )
    else:
        passed, reason = True, "no_proof_required"

    if not passed:
        ctx.store.update_task(task_id, progress=f"Verification failed: {reason}")
        return {"error": f"verification_failed: {reason}", "retry": True}

    ctx.store.update_task(task_id, status="done", result=summary)
    # Auto-memorize: extract detailed, reusable procedure from task history
    _auto_memorize_procedure(ctx.store, {**active, "result": summary}, history)
    await ctx.notify(f"✓ 완료: {active['goal']}\n{summary}")
    return {"completed": task_id, "summary": summary, "verified": True}
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
async def _handle_block_task(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Mark the active task as blocked, recording the decision's reason as progress."""
    active = ctx.store.get_active_task()
    if not active:
        return {"error": "no active task"}
    why = str(decision.get("reason") or "blocked")
    task_id = active["id"]
    ctx.store.update_task(task_id, status="blocked", progress=f"Blocked: {why}")
    return {"blocked": task_id, "reason": why}
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
async def _handle_fail_task(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Mark the active task failed, run the automatic postmortem, and notify.

    The decision's ``reason`` (default "failed") is stored as progress and
    echoed in the result.
    """
    active = ctx.store.get_active_task()
    if not active:
        return {"error": "no active task"}
    why = str(decision.get("reason") or "failed")
    task_id = active["id"]
    ctx.store.update_task(task_id, status="failed", progress=f"Failed: {why}")
    _auto_postmortem(ctx.store, active, why)
    await ctx.notify(f"✗ 실패: {active['goal']}\n{why}")
    return {"failed": task_id, "reason": why}
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
async def _handle_add_todo(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Attach a todo to the active task.

    The title comes from ``todo_title`` or, failing that, ``title``. The
    active-task check takes precedence over the empty-title check.
    """
    raw_title = decision.get("todo_title") or decision.get("title") or ""
    title = str(raw_title).strip()
    active = ctx.store.get_active_task()
    if not active:
        return {"error": "no active task to add todo to"}
    if not title:
        return {"error": "empty todo title"}
    new_id = ctx.store.add_todo(task_id=active["id"], title=title)
    return {"added": new_id, "title": title}
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
async def _handle_complete_todo(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Mark the todo named by ``todo_id`` as completed in the store."""
    target_id = str(decision.get("todo_id") or "").strip()
    if target_id:
        ctx.store.complete_todo(target_id)
        return {"completed": target_id}
    return {"error": "no todo_id"}
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
async def _handle_memorize(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Store a memory entry described by the decision.

    Reads ``memory_kind`` and ``memory_content`` from *decision*. Content must
    be non-empty and the kind must be one of fact, lesson, preference or
    procedure; the empty-content error takes precedence. The entry is linked
    to the active task when there is one.

    Returns ``{"memorized": <memory id>, "kind": <kind>}`` or an
    ``{"error": ...}`` dict.
    """
    allowed_kinds = {"fact", "lesson", "preference", "procedure"}
    kind = str(decision.get("memory_kind") or "").strip()
    content = str(decision.get("memory_content") or "").strip()

    if not content:
        return {"error": "empty memory content"}
    if kind not in allowed_kinds:
        return {"error": f"invalid memory_kind '{kind}' -- must be fact|lesson|preference|procedure"}

    task = ctx.store.get_active_task()
    origin = task["id"] if task else None
    memory_id = ctx.store.add_memory(kind=kind, content=content, source_task_id=origin)
    return {"memorized": memory_id, "kind": kind}
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
async def _handle_add_rule(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Add the rule text in ``decision["rule"]`` to the store.

    The rule is stored as mutable and attributed to ``"agent"``.

    Returns ``{"added": <rule id>, "rule": <text>}``, or
    ``{"error": "empty rule"}`` when the text is missing or blank.
    """
    text = str(decision.get("rule") or "").strip()
    if not text:
        return {"error": "empty rule"}
    new_rule_id = ctx.store.add_rule(text, mutable=True, added_by="agent")
    return {"added": new_rule_id, "rule": text}
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
async def _handle_remove_rule(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Remove the rule named by ``decision["rule_id"]``.

    Returns ``{"removed": <store.remove_rule result>, "rule_id": <id>}``, or
    ``{"error": "no rule_id"}`` when the id is missing or blank.
    """
    target = str(decision.get("rule_id") or "").strip()
    if not target:
        return {"error": "no rule_id"}
    outcome = ctx.store.remove_rule(target)
    return {"removed": outcome, "rule_id": target}
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
async def _handle_set_context(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Write ``decision["value"]`` under ``decision["key"]`` in the store context.

    The keys ``mission``, ``identity`` and ``routines`` are refused. When
    ``decision["ttl_hours"]`` is a positive number it is passed to the store
    as an integer ``ttl_hours``; otherwise no TTL argument is passed.

    Returns ``{"set": <key>}`` or an ``{"error": ...}`` dict.
    """
    key = str(decision.get("key") or "").strip()
    value = decision.get("value")
    if not key:
        return {"error": "no key"}

    # Critical context keys must not be modified by the agent.
    protected = frozenset({"mission", "identity", "routines"})
    if key in protected:
        return {"error": f"context key '{key}' is protected and cannot be modified by the agent"}

    ttl = decision.get("ttl_hours")
    has_ttl = isinstance(ttl, (int, float)) and ttl > 0
    extra = {"ttl_hours": int(ttl)} if has_ttl else {}
    ctx.store.set_context(key, value, **extra)
    return {"set": key}
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
async def _handle_escalate(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Build an escalation result carrying the question for a human.

    Uses ``decision["question"]``, or ``"Need human input"`` when it is
    missing or falsy. *ctx* is not used.
    """
    return {
        "escalated": True,
        "question": str(decision.get("question") or "Need human input"),
    }
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
async def _handle_wait(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Build a "waiting" result carrying ``decision["wait_reason"]`` (or ``""``).

    *ctx* is not used.
    """
    why = str(decision.get("wait_reason") or "")
    return {"waiting": True, "reason": why}
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
# ── Handler registry ──────────────────────────────────────────────
|
|
660
|
+
|
|
661
|
+
# Maps each action name to its handler. execute_action() looks the name up
# here and awaits `handler(ctx, decision)`.
# Note: "delegate" is routed to _handle_delegate_action, which is a different
# name from the _handle_delegate coroutine defined further down in this file.
ACTION_HANDLERS: dict[str, Callable] = {
    "think": _handle_think,
    "plan": _handle_plan,
    "delegate": _handle_delegate_action,
    "execute": _handle_execute,
    "create_task": _handle_create_task,
    "pick_task": _handle_pick_task,
    "create_subtask": _handle_create_subtask,
    "handle_notification": _handle_handle_notification,
    "dismiss_notification": _handle_dismiss_notification,
    "pause_task": _handle_pause_task,
    "complete_task": _handle_complete_task,
    "block_task": _handle_block_task,
    "fail_task": _handle_fail_task,
    "add_todo": _handle_add_todo,
    "complete_todo": _handle_complete_todo,
    "memorize": _handle_memorize,
    "add_rule": _handle_add_rule,
    "remove_rule": _handle_remove_rule,
    "set_context": _handle_set_context,
    "escalate": _handle_escalate,
    "wait": _handle_wait,
}

# Valid actions -- single source of truth (must match ACTION_HANDLERS keys).
# Built directly from the registry, so it is exactly the set of its keys.
VALID_ACTIONS = frozenset(ACTION_HANDLERS)
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
# ── Public dispatch function ──────────────────────────────────────
|
|
690
|
+
|
|
691
|
+
async def execute_action(
    store: PascalStore,
    decision: dict[str, Any],
    action: str,
    *,
    ctx: ActionContext | None = None,
    llm=None,
    max_effect: str | None = None,
    sandbox: SandboxManager | None = None,
    mcp_manager: MCPManager | None = None,
    execute_command: Callable[[str], str] | None = None,
    thought_buffer: list[str] | None = None,
    ledger: Ledger | None = None,
    skip_verification: bool = False,
    notify_fn: Callable[[str], Any] | None = None,
) -> dict[str, Any]:
    """Execute a single action and return the result.

    Accepts either an ActionContext (preferred) or individual kwargs (backward compat).

    Args:
        store: Store used to build the context when *ctx* is not given.
        decision: The decision payload handed to the handler unchanged.
        action: Key into ``ACTION_HANDLERS`` selecting the handler.
        ctx: Ready-made context. When provided it is used as-is and every
            other keyword argument (and *store*) is ignored.
        llm, max_effect, sandbox, mcp_manager, execute_command, thought_buffer,
        ledger, skip_verification, notify_fn: Forwarded to ``ActionContext``
            when *ctx* is ``None``.

    Returns:
        Whatever the handler returns, or ``{"error": "unhandled action: ..."}``
        when *action* has no registered handler.
    """
    handler = ACTION_HANDLERS.get(action)
    if handler is None:
        return {"error": f"unhandled action: {action}"}
    if ctx is not None:
        return await handler(ctx, decision)

    # Only substitute a fresh list when the caller passed nothing. Using
    # `thought_buffer or []` would also replace a caller-supplied *empty* list,
    # so anything a handler appended would never reach the caller's buffer.
    buffer = thought_buffer if thought_buffer is not None else []
    shim_ctx = ActionContext(
        store=store,
        llm=llm,
        max_effect=max_effect,
        sandbox=sandbox,
        mcp_manager=mcp_manager,
        execute_command=execute_command,
        thought_buffer=buffer,
        ledger=ledger,
        skip_verification=skip_verification,
        notify_fn=notify_fn,
    )
    return await handler(shim_ctx, decision)
|
|
729
|
+
|
|
730
|
+
|
|
731
|
+
# ── Delegate handler ──────────────────────────────────────────────
|
|
732
|
+
|
|
733
|
+
async def _handle_delegate(ctx: ActionContext, decision: dict[str, Any]) -> dict[str, Any]:
    """Delegate a task to Claude Code or Codex CLI via async subprocess.

    Reads ``tool``, ``task``, ``context``, ``success_criteria`` and ``timeout``
    from *decision*. The request is rejected before anything is spawned when
    the tool is unknown, the task is empty, the trust scanner blocks the text,
    or effect level E2 is not allowed for ``ctx.max_effect``.

    The child runs with the environment from ``make_safe_env()`` (a temporary
    HOME into which a small allow-list of auth files is copied). The temporary
    HOME is always removed and a still-running child is always killed and
    reaped, including on timeout, cancellation, or unexpected errors.

    Returns:
        After the child ran: ``{"output", "stderr", "status", "tool",
        "delegate"}`` with ``status`` ``"ok"`` for exit code 0, else
        ``"error"``. Otherwise ``{"error": ..., "status": ...}`` where
        ``status`` is ``"unknown"`` for a timeout and ``"error"`` elsewhere.
    """
    tool_name = str(decision.get("tool") or "").strip()
    if tool_name not in DELEGATE_TOOLS:
        return {"error": f"Unknown delegate tool '{tool_name}'. Available: {', '.join(DELEGATE_TOOLS)}", "status": "error"}

    task_desc = str(decision.get("task") or "").strip()
    if not task_desc:
        return {"error": "delegate requires a 'task' field", "status": "error"}

    context = str(decision.get("context") or "").strip()
    success_criteria = str(decision.get("success_criteria") or "").strip()

    # Scan everything that will end up in the delegated prompt.
    combined_text = f"{task_desc}\n{context}\n{success_criteria}"
    scan_result = scan_tool_input("delegate", {"task": combined_text})
    if scan_result.verdict == "block":
        return {"error": f"blocked by trust scanner: {scan_result.reason}", "status": "error"}

    allowed_max = get_max_effect(ctx.max_effect)
    if not check_allowed("E2", allowed_max):
        return {
            "error": escalation_message(f"delegate:{tool_name}", "E2", allowed_max),
            "status": "error", "effect_level": "E2", "escalate": True,
        }

    prompt_parts = [task_desc]
    if context:
        prompt_parts.append(f"\nContext: {context}")
    if success_criteria:
        prompt_parts.append(f"\nSuccess criteria: {success_criteria}")
    prompt = "\n".join(prompt_parts)

    cmd_base = DELEGATE_TOOLS[tool_name]
    cmd = cmd_base + [prompt]

    # The timeout comes from the model's decision, so it may be junk: fall back
    # to the default on unparsable or non-positive values instead of raising or
    # timing out immediately.
    default_timeout = 300
    try:
        requested_timeout = int(decision.get("timeout") or default_timeout)
    except (TypeError, ValueError, OverflowError):
        requested_timeout = default_timeout
    if requested_timeout <= 0:
        requested_timeout = default_timeout
    timeout = min(requested_timeout, _DELEGATE_MAX_TIMEOUT)

    import shutil as _shutil
    from pascal.sandbox import make_safe_env
    from pathlib import Path as _P

    safe_env, tmp_home = make_safe_env()

    def _cleanup_tmp():
        """Best-effort removal of the temporary HOME and its registry entry."""
        try:
            if os.path.isdir(tmp_home):
                _shutil.rmtree(tmp_home, ignore_errors=True)
            from pascal.sandbox import _sandbox_temp_dirs
            if tmp_home in _sandbox_temp_dirs:
                _sandbox_temp_dirs.remove(tmp_home)
        except Exception:
            logger.debug("Failed to clean up delegate temp home %s", tmp_home, exc_info=True)

    proc = None
    try:
        real_home = _P.home()
        # Only these files are copied into the temporary HOME.
        # NOTE(review): confirm the file names match what each CLI actually reads.
        _AUTH_FILES = {
            ".codex": ["auth.json"],
            ".claude": ["auth.json", "config.toml"],
        }
        for auth_dir, allowed_files in _AUTH_FILES.items():
            src = real_home / auth_dir
            dst = _P(tmp_home) / auth_dir
            if src.is_dir():
                try:
                    dst.mkdir(exist_ok=True)
                    for f in allowed_files:
                        sf = src / f
                        if sf.exists():
                            _shutil.copy2(str(sf), str(dst / f))
                except Exception:
                    # Best effort: the delegate may still work without these files.
                    logger.warning("Could not copy auth files from %s", src, exc_info=True)

        ctx.store.refresh_lock()

        workspace = ctx.sandbox._restricted._workspace if ctx.sandbox else ""
        cwd = workspace if workspace and os.path.isdir(workspace) else os.getcwd()
        try:
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=cwd,
                env=safe_env,
            )

            # Refresh lock periodically during long delegate calls to prevent
            # another runner from acquiring the lock (TTL=600s, delegate up to 600s)
            async def _refresh_until_done():
                while proc.returncode is None:
                    await asyncio.sleep(60)
                    ctx.store.refresh_lock()

            refresh_task = asyncio.create_task(_refresh_until_done())
            try:
                stdout_bytes, stderr_bytes = await asyncio.wait_for(
                    proc.communicate(), timeout=timeout,
                )
            finally:
                refresh_task.cancel()
                try:
                    await refresh_task
                except asyncio.CancelledError:
                    pass
        except asyncio.TimeoutError:
            # The child is killed and reaped in the outer ``finally``.
            return {"error": f"delegate:{tool_name} timed out ({timeout}s)", "status": "unknown"}
        except FileNotFoundError:
            return {"error": f"'{cmd_base[0]}' not found. Is {tool_name} installed?", "status": "error"}
        except Exception as exc:
            return {"error": f"delegate failed: {exc}", "status": "error"}
    finally:
        try:
            # Covers timeout, cancellation and unexpected errors: never leave
            # the child running, and wait for it so it is reaped.
            if proc is not None and proc.returncode is None:
                try:
                    proc.kill()
                except ProcessLookupError:
                    pass
                await proc.wait()
        finally:
            _cleanup_tmp()

    stdout = (stdout_bytes or b"").decode("utf-8", errors="replace")
    stderr = (stderr_bytes or b"").decode("utf-8", errors="replace")
    ok = proc.returncode == 0

    active = ctx.store.get_active_task()
    if active:
        ctx.store.update_task(active["id"], progress=f"Delegated to {tool_name}")

    return {
        "output": stdout[:8000],
        "stderr": stderr[:2000] if stderr else "",
        "status": "ok" if ok else "error",
        "tool": tool_name,
        "delegate": True,
    }
|
|
865
|
+
|
|
866
|
+
|
|
867
|
+
# ── Verifier ──────────────────────────────────────────────────────
|
|
868
|
+
|
|
869
|
+
async def _verify_completion(
    llm,
    task_goal: str,
    task_summary: str,
    recent_history: list[dict[str, Any]],
    proof_required: list[str] | None = None,
) -> tuple[bool, str]:
    """Ask the LLM to verify whether the task was actually completed.

    Args:
        llm: Client exposing ``await llm.chat(messages, tools=[])`` whose
            response has a ``text`` attribute.
        task_goal: The goal the task was meant to achieve.
        task_summary: The result the agent claims.
        recent_history: History entries; the ``summary`` of the last five is
            shown to the verifier.
        proof_required: Optional list of proof items mentioned in the prompt.

    Returns:
        ``(passed, reason)``. Verification fails closed: any error, a
        non-object reply, or a ``passed`` value that is not the boolean
        ``true`` (or the string ``"true"``) yields ``passed=False``.
    """
    from pascal.types import extract_json as _extract_json

    # Tolerate entries without a summary rather than raising before the try.
    history_text = "\n".join(f"- {h.get('summary', '')}" for h in recent_history[-5:])
    proof_text = ""
    if proof_required:
        proof_text = f"\nRequired proof: {', '.join(proof_required)}"
    messages = [
        Message(
            role=Role.SYSTEM,
            content=(
                "You verify task completion. Return JSON only.\n"
                '{"passed": true/false, "reason": "why"}\n'
                "Be strict: did the actions actually accomplish the goal?\n"
                "Check that claimed evidence matches what actually happened."
            ),
        ),
        Message(
            role=Role.USER,
            content=f"Goal: {task_goal}\nClaimed result: {task_summary}{proof_text}\nRecent actions:\n{history_text}",
        ),
    ]
    try:
        response = await llm.chat(messages, tools=[])
        payload = json.loads(_extract_json(response.text or "{}"))
        if not isinstance(payload, dict):
            return False, "verifier did not return a JSON object"
        verdict = payload.get("passed")
        # bool("false") is True, so a quoted verdict must be parsed explicitly;
        # anything that is not clearly "true" counts as a failed verification.
        if isinstance(verdict, str):
            passed = verdict.strip().lower() == "true"
        else:
            passed = verdict is True
        return passed, str(payload.get("reason") or "")
    except Exception as exc:
        logger.warning("Verification error: %s", exc)
        return False, str(exc)
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
# ── Auto post-mortem ──────────────────────────────────────────────
|
|
907
|
+
|
|
908
|
+
def _auto_postmortem(store: PascalStore, task: dict[str, Any], failure_reason: str) -> None:
    """Generate a lesson from task failure. No LLM needed -- just pattern extraction.

    Looks at the 10 most recent history entries, keeps those belonging to
    *task*, and stores a ``lesson`` memory combining the failure reason, up to
    three error messages (each cut to 100 characters) and the last three
    action summaries. Never raises: any failure is logged as a warning.

    Args:
        store: Store providing ``get_recent_history`` and ``add_memory``.
        task: The failed task; ``id`` and ``goal`` are read.
        failure_reason: Human-readable reason recorded in the lesson.
    """
    try:
        history = store.get_recent_history(limit=10)
        actions: list[str] = []
        errors: list[str] = []
        for h in history:
            if h.get("task_id") != task["id"]:
                continue
            summary = h.get("summary")
            if summary:
                actions.append(str(summary))
            details = h.get("details")
            if isinstance(details, str):
                try:
                    details = json.loads(details)
                except Exception:
                    details = {}
            if isinstance(details, dict):
                result = details.get("result", {})
                if isinstance(result, dict) and result.get("error"):
                    # The error may be a non-string (e.g. a dict); coerce it so
                    # slicing and the join below cannot abort the post-mortem.
                    errors.append(str(result["error"])[:100])
        lesson = f"Task '{task['goal']}' failed: {failure_reason}."
        if errors:
            lesson += f" Errors: {'; '.join(errors[:3])}"
        if actions:
            lesson += f" Attempted: {'; '.join(actions[-3:])}"
        store.add_memory(kind="lesson", content=lesson, source_task_id=task["id"])
        logger.info("Auto post-mortem lesson: %s", lesson[:100])
    except Exception:
        logger.warning("Post-mortem failed", exc_info=True)
|
|
936
|
+
|
|
937
|
+
|
|
938
|
+
def _auto_memorize_procedure(
    store: PascalStore, task: dict[str, Any], history: list[dict[str, Any]]
) -> None:
    """Extract a detailed, reusable procedure from completed task history.

    Saves actual commands, tools, and parameters -- not just action names.
    This creates a 'work manual' entry that Pascal can replay for similar tasks.

    Only history entries belonging to *task* are considered. Bookkeeping
    actions and failed actions are left out, and at most the last eight
    remaining steps are stored as a JSON ``procedure`` memory. Never raises:
    any failure is logged as a warning.

    Args:
        store: Store providing ``add_memory``.
        task: The completed task; ``id``, ``goal`` and ``result`` are read.
        history: History entries whose ``details`` hold the decision/result.
    """
    try:
        task_actions = [h for h in history if h.get("task_id") == task["id"]]
        if not task_actions:
            return

        steps: list[dict[str, Any]] = []
        for h in task_actions:
            details = h.get("details")
            if isinstance(details, str):
                try:
                    details = json.loads(details)
                except (json.JSONDecodeError, TypeError):
                    continue
            if not isinstance(details, dict):
                continue

            dec = details.get("decision")
            if not isinstance(dec, dict):
                # A null/malformed decision must not abort the whole procedure.
                continue
            res = details.get("result")
            action_type = dec.get("action") or ""

            # Skip think, pick_task, wait, complete_task -- not part of the procedure
            if action_type in ("think", "pick_task", "wait", "complete_task", ""):
                continue
            # Skip failed actions. Handlers report failure either through
            # status="error" or through a bare {"error": ...} result.
            if isinstance(res, dict) and (res.get("status") == "error" or res.get("error")):
                continue

            step: dict[str, Any] = {"action": action_type}
            if action_type == "execute":
                if dec.get("tool"):
                    step["tool"] = dec["tool"]
                    # Generalize params: keep structure but mark variable values
                    params = dec.get("tool_params", {})
                    if isinstance(params, dict):
                        step["tool_params"] = {
                            k: v if k in ("region",) else f"<{k}>"
                            for k, v in params.items()
                        }
                elif dec.get("command"):
                    step["command"] = dec["command"]
            elif action_type == "delegate":
                step["tool"] = dec.get("tool", "")
                step["task_pattern"] = str(dec.get("task") or "")[:60]
            elif action_type == "plan":
                plan_steps = dec.get("steps")
                step["step_count"] = len(plan_steps) if isinstance(plan_steps, (list, tuple)) else 0

            steps.append(step)

        if not steps:
            return

        import platform as _platform
        task_summary = str(task.get("result") or "")
        proc = json.dumps({
            "name": task["goal"][:80],
            "platform": _platform.system(),
            "steps": steps[-8:],  # Keep last 8 successful steps max
            "summary": task_summary[:100],
        }, ensure_ascii=False)

        store.add_memory(kind="procedure", content=proc, source_task_id=task["id"])
        logger.info("Auto-memorized procedure: %s (%d steps)", task["goal"][:40], len(steps))
    except Exception:
        logger.warning("Auto-memorize failed", exc_info=True)
|
|
1011
|
+
|
|
1012
|
+
|
|
1013
|
+
def _recent_execute_failure_streak(
    history: list[dict[str, Any]],
    task_id: str,
    *,
    window: int = _COMPLETE_TASK_FAILURE_WINDOW,
) -> bool:
    """Return True when the last N execute actions for a task all failed or were empty.

    History is walked newest-first (it is iterated in reverse). Only entries
    for *task_id* whose decision action is ``"execute"`` count. If fewer than
    *window* such entries exist the answer is False.
    """
    collected: list[dict[str, Any]] = []
    for entry in reversed(history):
        if entry.get("task_id") != task_id:
            continue

        payload = entry.get("details")
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except (json.JSONDecodeError, TypeError):
                payload = {}
        if not isinstance(payload, dict):
            continue

        chosen = payload.get("decision", {})
        is_execute = isinstance(chosen, dict) and chosen.get("action") == "execute"
        if not is_execute:
            continue

        outcome = payload.get("result", {})
        if not isinstance(outcome, dict):
            outcome = {}
        collected.append(outcome)
        if len(collected) >= window:
            break

    if len(collected) < window:
        return False
    for outcome in collected:
        if not _execute_result_failed_or_empty(outcome):
            return False
    return True
|
|
1042
|
+
|
|
1043
|
+
|
|
1044
|
+
def _execute_result_failed_or_empty(result: dict[str, Any]) -> bool:
|
|
1045
|
+
"""Treat explicit failures as insufficient completion evidence.
|
|
1046
|
+
|
|
1047
|
+
Successful commands often emit no stdout (mkdir, touch, git add), so a blank
|
|
1048
|
+
output stream alone is not enough to call the step a failure once status=ok
|
|
1049
|
+
is recorded.
|
|
1050
|
+
"""
|
|
1051
|
+
if not isinstance(result, dict):
|
|
1052
|
+
return False
|
|
1053
|
+
status = str(result.get("status") or "").strip().lower()
|
|
1054
|
+
if status == "ok":
|
|
1055
|
+
return False
|
|
1056
|
+
if status in {"error", "unknown"}:
|
|
1057
|
+
return True
|
|
1058
|
+
error = result.get("error")
|
|
1059
|
+
if isinstance(error, str) and error.strip():
|
|
1060
|
+
return True
|
|
1061
|
+
if result.get("attachment") is not None:
|
|
1062
|
+
return False
|
|
1063
|
+
output = result.get("output")
|
|
1064
|
+
if isinstance(output, str):
|
|
1065
|
+
return not output.strip()
|
|
1066
|
+
return not bool(output)
|