cawdex 1.35.75 → 1.35.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/bin/anycode.js +2 -2
- package/bin/cawdex.js +408 -408
- package/bin/ecc-hooks.cjs +11 -11
- package/dist/agents-md.d.ts +31 -0
- package/dist/agents-md.js +340 -0
- package/dist/agents-md.js.map +1 -0
- package/dist/agents.js +1424 -1424
- package/dist/api.d.ts +1 -0
- package/dist/api.js +19 -14
- package/dist/api.js.map +1 -1
- package/dist/autonomous-loops.js +287 -287
- package/dist/benchmark-repos.d.ts +31 -0
- package/dist/benchmark-repos.js +234 -8
- package/dist/benchmark-repos.js.map +1 -1
- package/dist/command-palette.js +4 -2
- package/dist/command-palette.js.map +1 -1
- package/dist/compaction.js +8 -8
- package/dist/config.js +51 -36
- package/dist/config.js.map +1 -1
- package/dist/content-engine.js +543 -543
- package/dist/context-brief.d.ts +4 -0
- package/dist/context-brief.js +230 -0
- package/dist/context-brief.js.map +1 -0
- package/dist/cost-tracker.d.ts +33 -14
- package/dist/cost-tracker.js +81 -19
- package/dist/cost-tracker.js.map +1 -1
- package/dist/coverage.js +39 -39
- package/dist/docs-sync.js +98 -98
- package/dist/evaluation.js +452 -452
- package/dist/fixed-footer.d.ts +7 -1
- package/dist/fixed-footer.js +92 -18
- package/dist/fixed-footer.js.map +1 -1
- package/dist/git-workflow.js +49 -49
- package/dist/index.d.ts +2 -0
- package/dist/index.js +161 -63
- package/dist/index.js.map +1 -1
- package/dist/live-queue.js +1 -1
- package/dist/live-queue.js.map +1 -1
- package/dist/model-aliases.d.ts +37 -0
- package/dist/model-aliases.js +203 -0
- package/dist/model-aliases.js.map +1 -0
- package/dist/orchestration.js +15 -15
- package/dist/permissions.d.ts +6 -0
- package/dist/permissions.js +53 -0
- package/dist/permissions.js.map +1 -1
- package/dist/pm2-manager.js +26 -26
- package/dist/query.d.ts +0 -1
- package/dist/query.js +74 -39
- package/dist/query.js.map +1 -1
- package/dist/refactor.js +87 -87
- package/dist/repo-command.js +7 -1
- package/dist/repo-command.js.map +1 -1
- package/dist/search-first.js +92 -92
- package/dist/skill-create.js +100 -100
- package/dist/stitch.js +1 -1
- package/dist/system-prompt.d.ts +2 -1
- package/dist/system-prompt.js +10 -5
- package/dist/system-prompt.js.map +1 -1
- package/dist/tools/github-repo-digest.d.ts +1 -1
- package/dist/tools/github-repo-digest.js +38 -6
- package/dist/tools/github-repo-digest.js.map +1 -1
- package/dist/types.d.ts +3 -0
- package/dist/types.js.map +1 -1
- package/dist/verification.js +55 -55
- package/package.json +1 -1
- package/resources/__init__.py +1 -1
- package/resources/exgentic/cawdex_agent/README.md +114 -114
- package/resources/exgentic/cawdex_agent/__init__.py +5 -5
- package/resources/exgentic/cawdex_agent/agent.py +605 -605
- package/resources/exgentic/cawdex_agent/requirements.txt +2 -2
- package/resources/exgentic/cawdex_agent/setup.sh +21 -21
- package/resources/exgentic/cawdex_agent/utils.py +1061 -1061
- package/resources/hal/cawdex_agent/README.md +24 -24
- package/resources/hal/cawdex_agent/__init__.py +1 -1
- package/resources/hal/cawdex_agent/main.py +550 -550
- package/resources/hal/cawdex_agent/requirements.txt +2 -2
- package/resources/kbench/cawdex_agent/README.md +107 -107
- package/resources/kbench/cawdex_agent/adapter.manifest.json +19 -19
- package/resources/kbench/cawdex_agent/runner.mjs +753 -753
- package/resources/open_agent_leaderboard/cawdex-agent-card.md +119 -119
- package/resources/terminal_bench/__init__.py +1 -1
- package/resources/terminal_bench/cawdex_agent.py +174 -174
- package/resources/terminal_bench/setup.sh +121 -121
|
@@ -1,605 +1,605 @@
|
|
|
1
|
-
"""Exgentic/Open Agent Leaderboard adapter for Cawdex."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import json
|
|
6
|
-
import os
|
|
7
|
-
import shlex
|
|
8
|
-
import subprocess
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
from typing import Any, ClassVar
|
|
11
|
-
|
|
12
|
-
from pydantic import Field
|
|
13
|
-
|
|
14
|
-
from exgentic.core.agent import Agent
|
|
15
|
-
from exgentic.core.agent_instance import AgentInstance
|
|
16
|
-
from exgentic.core.types import Action, ActionType, Observation
|
|
17
|
-
from exgentic.utils.cost import UpdatableCostReport
|
|
18
|
-
|
|
19
|
-
from .utils import (
|
|
20
|
-
ActionPayload,
|
|
21
|
-
extract_action_payload,
|
|
22
|
-
fallback_exgentic_action_payload,
|
|
23
|
-
fold_exgentic_history,
|
|
24
|
-
json_dumps,
|
|
25
|
-
repair_exgentic_action_payload,
|
|
26
|
-
redact,
|
|
27
|
-
safe_id,
|
|
28
|
-
shortlist_exgentic_actions,
|
|
29
|
-
truncate,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class CawdexAgent(Agent):
    """Host-side Exgentic config for Cawdex.

    Declares the settings a benchmark host can configure and forwards every
    one of them, unchanged, to the per-session ``CawdexAgentInstance`` via
    ``_get_instance_kwargs``.
    """

    display_name: ClassVar[str] = "Cawdex"
    slug_name: ClassVar[str] = "cawdex_agent"

    model: str = "openrouter/free"
    provider: str | None = None
    # Evaluated when the config object is built, so the environment override
    # is read per instance.  An unset or empty CAWDEX_EXGENTIC_COMMAND falls
    # back to the plain "cawdex" executable name.
    command: str = Field(default_factory=lambda: os.environ.get("CAWDEX_EXGENTIC_COMMAND") or "cawdex")
    permission: str = "yolo"
    max_steps: int = 50
    max_turns: int | None = None
    max_tokens: int | None = None
    context_window_tokens: int | None = None
    temperature: float | None = None
    output_format: str = "text"
    timeout_sec: int = 1800
    memory: bool = False
    workdir: str | None = None
    cawdex_home: str | None = None
    extra_args: list[str] = Field(default_factory=list)
    extra_env: dict[str, str] = Field(default_factory=dict)

    @classmethod
    def _get_instance_class(cls):
        """Return the runtime class instantiated for each session."""
        return CawdexAgentInstance

    @classmethod
    def _get_instance_class_ref(cls) -> str:
        """Return the ``module:ClassName`` reference of the runtime class."""
        return f"{cls.__module__}:CawdexAgentInstance"

    @property
    def model_name(self) -> str:  # type: ignore[override]
        """Name of the configured model."""
        return self.model

    def get_models_names(self) -> list[str]:  # type: ignore[override]
        """Return the configured model as a one-element list."""
        return [self.model]

    def _get_instance_kwargs(self, session_id: str) -> dict[str, Any]:
        """Build the keyword arguments passed to ``CawdexAgentInstance``.

        Args:
            session_id: Identifier of the session the instance will serve.

        Returns:
            A dict holding ``session_id`` plus every configured field.
        """
        return {
            "session_id": session_id,
            "model": self.model,
            "provider": self.provider,
            "command": self.command,
            "permission": self.permission,
            "max_steps": self.max_steps,
            "max_turns": self.max_turns,
            "max_tokens": self.max_tokens,
            "context_window_tokens": self.context_window_tokens,
            "temperature": self.temperature,
            "output_format": self.output_format,
            "timeout_sec": self.timeout_sec,
            "memory": self.memory,
            "workdir": self.workdir,
            "cawdex_home": self.cawdex_home,
            "extra_args": self.extra_args,
            "extra_env": self.extra_env,
        }
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
class CawdexAgentInstance(AgentInstance):
    """Per-session Exgentic runtime that asks Cawdex for the next action.

    Each ``react`` call renders a prompt from the task, context, action
    schemas and accumulated history, runs the Cawdex CLI once as a
    subprocess, and converts the JSON action found in its output into an
    Exgentic ``Action``.  The prompt, argv, stdout and stderr of every step
    are written under ``self.paths.agent_dir / "cawdex" / "step-NNN"``.

    NOTE(review): ``task``, ``context``, ``actions`` and ``paths`` are not set
    in this class; they are presumably provided by ``AgentInstance`` —
    confirm against the Exgentic base class.
    """

    def __init__(
        self,
        session_id: str,
        model: str = "openrouter/free",
        provider: str | None = None,
        command: str = "cawdex",
        permission: str = "yolo",
        max_steps: int = 50,
        max_turns: int | None = None,
        max_tokens: int | None = None,
        context_window_tokens: int | None = None,
        temperature: float | None = None,
        output_format: str = "text",
        timeout_sec: int = 1800,
        memory: bool = False,
        workdir: str | None = None,
        cawdex_home: str | None = None,
        extra_args: list[str] | None = None,
        extra_env: dict[str, str] | None = None,
    ) -> None:
        """Store the session configuration and reset per-session state."""
        super().__init__(session_id=session_id)
        self.model = model
        self.provider = provider
        self.command = command
        self.permission = permission
        self.max_steps = max_steps
        self.max_turns = max_turns
        self.max_tokens = max_tokens
        self.context_window_tokens = context_window_tokens
        self.temperature = temperature
        self.output_format = output_format
        self.timeout_sec = timeout_sec
        self.memory = memory
        self.workdir = workdir
        self.cawdex_home = cawdex_home
        # Copy so later mutation by the caller cannot leak into this session.
        self.extra_args = list(extra_args or [])
        self.extra_env = dict(extra_env or {})
        self._step = 0
        self._history: list[dict[str, Any]] = []
        self._cost_usd = 0.0

    def react(self, observation: Observation | None) -> Action | None:
        """Run one Cawdex step and return the action it selected.

        Args:
            observation: Latest benchmark observation; recorded in the
                history unless it is ``None`` or reports itself empty.

        Returns:
            The parsed action, a fallback action when no valid action JSON
            was found, or ``None`` when the step budget is exhausted or no
            fallback could be built.
        """
        if self._step >= int(self.max_steps or 0):
            return None

        if observation is not None and not _observation_is_empty(observation):
            self._history.append({"role": "observation", "content": _observation_to_data(observation)})

        self._step += 1
        prompt = self._build_prompt()
        run = self._run_cawdex(prompt)
        self._history.append(
            {
                "role": "cawdex",
                "returncode": run["returncode"],
                "stdout": truncate(run["stdout"], limit=16000),
                "stderr": truncate(run["stderr"], limit=8000),
            }
        )

        # The action JSON is searched in stdout and stderr together.
        combined = "\n".join(part for part in [run["stdout"], run["stderr"]] if part)
        payload = extract_action_payload(combined)
        action = self._action_from_payload(payload) if payload is not None else None
        if action is not None:
            self._history.append({"role": "selected_action", "content": _single_action_to_data(action)})
            return action

        fallback = self._fallback_action(combined or "Cawdex produced no output")
        if fallback is not None:
            self._history.append({"role": "selected_action", "content": _single_action_to_data(fallback)})
        return fallback

    def get_cost(self) -> UpdatableCostReport:
        """Return a cost report holding the cost accumulated so far."""
        report = UpdatableCostReport.initialize_empty(model_name=self.model)
        if self._cost_usd:
            report.add_cost(self._cost_usd)
        return report

    def close(self) -> None:
        """Do nothing; each step's subprocess has already finished."""
        return None

    def _build_prompt(self) -> str:
        """Render the prompt text handed to Cawdex for the current step."""
        # ``or []`` matches the other methods: a None ``actions`` attribute
        # must not break the comprehension.
        action_docs = [_action_type_to_doc(action) for action in getattr(self, "actions", []) or []]
        context = getattr(self, "context", {}) or {}
        task = getattr(self, "task", "")
        profile = _profile_for_exgentic(task, context, action_docs)
        action_names = [str(doc.get("name", "")) for doc in action_docs if doc.get("name")]
        action_shortlist = shortlist_exgentic_actions(
            action_docs,
            task=task,
            context=context,
            history=self._history,
            profile=profile,
        )
        lines = [
            f"/benchmark {profile} Exgentic task",
            "",
            "You are running inside Exgentic/Open Agent Leaderboard.",
            "Work from the current task, context, latest observation, and the available action schemas.",
            "Choose exactly one available action. Do not invent action names.",
            "Prefer the recommended action shortlist when it matches the latest observation; use the full schemas only when the current state clearly requires another available action.",
            "For shortlisted actions, include every required_argument_key; when available_required_hints lists an exact value from latest observation or context, copy that value into the matching argument.",
            "The benchmark may count malformed JSON, unknown action names, or schema-mismatched arguments as invalid actions.",
            "End your response with one JSON object on its own line using this exact shape:",
            '{"name":"<action name>","arguments":{}}',
            "",
            "If the benchmark exposes environment actions, return the next action to execute.",
            "If the task is complete, use a finish/message action when one is available.",
            _profile_guidance(profile),
            "",
            "## Task",
            truncate(task),
            "",
            "## Context",
            json_dumps(context),
            "",
            "## Recommended action shortlist",
            json_dumps(action_shortlist),
            "",
            "## Available action names",
            json_dumps(action_names),
            "",
            "## Available actions",
            json_dumps(action_docs),
        ]
        if self._history:
            lines.extend(["", "## Folded session state", json_dumps(fold_exgentic_history(self._history, profile=profile), limit=24000)])
        return "\n".join(lines)

    @staticmethod
    def _decode_captured(value: Any) -> str:
        """Turn captured subprocess output into ``str``.

        ``subprocess.TimeoutExpired`` carries ``None`` when nothing was
        captured and raw bytes otherwise, even when ``text=True`` was used.
        """
        if value is None:
            return ""
        if isinstance(value, (bytes, bytearray)):
            return bytes(value).decode("utf-8", errors="replace")
        return str(value)

    def _run_cawdex(self, prompt: str) -> dict[str, Any]:
        """Run the Cawdex CLI once and persist the step artifacts.

        Args:
            prompt: Prompt text; written to ``prompt.txt`` and passed to the
                CLI through ``--prompt-file``.

        Returns:
            A dict with ``returncode``, redacted ``stdout`` and ``stderr``,
            and ``trace_dir``.  A timeout is reported as return code 124 and
            any other launch error as 127.
        """
        step_dir = self.paths.agent_dir / "cawdex" / f"step-{self._step:03d}"
        trace_dir = step_dir / "trace"
        step_dir.mkdir(parents=True, exist_ok=True)
        trace_dir.mkdir(parents=True, exist_ok=True)
        prompt_path = step_dir / "prompt.txt"
        prompt_path.write_text(prompt, encoding="utf-8")

        args = _split_command(self.command)
        args.extend(["--prompt-file", str(prompt_path), "--perm", self.permission, "--benchmark-trace-dir", str(trace_dir)])
        _append_flag(args, "--model", self.model)
        _append_flag(args, "--provider", self.provider)
        _append_flag(args, "--max-turns", self.max_turns)
        _append_flag(args, "--max-tokens", self.max_tokens)
        _append_flag(args, "--context-window-tokens", self.context_window_tokens)
        _append_flag(args, "--temperature", self.temperature)
        _append_flag(args, "--output-format", self.output_format)
        args.extend(self.extra_args)

        env = os.environ.copy()
        env.update({str(key): str(value) for key, value in self.extra_env.items()})
        # setdefault keeps values supplied by the host environment or extra_env.
        env.setdefault("CAWDEX_ENV_CONFIG", "1")
        env.setdefault("CAWDEX_THEME", "minimal")
        env.setdefault("CAWDEX_SHOW_THINKING", "0")
        env.setdefault("CAWDEX_BASH_TIMEOUT_MS", "300000")
        # The memory switch is always overwritten from the configured flag.
        env["CAWDEX_MEMORY"] = "1" if self.memory else "0"
        if self.cawdex_home:
            env["CAWDEX_HOME"] = self.cawdex_home

        cwd = self._resolve_workdir()
        try:
            completed = subprocess.run(
                args,
                cwd=str(cwd),
                env=env,
                text=True,
                # Undecodable bytes must not discard the whole step output.
                errors="replace",
                capture_output=True,
                timeout=self.timeout_sec,
                check=False,
            )
            stdout = redact(completed.stdout)
            stderr = redact(completed.stderr)
            returncode = completed.returncode
        except subprocess.TimeoutExpired as exc:
            # Partial output arrives as bytes or None here; normalise to str
            # before redacting, concatenating and writing it.
            stdout = redact(self._decode_captured(exc.stdout))
            stderr = redact(self._decode_captured(exc.stderr)) + f"\ncawdex timed out after {self.timeout_sec}s"
            returncode = 124
        except Exception as exc:
            # Best effort: a launch failure is reported as step output
            # instead of aborting the session.
            stdout = ""
            stderr = f"cawdex launch failed: {redact(exc)}"
            returncode = 127

        (step_dir / "argv.json").write_text(
            json.dumps([redact(arg) for arg in args], ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
        (step_dir / "stdout.txt").write_text(stdout, encoding="utf-8")
        (step_dir / "stderr.txt").write_text(stderr, encoding="utf-8")
        self._load_cost(trace_dir)
        return {"returncode": returncode, "stdout": stdout, "stderr": stderr, "trace_dir": str(trace_dir)}

    def _load_cost(self, trace_dir: Path) -> None:
        """Add the cost from the newest ``summary.json`` under ``trace_dir``.

        Missing, unreadable or malformed summaries are ignored on purpose:
        cost tracking is best effort and must not fail a step.
        """
        summaries = sorted(trace_dir.rglob("summary.json"), key=lambda item: item.stat().st_mtime)
        if not summaries:
            return
        try:
            summary = json.loads(summaries[-1].read_text(encoding="utf-8"))
        except Exception:
            return
        usage = summary.get("usage") if isinstance(summary, dict) else None
        if not isinstance(usage, dict):
            return
        try:
            self._cost_usd += float(usage.get("estimatedCostUsd") or 0.0)
        except Exception:
            return

    def _resolve_workdir(self) -> Path:
        """Pick the working directory for the Cawdex subprocess.

        Order: the configured ``workdir``, then the first non-blank string
        among well-known context keys, then the current directory.
        """
        if self.workdir:
            return Path(self.workdir).expanduser()
        context = getattr(self, "context", {}) or {}
        for key in ("workdir", "working_dir", "workspace", "repo_path", "cwd"):
            value = context.get(key)
            if isinstance(value, str) and value.strip():
                return Path(value).expanduser()
        return Path.cwd()

    def _action_from_payload(self, payload: ActionPayload) -> Action | None:
        """Repair ``payload`` against the action schemas and build the action.

        Returns:
            The built action, or ``None`` when the named action is unknown or
            building it raises; a build failure is recorded in the history.
        """
        actions = list(getattr(self, "actions", []) or [])
        action_docs = [_action_type_to_doc(action) for action in actions]
        repair = repair_exgentic_action_payload(
            payload,
            action_docs,
            argument_hints={
                "latest_observation": self._latest_observation_data(),
                "context": getattr(self, "context", {}) or {},
            },
        )
        if repair.diagnostics.get("status") != "unchanged":
            self._history.append({"role": "action_repair", "content": repair.diagnostics})

        repaired_payload = repair.payload
        action_type = _find_action_type(actions, repaired_payload.name)
        if action_type is None:
            return None
        args = _normalize_arguments(action_type, repaired_payload.arguments, fallback_text=json_dumps(repaired_payload.arguments))
        try:
            return action_type.build_action(args)
        except Exception as exc:
            self._history.append(
                {
                    "role": "action_repair",
                    "content": {
                        "status": "build_failed",
                        "action": repaired_payload.name,
                        "error": truncate(exc, limit=1200),
                    },
                }
            )
            return None

    def _fallback_action(self, text: str) -> Action | None:
        """Choose an action when Cawdex produced no usable action JSON.

        The heuristic fallback payload is tried first.  Otherwise the first
        finish action, then message action, then an action named
        finish/final/done, then the only available action is built with
        ``text`` as its argument content.

        Returns:
            The fallback action, or ``None`` when none can be built.
        """
        actions = list(getattr(self, "actions", []) or [])
        if not actions:
            return None
        action_docs = [_action_type_to_doc(action) for action in actions]
        profile = _profile_for_exgentic(getattr(self, "task", ""), getattr(self, "context", {}) or {}, action_docs)
        fallback = fallback_exgentic_action_payload(
            action_docs,
            task=getattr(self, "task", ""),
            context=getattr(self, "context", {}) or {},
            history=self._history,
            profile=profile,
            reason="no_valid_action_json",
        )
        if fallback is not None:
            self._history.append({"role": "action_repair", "content": fallback.diagnostics})
            action = self._action_from_payload(fallback.payload)
            if action is not None:
                return action

        preferred = _first_matching_action(actions, lambda action: bool(getattr(action, "is_finish", False)))
        if preferred is None:
            preferred = _first_matching_action(actions, lambda action: bool(getattr(action, "is_message", False)))
        if preferred is None:
            preferred = _first_matching_action(actions, lambda action: action.name.lower() in {"finish", "final", "done"})
        if preferred is None and len(actions) == 1:
            preferred = actions[0]
        if preferred is None:
            return None
        args = _normalize_arguments(preferred, {}, fallback_text=truncate(text, limit=20000))
        try:
            return preferred.build_action(args)
        except Exception:
            return None

    def _latest_observation_data(self) -> Any:
        """Return the content of the most recent observation, or ``None``."""
        for item in reversed(self._history):
            if isinstance(item, dict) and item.get("role") == "observation":
                return item.get("content")
        return None
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
def _split_command(command: str) -> list[str]:
|
|
389
|
-
parts = shlex.split(command, posix=os.name != "nt")
|
|
390
|
-
return parts or ["cawdex"]
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
def _append_flag(args: list[str], flag: str, value: Any) -> None:
|
|
394
|
-
if value is None:
|
|
395
|
-
return
|
|
396
|
-
text = str(value).strip()
|
|
397
|
-
if not text:
|
|
398
|
-
return
|
|
399
|
-
args.extend([flag, text])
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
def _action_type_to_doc(action: ActionType) -> dict[str, Any]:
|
|
403
|
-
args_type = getattr(action, "arguments", None)
|
|
404
|
-
schema: Any = None
|
|
405
|
-
if args_type is not None:
|
|
406
|
-
try:
|
|
407
|
-
schema = args_type.model_json_schema()
|
|
408
|
-
except Exception:
|
|
409
|
-
schema = str(args_type)
|
|
410
|
-
return {
|
|
411
|
-
"name": action.name,
|
|
412
|
-
"description": getattr(action, "description", ""),
|
|
413
|
-
"is_finish": bool(getattr(action, "is_finish", False)),
|
|
414
|
-
"is_message": bool(getattr(action, "is_message", False)),
|
|
415
|
-
"arguments_schema": schema,
|
|
416
|
-
}
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
def _profile_for_exgentic(task: Any, context: Any, action_docs: list[dict[str, Any]]) -> str:
|
|
420
|
-
text = " ".join(
|
|
421
|
-
[
|
|
422
|
-
str(task or ""),
|
|
423
|
-
json.dumps(context or {}, ensure_ascii=False, default=str),
|
|
424
|
-
json.dumps(action_docs or [], ensure_ascii=False, default=str),
|
|
425
|
-
]
|
|
426
|
-
).lower()
|
|
427
|
-
if any(token in text for token in ("appworld", "app-world", "app world")):
|
|
428
|
-
return "appworld"
|
|
429
|
-
if any(token in text for token in ("browsecomp", "browsecomp+", "browse-comp", "deep research", "web research")):
|
|
430
|
-
return "browsecomp"
|
|
431
|
-
if any(token in text for token in ("tau2", "tau 2", "tau-bench", "tau_bench", "taubench", "customer support", "customer-service")):
|
|
432
|
-
return "tau2"
|
|
433
|
-
if any(token in text for token in ("terminalworld", "terminal-world", "tw_", "asciinema")) or (
|
|
434
|
-
"instruction.md" in text and "solve.sh" in text
|
|
435
|
-
):
|
|
436
|
-
return "terminalworld"
|
|
437
|
-
if any(token in text for token in ("swe-bench mobile", "xcode", "swift", "objective-c", "simulator", "figma")):
|
|
438
|
-
return "swe-bench-mobile"
|
|
439
|
-
if any(token in text for token in ("swe-webdevbench", "swe-webdev-bench", "webdevbench", "webdev-bench", "vibe coding", "virtual software agency", "canary requirement", "frontend-backend", "production readiness")):
|
|
440
|
-
return "webdevbench"
|
|
441
|
-
if any(token in text for token in ("swe-cycle", "swecycle", "swe cycle", "swe-judge", "swejudge", "fullcycle", "codeimpl", "testgen", "run_script", "parsing_script", "selected_test_files_to_run", "environment_setup_commit", "before_repo_set_cmd", "bare repository")):
|
|
442
|
-
return "swe-cycle"
|
|
443
|
-
if any(token in text for token in ("swe-ci", "sweci", "swe ci", "run_tests", "define_requirements", "modify_code", "test gap", "current_sha", "target_sha", "ci-loop", "continuous integration loop")):
|
|
444
|
-
return "swe-ci"
|
|
445
|
-
if any(token in text for token in ("swe-prbench", "swe prbench", "swe-pr", "prbench", "pull request review", "code review quality", "human_review_comments", "diff_patch", "type2_contextual")):
|
|
446
|
-
return "swe-prbench"
|
|
447
|
-
if any(token in text for token in ("tml-bench", "tmlbench", "tabular ml", "kaggle-style", "kaggle style", "sample_submission", "private holdout", "train.csv", "test.csv")):
|
|
448
|
-
return "tml-bench"
|
|
449
|
-
if any(token in text for token in ("pi-bench", "pibench", "proactive personal assistant", "proactive assistant", "hidden intent", "latent intent", "user profile", "message history", "current app", "proactivity score", "completion score")):
|
|
450
|
-
return "pi-bench"
|
|
451
|
-
if any(token in text for token in ("saasbench", "saas-bench", "enterprise saas", "tenant", "migration")):
|
|
452
|
-
return "saasbench"
|
|
453
|
-
if any(token in text for token in ("roadmapbench", "roadmap-bench", "long-horizon", "version upgrade")):
|
|
454
|
-
return "roadmapbench"
|
|
455
|
-
if any(token in text for token in ("arc-agi", "arc prize", "kaggle arc")):
|
|
456
|
-
return "arc-agi"
|
|
457
|
-
return "generic"
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
def _profile_guidance(profile: str) -> str:
|
|
461
|
-
if profile == "appworld":
|
|
462
|
-
return "AppWorld discipline: track app/API state from observations, preserve record IDs and permissions, and finish only after the requested state change is confirmed."
|
|
463
|
-
if profile == "browsecomp":
|
|
464
|
-
return "BrowseComp+ discipline: decompose the research question, prefer primary/high-authority sources, cross-check facts, and include auditable source attribution in finish/message arguments."
|
|
465
|
-
if profile == "tau2":
|
|
466
|
-
return "tau2 discipline: read policy/context first, take only policy-supported tool actions, and confirm observations before promising customer outcomes."
|
|
467
|
-
if profile == "terminalworld":
|
|
468
|
-
return "TerminalWorld discipline: extract required artifacts from instruction.md/task text, avoid solve.sh/reference material, execute real CLI steps, and verify persistent files/services before finishing."
|
|
469
|
-
if profile == "swe-bench-mobile":
|
|
470
|
-
return "Mobile discipline: respect PRD/design/platform constraints and prefer platform validation evidence when the harness exposes it."
|
|
471
|
-
if profile == "webdevbench":
|
|
472
|
-
return "WebDevBench discipline: preserve canary business requirements, verify frontend-backend coupling, and seek production/security evidence before completion."
|
|
473
|
-
if profile == "swe-cycle":
|
|
474
|
-
return "SWE-Cycle discipline: carry lifecycle phase, environment setup state, implementation requirements, generated/selected tests, and static/dynamic judge evidence through each action."
|
|
475
|
-
if profile == "swe-ci":
|
|
476
|
-
return "SWE-CI discipline: carry current/target commits, test gaps, inferred requirements, code changes, and CI-loop validation deltas through each action."
|
|
477
|
-
if profile == "swe-prbench":
|
|
478
|
-
return "SWE-PRBench discipline: review diff first, expand only to evidence-needed context, and produce severity-rated findings with file/line evidence rather than patching unless explicitly requested."
|
|
479
|
-
if profile == "tml-bench":
|
|
480
|
-
return "TML-Bench discipline: establish data contract, avoid hidden-label leakage, validate an honest baseline, and produce a schema-valid submission artifact."
|
|
481
|
-
if profile == "pi-bench":
|
|
482
|
-
return "Pi-Bench discipline: build the user/workspace/app context contract, infer hidden intent with evidence, ask only necessary clarifying questions, and verify state after proactive actions."
|
|
483
|
-
if profile == "saasbench":
|
|
484
|
-
return "SaaS discipline: preserve tenant, auth, migration, and cross-component workflow integrity."
|
|
485
|
-
if profile == "roadmapbench":
|
|
486
|
-
return "Roadmap discipline: keep milestones explicit and avoid claiming completion while roadmap items remain unverified."
|
|
487
|
-
if profile == "arc-agi":
|
|
488
|
-
return "ARC discipline: infer environment dynamics with small experiments and avoid hardcoding hidden answers."
|
|
489
|
-
return "Generic discipline: use the available actions exactly, observe after state-changing actions, and finish only with benchmark-visible evidence."
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
def _find_action_type(actions: list[ActionType], name: str) -> ActionType | None:
|
|
493
|
-
for action in actions:
|
|
494
|
-
if action.name == name:
|
|
495
|
-
return action
|
|
496
|
-
lowered = name.lower()
|
|
497
|
-
for action in actions:
|
|
498
|
-
if action.name.lower() == lowered:
|
|
499
|
-
return action
|
|
500
|
-
return None
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
def _first_matching_action(actions: list[ActionType], predicate: Any) -> ActionType | None:
|
|
504
|
-
for action in actions:
|
|
505
|
-
if predicate(action):
|
|
506
|
-
return action
|
|
507
|
-
return None
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
def _normalize_arguments(action: ActionType, provided: dict[str, Any], fallback_text: str) -> dict[str, Any]:
    """Make sure at least one schema field of ``action`` receives a value.

    A copy of ``provided`` is returned untouched when the action declares no
    fields or when any declared field is already present.  Otherwise
    ``fallback_text`` goes into the first well-known text field the schema
    declares; failing that, the first required field gets a fallback value
    chosen for its annotation.
    """
    normalized = dict(provided or {})
    schema_fields = _argument_fields(action)
    if not schema_fields or any(name in normalized for name in schema_fields):
        return normalized

    text_slots = ("answer", "final_answer", "response", "content", "message", "text", "result", "output")
    slot = next((name for name in text_slots if name in schema_fields), None)
    if slot is not None:
        normalized[slot] = fallback_text
        return normalized

    # Only the first required field is filled.
    for name, spec in schema_fields.items():
        if _field_required(spec):
            normalized[name] = _fallback_value_for_field(spec, fallback_text)
            break
    return normalized
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
def _argument_fields(action: ActionType) -> dict[str, Any]:
|
|
531
|
-
args_type = getattr(action, "arguments", None)
|
|
532
|
-
return dict(getattr(args_type, "model_fields", {}) or getattr(args_type, "__fields__", {}) or {})
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
def _field_required(field: Any) -> bool:
|
|
536
|
-
method = getattr(field, "is_required", None)
|
|
537
|
-
if callable(method):
|
|
538
|
-
return bool(method())
|
|
539
|
-
return bool(getattr(field, "required", False))
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
def _fallback_value_for_field(field: Any, text: str) -> Any:
|
|
543
|
-
annotation = getattr(field, "annotation", None) or getattr(field, "type_", None)
|
|
544
|
-
if annotation is bool:
|
|
545
|
-
return False
|
|
546
|
-
if annotation is int:
|
|
547
|
-
return 0
|
|
548
|
-
if annotation is float:
|
|
549
|
-
return 0.0
|
|
550
|
-
if annotation is list:
|
|
551
|
-
return []
|
|
552
|
-
if annotation is dict:
|
|
553
|
-
return {}
|
|
554
|
-
return text
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
def _observation_is_empty(observation: Observation) -> bool:
|
|
558
|
-
try:
|
|
559
|
-
return bool(observation.is_empty())
|
|
560
|
-
except Exception:
|
|
561
|
-
return False
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
def _observation_to_data(observation: Observation) -> Any:
|
|
565
|
-
try:
|
|
566
|
-
items = observation.to_observation_list()
|
|
567
|
-
except Exception:
|
|
568
|
-
return str(observation)
|
|
569
|
-
data: list[Any] = []
|
|
570
|
-
for item in items:
|
|
571
|
-
result = getattr(item, "result", item)
|
|
572
|
-
data.append(result)
|
|
573
|
-
return data
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
def _single_action_to_data(action: Action) -> Any:
|
|
577
|
-
try:
|
|
578
|
-
values = []
|
|
579
|
-
for item in action.to_action_list():
|
|
580
|
-
args = getattr(item, "arguments", {})
|
|
581
|
-
if hasattr(args, "model_dump"):
|
|
582
|
-
args = args.model_dump()
|
|
583
|
-
values.append({"name": getattr(item, "name", ""), "arguments": args, "id": getattr(item, "id", "")})
|
|
584
|
-
return values
|
|
585
|
-
except Exception:
|
|
586
|
-
return {"id": safe_id(str(action)), "text": str(action)}
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
# The base-class expression is evaluated before this statement rebinds the
# name, so the parent is whatever ``CawdexAgentInstance`` referred to just
# before this line (presumably the runtime class defined earlier in this
# module - confirm). Afterwards the module-level name refers to this subclass.
class CawdexAgentInstance(CawdexAgentInstance):
    """Preferred Exgentic runtime class name for Cawdex."""
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
# The base-class expression is evaluated before this statement rebinds the
# name, so the parent is whatever ``CawdexAgent`` referred to just before this
# line (presumably the host config class defined earlier in this module -
# confirm). Afterwards the module-level name refers to this subclass.
class CawdexAgent(CawdexAgent):
    """Preferred Exgentic host class name for Cawdex."""

    # Class-level identifiers; they are not per-instance settings.
    display_name: ClassVar[str] = "Cawdex"
    slug_name: ClassVar[str] = "cawdex_agent"

    @classmethod
    def _get_instance_class(cls):
        # Resolved at call time, i.e. to the final module-level binding of
        # the name CawdexAgentInstance.
        return CawdexAgentInstance

    @classmethod
    def _get_instance_class_ref(cls) -> str:
        # "<module>:<class name>" string built from this class's module.
        return f"{cls.__module__}:CawdexAgentInstance"
|
|
1
|
+
"""Exgentic/Open Agent Leaderboard adapter for Cawdex."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import shlex
|
|
8
|
+
import subprocess
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, ClassVar
|
|
11
|
+
|
|
12
|
+
from pydantic import Field
|
|
13
|
+
|
|
14
|
+
from exgentic.core.agent import Agent
|
|
15
|
+
from exgentic.core.agent_instance import AgentInstance
|
|
16
|
+
from exgentic.core.types import Action, ActionType, Observation
|
|
17
|
+
from exgentic.utils.cost import UpdatableCostReport
|
|
18
|
+
|
|
19
|
+
from .utils import (
|
|
20
|
+
ActionPayload,
|
|
21
|
+
extract_action_payload,
|
|
22
|
+
fallback_exgentic_action_payload,
|
|
23
|
+
fold_exgentic_history,
|
|
24
|
+
json_dumps,
|
|
25
|
+
repair_exgentic_action_payload,
|
|
26
|
+
redact,
|
|
27
|
+
safe_id,
|
|
28
|
+
shortlist_exgentic_actions,
|
|
29
|
+
truncate,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class CawdexAgent(Agent):
    """Host-side Exgentic config for Cawdex.

    Declares the settings used to launch the Cawdex CLI and forwards them,
    together with the session id, to ``CawdexAgentInstance`` through
    ``_get_instance_kwargs``.
    """

    display_name: ClassVar[str] = "Cawdex"
    slug_name: ClassVar[str] = "cawdex_agent"

    model: str = "openrouter/free"
    provider: str | None = None
    # CAWDEX_EXGENTIC_COMMAND overrides the launcher command; an unset or
    # empty variable falls back to the plain "cawdex" executable. (The
    # previous default looked the same variable up twice, which left the
    # "cawdex" default unreachable whenever the variable was set but empty.)
    command: str = Field(default_factory=lambda: os.environ.get("CAWDEX_EXGENTIC_COMMAND") or "cawdex")
    permission: str = "yolo"
    max_steps: int = 50
    max_turns: int | None = None
    max_tokens: int | None = None
    context_window_tokens: int | None = None
    temperature: float | None = None
    output_format: str = "text"
    timeout_sec: int = 1800
    memory: bool = False
    workdir: str | None = None
    cawdex_home: str | None = None
    extra_args: list[str] = Field(default_factory=list)
    extra_env: dict[str, str] = Field(default_factory=dict)

    @classmethod
    def _get_instance_class(cls):
        """Return the per-session runtime class."""
        return CawdexAgentInstance

    @classmethod
    def _get_instance_class_ref(cls) -> str:
        """Return the runtime class as a ``"<module>:<class name>"`` string."""
        return f"{cls.__module__}:CawdexAgentInstance"

    @property
    def model_name(self) -> str:  # type: ignore[override]
        """The configured model identifier."""
        return self.model

    def get_models_names(self) -> list[str]:  # type: ignore[override]
        """Return the configured model as a one-element list."""
        return [self.model]

    def _get_instance_kwargs(self, session_id: str) -> dict[str, Any]:
        """Build the keyword arguments for ``CawdexAgentInstance.__init__``.

        Every configured field is passed through unchanged alongside
        *session_id*.
        """
        return {
            "session_id": session_id,
            "model": self.model,
            "provider": self.provider,
            "command": self.command,
            "permission": self.permission,
            "max_steps": self.max_steps,
            "max_turns": self.max_turns,
            "max_tokens": self.max_tokens,
            "context_window_tokens": self.context_window_tokens,
            "temperature": self.temperature,
            "output_format": self.output_format,
            "timeout_sec": self.timeout_sec,
            "memory": self.memory,
            "workdir": self.workdir,
            "cawdex_home": self.cawdex_home,
            "extra_args": self.extra_args,
            "extra_env": self.extra_env,
        }
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class CawdexAgentInstance(AgentInstance):
|
|
94
|
+
"""Per-session Exgentic runtime that asks Cawdex for the next action."""
|
|
95
|
+
|
|
96
|
+
def __init__(
    self,
    session_id: str,
    model: str = "openrouter/free",
    provider: str | None = None,
    command: str = "cawdex",
    permission: str = "yolo",
    max_steps: int = 50,
    max_turns: int | None = None,
    max_tokens: int | None = None,
    context_window_tokens: int | None = None,
    temperature: float | None = None,
    output_format: str = "text",
    timeout_sec: int = 1800,
    memory: bool = False,
    workdir: str | None = None,
    cawdex_home: str | None = None,
    extra_args: list[str] | None = None,
    extra_env: dict[str, str] | None = None,
) -> None:
    """Store the launch settings and reset the per-session state.

    ``extra_args`` and ``extra_env`` are copied (``None`` or empty becomes
    a fresh empty container), so the instance never shares them with the
    caller.
    """
    super().__init__(session_id=session_id)

    # How to launch the CLI.
    self.command = command
    self.permission = permission
    self.output_format = output_format
    self.timeout_sec = timeout_sec
    self.extra_args = list(extra_args) if extra_args else []
    self.extra_env = dict(extra_env) if extra_env else {}

    # Model selection and generation limits.
    self.model = model
    self.provider = provider
    self.max_steps = max_steps
    self.max_turns = max_turns
    self.max_tokens = max_tokens
    self.context_window_tokens = context_window_tokens
    self.temperature = temperature

    # Environment of the spawned process.
    self.memory = memory
    self.workdir = workdir
    self.cawdex_home = cawdex_home

    # Mutable session state: step counter, transcript, accumulated cost.
    self._step = 0
    self._history: list[dict[str, Any]] = []
    self._cost_usd = 0.0
|
|
136
|
+
|
|
137
|
+
def react(self, observation: Observation | None) -> Action | None:
    """Produce the next action for the latest observation.

    Returns ``None`` once ``max_steps`` calls have been spent. Otherwise a
    non-empty observation is recorded, Cawdex is run on a freshly built
    prompt, and its combined stdout/stderr is parsed for an action payload.
    When no usable action comes out of that, ``_fallback_action`` is tried.
    Whatever action is chosen is appended to the history before it is
    returned; the result may still be ``None`` when the fallback fails too.
    """
    step_budget = int(self.max_steps or 0)
    if self._step >= step_budget:
        return None

    has_observation = observation is not None and not _observation_is_empty(observation)
    if has_observation:
        self._history.append({"role": "observation", "content": _observation_to_data(observation)})

    self._step += 1
    run = self._run_cawdex(self._build_prompt())
    transcript_entry = {
        "role": "cawdex",
        "returncode": run["returncode"],
        "stdout": truncate(run["stdout"], limit=16000),
        "stderr": truncate(run["stderr"], limit=8000),
    }
    self._history.append(transcript_entry)

    # Parse the untruncated output; empty streams are left out of the join.
    combined = "\n".join(filter(None, (run["stdout"], run["stderr"])))
    payload = extract_action_payload(combined)
    chosen = None if payload is None else self._action_from_payload(payload)
    if chosen is None:
        chosen = self._fallback_action(combined or "Cawdex produced no output")
    if chosen is not None:
        self._history.append({"role": "selected_action", "content": _single_action_to_data(chosen)})
    return chosen
|
|
167
|
+
|
|
168
|
+
def get_cost(self) -> UpdatableCostReport:
    """Return a new cost report for the configured model.

    The report starts empty; the accumulated ``_cost_usd`` is added only
    when it is non-zero.
    """
    report = UpdatableCostReport.initialize_empty(model_name=self.model)
    spent = self._cost_usd
    if not spent:
        return report
    report.add_cost(spent)
    return report
|
|
173
|
+
|
|
174
|
+
def close(self) -> None:
    """Do nothing; this method has no cleanup to perform."""
    return
|
|
176
|
+
|
|
177
|
+
def _build_prompt(self) -> str:
    """Render the text prompt handed to Cawdex for the current step.

    The prompt is a ``/benchmark <profile>`` header and fixed instructions,
    followed by one section each for the task, the context, the recommended
    action shortlist, the action names, and the full action schemas. A
    "Folded session state" section is appended once the history is non-empty.
    ``task``, ``context`` and ``actions`` are read with ``getattr`` defaults,
    so missing attributes simply render as empty values.
    """
    action_docs = [_action_type_to_doc(action) for action in getattr(self, "actions", [])]
    context = getattr(self, "context", {}) or {}
    task = getattr(self, "task", "")
    profile = _profile_for_exgentic(task, context, action_docs)

    action_names = []
    for doc in action_docs:
        if doc.get("name"):
            action_names.append(str(doc.get("name", "")))
    action_shortlist = shortlist_exgentic_actions(
        action_docs,
        task=task,
        context=context,
        history=self._history,
        profile=profile,
    )

    preamble = [
        f"/benchmark {profile} Exgentic task",
        "",
        "You are running inside Exgentic/Open Agent Leaderboard.",
        "Work from the current task, context, latest observation, and the available action schemas.",
        "Choose exactly one available action. Do not invent action names.",
        "Prefer the recommended action shortlist when it matches the latest observation; use the full schemas only when the current state clearly requires another available action.",
        "For shortlisted actions, include every required_argument_key; when available_required_hints lists an exact value from latest observation or context, copy that value into the matching argument.",
        "The benchmark may count malformed JSON, unknown action names, or schema-mismatched arguments as invalid actions.",
        "End your response with one JSON object on its own line using this exact shape:",
        '{"name":"<action name>","arguments":{}}',
        "",
        "If the benchmark exposes environment actions, return the next action to execute.",
        "If the task is complete, use a finish/message action when one is available.",
        _profile_guidance(profile),
    ]
    sections = [
        ("## Task", truncate(task)),
        ("## Context", json_dumps(context)),
        ("## Recommended action shortlist", json_dumps(action_shortlist)),
        ("## Available action names", json_dumps(action_names)),
        ("## Available actions", json_dumps(action_docs)),
    ]
    if self._history:
        folded = fold_exgentic_history(self._history, profile=profile)
        sections.append(("## Folded session state", json_dumps(folded, limit=24000)))

    # Every section is rendered as: blank line, heading, body.
    lines = list(preamble)
    for heading, body in sections:
        lines.extend(["", heading, body])
    return "\n".join(lines)
|
|
224
|
+
|
|
225
|
+
def _run_cawdex(self, prompt: str) -> dict[str, Any]:
    """Run one Cawdex CLI invocation for the current step and capture it.

    The prompt is written to ``step-NNN/prompt.txt`` under
    ``self.paths.agent_dir / "cawdex"`` and passed via ``--prompt-file``.
    The redacted argv, stdout and stderr are written next to it, and
    ``_load_cost`` is run on the step's ``trace`` directory.

    Args:
        prompt: Full prompt text for this step.

    Returns:
        A dict with ``returncode``, ``stdout``, ``stderr`` (both redacted)
        and ``trace_dir``. ``returncode`` is 124 when the process exceeded
        ``timeout_sec`` and 127 when it could not be launched or another
        error occurred while running it.
    """

    def _captured_text(raw: Any) -> str:
        # subprocess.TimeoutExpired carries the partial output as bytes (or
        # None when nothing was captured) even when text=True was requested,
        # so normalise it to text before redacting/concatenating.
        if raw is None:
            return ""
        if isinstance(raw, (bytes, bytearray)):
            return bytes(raw).decode("utf-8", errors="replace")
        return raw

    step_dir = self.paths.agent_dir / "cawdex" / f"step-{self._step:03d}"
    trace_dir = step_dir / "trace"
    step_dir.mkdir(parents=True, exist_ok=True)
    trace_dir.mkdir(parents=True, exist_ok=True)
    prompt_path = step_dir / "prompt.txt"
    prompt_path.write_text(prompt, encoding="utf-8")

    args = _split_command(self.command)
    args.extend(["--prompt-file", str(prompt_path), "--perm", self.permission, "--benchmark-trace-dir", str(trace_dir)])
    # Optional flags are skipped when their value is None or blank.
    _append_flag(args, "--model", self.model)
    _append_flag(args, "--provider", self.provider)
    _append_flag(args, "--max-turns", self.max_turns)
    _append_flag(args, "--max-tokens", self.max_tokens)
    _append_flag(args, "--context-window-tokens", self.context_window_tokens)
    _append_flag(args, "--temperature", self.temperature)
    _append_flag(args, "--output-format", self.output_format)
    args.extend(self.extra_args)

    env = os.environ.copy()
    env.update({str(key): str(value) for key, value in self.extra_env.items()})
    # setdefault: values already present in the environment or extra_env win.
    env.setdefault("CAWDEX_ENV_CONFIG", "1")
    env.setdefault("CAWDEX_THEME", "minimal")
    env.setdefault("CAWDEX_SHOW_THINKING", "0")
    env.setdefault("CAWDEX_BASH_TIMEOUT_MS", "300000")
    # The memory switch is always forced from the instance setting.
    env["CAWDEX_MEMORY"] = "1" if self.memory else "0"
    if self.cawdex_home:
        env["CAWDEX_HOME"] = self.cawdex_home

    cwd = self._resolve_workdir()
    try:
        completed = subprocess.run(
            args,
            cwd=str(cwd),
            env=env,
            text=True,
            capture_output=True,
            timeout=self.timeout_sec,
            check=False,
        )
        stdout = redact(completed.stdout)
        stderr = redact(completed.stderr)
        returncode = completed.returncode
    except subprocess.TimeoutExpired as exc:
        stdout = redact(_captured_text(exc.stdout))
        stderr = redact(_captured_text(exc.stderr)) + f"\ncawdex timed out after {self.timeout_sec}s"
        returncode = 124
    except Exception as exc:
        stdout = ""
        stderr = f"cawdex launch failed: {redact(exc)}"
        returncode = 127

    (step_dir / "argv.json").write_text(
        json.dumps([redact(arg) for arg in args], ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    (step_dir / "stdout.txt").write_text(stdout, encoding="utf-8")
    (step_dir / "stderr.txt").write_text(stderr, encoding="utf-8")
    self._load_cost(trace_dir)
    return {"returncode": returncode, "stdout": stdout, "stderr": stderr, "trace_dir": str(trace_dir)}
|
|
285
|
+
|
|
286
|
+
def _load_cost(self, trace_dir: Path) -> None:
|
|
287
|
+
summaries = sorted(trace_dir.rglob("summary.json"), key=lambda item: item.stat().st_mtime)
|
|
288
|
+
if not summaries:
|
|
289
|
+
return
|
|
290
|
+
try:
|
|
291
|
+
summary = json.loads(summaries[-1].read_text(encoding="utf-8"))
|
|
292
|
+
except Exception:
|
|
293
|
+
return
|
|
294
|
+
usage = summary.get("usage") if isinstance(summary, dict) else None
|
|
295
|
+
if not isinstance(usage, dict):
|
|
296
|
+
return
|
|
297
|
+
try:
|
|
298
|
+
self._cost_usd += float(usage.get("estimatedCostUsd") or 0.0)
|
|
299
|
+
except Exception:
|
|
300
|
+
return
|
|
301
|
+
|
|
302
|
+
def _resolve_workdir(self) -> Path:
|
|
303
|
+
if self.workdir:
|
|
304
|
+
return Path(self.workdir).expanduser()
|
|
305
|
+
context = getattr(self, "context", {}) or {}
|
|
306
|
+
for key in ("workdir", "working_dir", "workspace", "repo_path", "cwd"):
|
|
307
|
+
value = context.get(key)
|
|
308
|
+
if isinstance(value, str) and value.strip():
|
|
309
|
+
return Path(value).expanduser()
|
|
310
|
+
return Path.cwd()
|
|
311
|
+
|
|
312
|
+
def _action_from_payload(self, payload: ActionPayload) -> Action | None:
    """Turn a parsed action payload into a concrete action.

    The payload first goes through ``repair_exgentic_action_payload`` with
    the latest observation and the context as argument hints; any repair
    whose status is not ``"unchanged"`` is logged to the history. Returns
    ``None`` when the repaired name matches no available action or when
    ``build_action`` raises (the failure is logged to the history too).
    """
    available = list(getattr(self, "actions", []) or [])
    docs = [_action_type_to_doc(action) for action in available]
    hints = {
        "latest_observation": self._latest_observation_data(),
        "context": getattr(self, "context", {}) or {},
    }
    repair = repair_exgentic_action_payload(payload, docs, argument_hints=hints)
    if repair.diagnostics.get("status") != "unchanged":
        self._history.append({"role": "action_repair", "content": repair.diagnostics})

    fixed = repair.payload
    action_type = _find_action_type(available, fixed.name)
    if action_type is None:
        return None

    arguments = _normalize_arguments(action_type, fixed.arguments, fallback_text=json_dumps(fixed.arguments))
    try:
        built = action_type.build_action(arguments)
    except Exception as exc:
        failure = {
            "status": "build_failed",
            "action": fixed.name,
            "error": truncate(exc, limit=1200),
        }
        self._history.append({"role": "action_repair", "content": failure})
        return None
    return built
|
|
345
|
+
|
|
346
|
+
def _fallback_action(self, text: str) -> Action | None:
    """Pick an action when Cawdex's output contained no usable one.

    Order of attempts:
      1. the payload suggested by ``fallback_exgentic_action_payload``;
      2. the first action flagged ``is_finish``, then ``is_message``, then
         one named "finish", "final" or "done" (case-insensitive);
      3. the only action, when exactly one is available.
    For 2 and 3 the arguments are filled from *text* (truncated to 20000).
    Returns ``None`` when there are no actions, nothing qualifies, or the
    chosen action cannot be built.
    """
    actions = list(getattr(self, "actions", []) or [])
    if not actions:
        return None

    task = getattr(self, "task", "")
    context = getattr(self, "context", {}) or {}
    action_docs = [_action_type_to_doc(action) for action in actions]
    profile = _profile_for_exgentic(task, context, action_docs)
    fallback = fallback_exgentic_action_payload(
        action_docs,
        task=task,
        context=context,
        history=self._history,
        profile=profile,
        reason="no_valid_action_json",
    )
    if fallback is not None:
        self._history.append({"role": "action_repair", "content": fallback.diagnostics})
        suggested = self._action_from_payload(fallback.payload)
        if suggested is not None:
            return suggested

    selectors = (
        lambda action: bool(getattr(action, "is_finish", False)),
        lambda action: bool(getattr(action, "is_message", False)),
        lambda action: action.name.lower() in {"finish", "final", "done"},
    )
    preferred = None
    for selector in selectors:
        preferred = _first_matching_action(actions, selector)
        if preferred is not None:
            break
    if preferred is None and len(actions) == 1:
        preferred = actions[0]
    if preferred is None:
        return None

    args = _normalize_arguments(preferred, {}, fallback_text=truncate(text, limit=20000))
    try:
        built = preferred.build_action(args)
    except Exception:
        return None
    return built
|
|
380
|
+
|
|
381
|
+
def _latest_observation_data(self) -> Any:
|
|
382
|
+
for item in reversed(self._history):
|
|
383
|
+
if isinstance(item, dict) and item.get("role") == "observation":
|
|
384
|
+
return item.get("content")
|
|
385
|
+
return None
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def _split_command(command: str) -> list[str]:
|
|
389
|
+
parts = shlex.split(command, posix=os.name != "nt")
|
|
390
|
+
return parts or ["cawdex"]
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def _append_flag(args: list[str], flag: str, value: Any) -> None:
|
|
394
|
+
if value is None:
|
|
395
|
+
return
|
|
396
|
+
text = str(value).strip()
|
|
397
|
+
if not text:
|
|
398
|
+
return
|
|
399
|
+
args.extend([flag, text])
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _action_type_to_doc(action: ActionType) -> dict[str, Any]:
|
|
403
|
+
args_type = getattr(action, "arguments", None)
|
|
404
|
+
schema: Any = None
|
|
405
|
+
if args_type is not None:
|
|
406
|
+
try:
|
|
407
|
+
schema = args_type.model_json_schema()
|
|
408
|
+
except Exception:
|
|
409
|
+
schema = str(args_type)
|
|
410
|
+
return {
|
|
411
|
+
"name": action.name,
|
|
412
|
+
"description": getattr(action, "description", ""),
|
|
413
|
+
"is_finish": bool(getattr(action, "is_finish", False)),
|
|
414
|
+
"is_message": bool(getattr(action, "is_message", False)),
|
|
415
|
+
"arguments_schema": schema,
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def _profile_for_exgentic(task: Any, context: Any, action_docs: list[dict[str, Any]]) -> str:
|
|
420
|
+
text = " ".join(
|
|
421
|
+
[
|
|
422
|
+
str(task or ""),
|
|
423
|
+
json.dumps(context or {}, ensure_ascii=False, default=str),
|
|
424
|
+
json.dumps(action_docs or [], ensure_ascii=False, default=str),
|
|
425
|
+
]
|
|
426
|
+
).lower()
|
|
427
|
+
if any(token in text for token in ("appworld", "app-world", "app world")):
|
|
428
|
+
return "appworld"
|
|
429
|
+
if any(token in text for token in ("browsecomp", "browsecomp+", "browse-comp", "deep research", "web research")):
|
|
430
|
+
return "browsecomp"
|
|
431
|
+
if any(token in text for token in ("tau2", "tau 2", "tau-bench", "tau_bench", "taubench", "customer support", "customer-service")):
|
|
432
|
+
return "tau2"
|
|
433
|
+
if any(token in text for token in ("terminalworld", "terminal-world", "tw_", "asciinema")) or (
|
|
434
|
+
"instruction.md" in text and "solve.sh" in text
|
|
435
|
+
):
|
|
436
|
+
return "terminalworld"
|
|
437
|
+
if any(token in text for token in ("swe-bench mobile", "xcode", "swift", "objective-c", "simulator", "figma")):
|
|
438
|
+
return "swe-bench-mobile"
|
|
439
|
+
if any(token in text for token in ("swe-webdevbench", "swe-webdev-bench", "webdevbench", "webdev-bench", "vibe coding", "virtual software agency", "canary requirement", "frontend-backend", "production readiness")):
|
|
440
|
+
return "webdevbench"
|
|
441
|
+
if any(token in text for token in ("swe-cycle", "swecycle", "swe cycle", "swe-judge", "swejudge", "fullcycle", "codeimpl", "testgen", "run_script", "parsing_script", "selected_test_files_to_run", "environment_setup_commit", "before_repo_set_cmd", "bare repository")):
|
|
442
|
+
return "swe-cycle"
|
|
443
|
+
if any(token in text for token in ("swe-ci", "sweci", "swe ci", "run_tests", "define_requirements", "modify_code", "test gap", "current_sha", "target_sha", "ci-loop", "continuous integration loop")):
|
|
444
|
+
return "swe-ci"
|
|
445
|
+
if any(token in text for token in ("swe-prbench", "swe prbench", "swe-pr", "prbench", "pull request review", "code review quality", "human_review_comments", "diff_patch", "type2_contextual")):
|
|
446
|
+
return "swe-prbench"
|
|
447
|
+
if any(token in text for token in ("tml-bench", "tmlbench", "tabular ml", "kaggle-style", "kaggle style", "sample_submission", "private holdout", "train.csv", "test.csv")):
|
|
448
|
+
return "tml-bench"
|
|
449
|
+
if any(token in text for token in ("pi-bench", "pibench", "proactive personal assistant", "proactive assistant", "hidden intent", "latent intent", "user profile", "message history", "current app", "proactivity score", "completion score")):
|
|
450
|
+
return "pi-bench"
|
|
451
|
+
if any(token in text for token in ("saasbench", "saas-bench", "enterprise saas", "tenant", "migration")):
|
|
452
|
+
return "saasbench"
|
|
453
|
+
if any(token in text for token in ("roadmapbench", "roadmap-bench", "long-horizon", "version upgrade")):
|
|
454
|
+
return "roadmapbench"
|
|
455
|
+
if any(token in text for token in ("arc-agi", "arc prize", "kaggle arc")):
|
|
456
|
+
return "arc-agi"
|
|
457
|
+
return "generic"
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def _profile_guidance(profile: str) -> str:
|
|
461
|
+
if profile == "appworld":
|
|
462
|
+
return "AppWorld discipline: track app/API state from observations, preserve record IDs and permissions, and finish only after the requested state change is confirmed."
|
|
463
|
+
if profile == "browsecomp":
|
|
464
|
+
return "BrowseComp+ discipline: decompose the research question, prefer primary/high-authority sources, cross-check facts, and include auditable source attribution in finish/message arguments."
|
|
465
|
+
if profile == "tau2":
|
|
466
|
+
return "tau2 discipline: read policy/context first, take only policy-supported tool actions, and confirm observations before promising customer outcomes."
|
|
467
|
+
if profile == "terminalworld":
|
|
468
|
+
return "TerminalWorld discipline: extract required artifacts from instruction.md/task text, avoid solve.sh/reference material, execute real CLI steps, and verify persistent files/services before finishing."
|
|
469
|
+
if profile == "swe-bench-mobile":
|
|
470
|
+
return "Mobile discipline: respect PRD/design/platform constraints and prefer platform validation evidence when the harness exposes it."
|
|
471
|
+
if profile == "webdevbench":
|
|
472
|
+
return "WebDevBench discipline: preserve canary business requirements, verify frontend-backend coupling, and seek production/security evidence before completion."
|
|
473
|
+
if profile == "swe-cycle":
|
|
474
|
+
return "SWE-Cycle discipline: carry lifecycle phase, environment setup state, implementation requirements, generated/selected tests, and static/dynamic judge evidence through each action."
|
|
475
|
+
if profile == "swe-ci":
|
|
476
|
+
return "SWE-CI discipline: carry current/target commits, test gaps, inferred requirements, code changes, and CI-loop validation deltas through each action."
|
|
477
|
+
if profile == "swe-prbench":
|
|
478
|
+
return "SWE-PRBench discipline: review diff first, expand only to evidence-needed context, and produce severity-rated findings with file/line evidence rather than patching unless explicitly requested."
|
|
479
|
+
if profile == "tml-bench":
|
|
480
|
+
return "TML-Bench discipline: establish data contract, avoid hidden-label leakage, validate an honest baseline, and produce a schema-valid submission artifact."
|
|
481
|
+
if profile == "pi-bench":
|
|
482
|
+
return "Pi-Bench discipline: build the user/workspace/app context contract, infer hidden intent with evidence, ask only necessary clarifying questions, and verify state after proactive actions."
|
|
483
|
+
if profile == "saasbench":
|
|
484
|
+
return "SaaS discipline: preserve tenant, auth, migration, and cross-component workflow integrity."
|
|
485
|
+
if profile == "roadmapbench":
|
|
486
|
+
return "Roadmap discipline: keep milestones explicit and avoid claiming completion while roadmap items remain unverified."
|
|
487
|
+
if profile == "arc-agi":
|
|
488
|
+
return "ARC discipline: infer environment dynamics with small experiments and avoid hardcoding hidden answers."
|
|
489
|
+
return "Generic discipline: use the available actions exactly, observe after state-changing actions, and finish only with benchmark-visible evidence."
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def _find_action_type(actions: list[ActionType], name: str) -> ActionType | None:
|
|
493
|
+
for action in actions:
|
|
494
|
+
if action.name == name:
|
|
495
|
+
return action
|
|
496
|
+
lowered = name.lower()
|
|
497
|
+
for action in actions:
|
|
498
|
+
if action.name.lower() == lowered:
|
|
499
|
+
return action
|
|
500
|
+
return None
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def _first_matching_action(actions: list[ActionType], predicate: Any) -> ActionType | None:
|
|
504
|
+
for action in actions:
|
|
505
|
+
if predicate(action):
|
|
506
|
+
return action
|
|
507
|
+
return None
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def _normalize_arguments(action: ActionType, provided: dict[str, Any], fallback_text: str) -> dict[str, Any]:
    """Return a copy of *provided*, filled in when it misses the schema entirely.

    The copy is returned untouched when the action declares no argument
    fields or when at least one provided key is a declared field. Otherwise
    exactly one field is populated:
      * the first declared field among the common free-text names
        (``answer``, ``final_answer``, ``response``, ...) gets
        *fallback_text*; or, failing that,
      * the first required field gets a placeholder derived from its type.
    """
    args = dict(provided or {})
    fields = _argument_fields(action)
    if not fields or any(key in args for key in fields):
        return args

    text_keys = ("answer", "final_answer", "response", "content", "message", "text", "result", "output")
    text_key = next((key for key in text_keys if key in fields), None)
    if text_key is not None:
        args[text_key] = fallback_text
        return args

    for key, field in fields.items():
        if _field_required(field):
            args[key] = _fallback_value_for_field(field, fallback_text)
            break
    return args
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def _argument_fields(action: ActionType) -> dict[str, Any]:
|
|
531
|
+
args_type = getattr(action, "arguments", None)
|
|
532
|
+
return dict(getattr(args_type, "model_fields", {}) or getattr(args_type, "__fields__", {}) or {})
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def _field_required(field: Any) -> bool:
|
|
536
|
+
method = getattr(field, "is_required", None)
|
|
537
|
+
if callable(method):
|
|
538
|
+
return bool(method())
|
|
539
|
+
return bool(getattr(field, "required", False))
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def _fallback_value_for_field(field: Any, text: str) -> Any:
|
|
543
|
+
annotation = getattr(field, "annotation", None) or getattr(field, "type_", None)
|
|
544
|
+
if annotation is bool:
|
|
545
|
+
return False
|
|
546
|
+
if annotation is int:
|
|
547
|
+
return 0
|
|
548
|
+
if annotation is float:
|
|
549
|
+
return 0.0
|
|
550
|
+
if annotation is list:
|
|
551
|
+
return []
|
|
552
|
+
if annotation is dict:
|
|
553
|
+
return {}
|
|
554
|
+
return text
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
def _observation_is_empty(observation: Observation) -> bool:
|
|
558
|
+
try:
|
|
559
|
+
return bool(observation.is_empty())
|
|
560
|
+
except Exception:
|
|
561
|
+
return False
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def _observation_to_data(observation: Observation) -> Any:
|
|
565
|
+
try:
|
|
566
|
+
items = observation.to_observation_list()
|
|
567
|
+
except Exception:
|
|
568
|
+
return str(observation)
|
|
569
|
+
data: list[Any] = []
|
|
570
|
+
for item in items:
|
|
571
|
+
result = getattr(item, "result", item)
|
|
572
|
+
data.append(result)
|
|
573
|
+
return data
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def _single_action_to_data(action: Action) -> Any:
|
|
577
|
+
try:
|
|
578
|
+
values = []
|
|
579
|
+
for item in action.to_action_list():
|
|
580
|
+
args = getattr(item, "arguments", {})
|
|
581
|
+
if hasattr(args, "model_dump"):
|
|
582
|
+
args = args.model_dump()
|
|
583
|
+
values.append({"name": getattr(item, "name", ""), "arguments": args, "id": getattr(item, "id", "")})
|
|
584
|
+
return values
|
|
585
|
+
except Exception:
|
|
586
|
+
return {"id": safe_id(str(action)), "text": str(action)}
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
# The base-class expression is evaluated before this statement rebinds the
# name, so the parent is whatever ``CawdexAgentInstance`` referred to just
# before this line (presumably the runtime class defined earlier in this
# module - confirm). Afterwards the module-level name refers to this subclass.
class CawdexAgentInstance(CawdexAgentInstance):
    """Preferred Exgentic runtime class name for Cawdex."""
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
# The base-class expression is evaluated before this statement rebinds the
# name, so the parent is whatever ``CawdexAgent`` referred to just before this
# line (presumably the host config class defined earlier in this module -
# confirm). Afterwards the module-level name refers to this subclass.
class CawdexAgent(CawdexAgent):
    """Preferred Exgentic host class name for Cawdex."""

    # Class-level identifiers; they are not per-instance settings.
    display_name: ClassVar[str] = "Cawdex"
    slug_name: ClassVar[str] = "cawdex_agent"

    @classmethod
    def _get_instance_class(cls):
        # Resolved at call time, i.e. to the final module-level binding of
        # the name CawdexAgentInstance.
        return CawdexAgentInstance

    @classmethod
    def _get_instance_class_ref(cls) -> str:
        # "<module>:<class name>" string built from this class's module.
        return f"{cls.__module__}:CawdexAgentInstance"
|