cawdex 1.35.75 → 1.35.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/bin/anycode.js +2 -2
- package/bin/cawdex.js +408 -408
- package/bin/ecc-hooks.cjs +11 -11
- package/dist/agents-md.d.ts +31 -0
- package/dist/agents-md.js +340 -0
- package/dist/agents-md.js.map +1 -0
- package/dist/agents.js +1424 -1424
- package/dist/api.d.ts +1 -0
- package/dist/api.js +19 -14
- package/dist/api.js.map +1 -1
- package/dist/autonomous-loops.js +287 -287
- package/dist/benchmark-repos.d.ts +31 -0
- package/dist/benchmark-repos.js +234 -8
- package/dist/benchmark-repos.js.map +1 -1
- package/dist/command-palette.js +4 -2
- package/dist/command-palette.js.map +1 -1
- package/dist/compaction.js +8 -8
- package/dist/config.js +51 -36
- package/dist/config.js.map +1 -1
- package/dist/content-engine.js +543 -543
- package/dist/context-brief.d.ts +4 -0
- package/dist/context-brief.js +230 -0
- package/dist/context-brief.js.map +1 -0
- package/dist/cost-tracker.d.ts +33 -14
- package/dist/cost-tracker.js +81 -19
- package/dist/cost-tracker.js.map +1 -1
- package/dist/coverage.js +39 -39
- package/dist/docs-sync.js +98 -98
- package/dist/evaluation.js +452 -452
- package/dist/fixed-footer.d.ts +7 -1
- package/dist/fixed-footer.js +92 -18
- package/dist/fixed-footer.js.map +1 -1
- package/dist/git-workflow.js +49 -49
- package/dist/index.d.ts +2 -0
- package/dist/index.js +161 -63
- package/dist/index.js.map +1 -1
- package/dist/live-queue.js +1 -1
- package/dist/live-queue.js.map +1 -1
- package/dist/model-aliases.d.ts +37 -0
- package/dist/model-aliases.js +203 -0
- package/dist/model-aliases.js.map +1 -0
- package/dist/orchestration.js +15 -15
- package/dist/permissions.d.ts +6 -0
- package/dist/permissions.js +53 -0
- package/dist/permissions.js.map +1 -1
- package/dist/pm2-manager.js +26 -26
- package/dist/query.d.ts +0 -1
- package/dist/query.js +74 -39
- package/dist/query.js.map +1 -1
- package/dist/refactor.js +87 -87
- package/dist/repo-command.js +7 -1
- package/dist/repo-command.js.map +1 -1
- package/dist/search-first.js +92 -92
- package/dist/skill-create.js +100 -100
- package/dist/stitch.js +1 -1
- package/dist/system-prompt.d.ts +2 -1
- package/dist/system-prompt.js +10 -5
- package/dist/system-prompt.js.map +1 -1
- package/dist/tools/github-repo-digest.d.ts +1 -1
- package/dist/tools/github-repo-digest.js +38 -6
- package/dist/tools/github-repo-digest.js.map +1 -1
- package/dist/types.d.ts +3 -0
- package/dist/types.js.map +1 -1
- package/dist/verification.js +55 -55
- package/package.json +1 -1
- package/resources/__init__.py +1 -1
- package/resources/exgentic/cawdex_agent/README.md +114 -114
- package/resources/exgentic/cawdex_agent/__init__.py +5 -5
- package/resources/exgentic/cawdex_agent/agent.py +605 -605
- package/resources/exgentic/cawdex_agent/requirements.txt +2 -2
- package/resources/exgentic/cawdex_agent/setup.sh +21 -21
- package/resources/exgentic/cawdex_agent/utils.py +1061 -1061
- package/resources/hal/cawdex_agent/README.md +24 -24
- package/resources/hal/cawdex_agent/__init__.py +1 -1
- package/resources/hal/cawdex_agent/main.py +550 -550
- package/resources/hal/cawdex_agent/requirements.txt +2 -2
- package/resources/kbench/cawdex_agent/README.md +107 -107
- package/resources/kbench/cawdex_agent/adapter.manifest.json +19 -19
- package/resources/kbench/cawdex_agent/runner.mjs +753 -753
- package/resources/open_agent_leaderboard/cawdex-agent-card.md +119 -119
- package/resources/terminal_bench/__init__.py +1 -1
- package/resources/terminal_bench/cawdex_agent.py +174 -174
- package/resources/terminal_bench/setup.sh +121 -121
|
@@ -1,1061 +1,1061 @@
|
|
|
1
|
-
"""Stdlib helpers for the cawdex Exgentic adapter."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import json
|
|
6
|
-
import re
|
|
7
|
-
from dataclasses import dataclass
|
|
8
|
-
from difflib import SequenceMatcher
|
|
9
|
-
from typing import Any
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
# Ordered (compiled pattern, replacement) pairs that redact() applies in list
# order. Each replacement keeps the token's prefix, so the kind of secret that
# was masked stays visible. The more specific "sk-or-v1-" pattern is listed
# before the generic "sk-" pattern.
SECRET_REPLACEMENTS = [
    (re.compile(r"sk-or-v1-[A-Za-z0-9_-]+"), "sk-or-v1-[REDACTED]"),
    (re.compile(r"sk-[A-Za-z0-9_-]{16,}"), "sk-[REDACTED]"),
    (re.compile(r"hf_[A-Za-z0-9]{16,}"), "hf_[REDACTED]"),
    (re.compile(r"KGAT_[A-Za-z0-9]{16,}"), "KGAT_[REDACTED]"),
    (re.compile(r"npm_[A-Za-z0-9]{16,}"), "npm_[REDACTED]"),
]

# Lower-case words treated as noise.
# NOTE(review): no consumer is visible in this part of the file; presumably
# used when extracting keyword tokens for action ranking — confirm.
STOPWORDS = {
    "about",
    "after",
    "again",
    "also",
    "and",
    "any",
    "are",
    "available",
    "been",
    "before",
    "being",
    "can",
    "context",
    "could",
    "current",
    "does",
    "for",
    "from",
    "has",
    "have",
    "into",
    "latest",
    "need",
    "needs",
    "not",
    "observation",
    "only",
    "requested",
    "should",
    "task",
    "that",
    "the",
    "then",
    "this",
    "use",
    "user",
    "with",
    "you",
}
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
@dataclass(frozen=True)
class ActionPayload:
    """Machine-readable action selected by cawdex.

    The dataclass is frozen, so fields cannot be reassigned after construction
    (the ``arguments`` dict itself is still a mutable object).
    """

    # Action name, either as received or as resolved against the action docs.
    name: str
    # Arguments for the action, keyed by argument name.
    arguments: dict[str, Any]
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
@dataclass(frozen=True)
class ActionRepairResult:
    """Deterministic repair result for benchmark action JSON.

    Produced by ``repair_exgentic_action_payload`` and
    ``fallback_exgentic_action_payload``.
    """

    # The action to use after repair (or the fallback action that was chosen).
    payload: ActionPayload
    # True when the name or arguments differ from the input payload; the
    # fallback selector always sets this to True.
    changed: bool
    # Report describing what was done; both producers include a "status" key.
    diagnostics: dict[str, Any]
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def redact(value: Any) -> str:
    """Return ``value`` as text with known secret-token patterns masked.

    ``None`` becomes the empty string. Every other value is converted with
    ``str()``, including falsy ones such as ``0``, ``False`` or ``[]``; the
    previous ``value or ""`` idiom silently turned those into ``""``.

    Each ``(pattern, replacement)`` pair in ``SECRET_REPLACEMENTS`` is applied
    in list order.
    """
    text = "" if value is None else str(value)
    for pattern, replacement in SECRET_REPLACEMENTS:
        text = pattern.sub(replacement, text)
    return text
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def truncate(value: Any, limit: int = 80000) -> str:
    """Redact ``value`` and cap the resulting text at ``limit`` characters.

    Text that fits is returned as-is (after redaction). Longer text is cut to
    its first ``limit`` characters and a marker line stating how many
    characters were omitted is appended.
    """
    redacted = redact(value)
    overflow = len(redacted) - limit
    if overflow <= 0:
        return redacted
    head = redacted[:limit]
    return head + f"\n...[truncated {overflow} chars]"
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def json_dumps(value: Any, *, limit: int = 80000) -> str:
    """Render ``value`` as pretty-printed JSON, then redact and truncate it.

    Keys are sorted, non-ASCII characters are kept, and objects json cannot
    encode are passed through ``str``. If serialisation still fails, the plain
    ``str(value)`` is used instead. The text is finally handed to
    ``truncate`` with the given ``limit``.
    """
    try:
        rendered = json.dumps(
            value,
            default=str,
            sort_keys=True,
            indent=2,
            ensure_ascii=False,
        )
    except Exception:
        # Best-effort rendering: never let a serialisation error escape.
        rendered = str(value)
    return truncate(rendered, limit=limit)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def fold_exgentic_history(
    history: list[dict[str, Any]],
    *,
    profile: str = "generic",
    max_items: int = 16,
    item_limit: int = 1200,
) -> dict[str, Any]:
    """Build a compact task-relevant ledger for long Exgentic sessions.

    The adapter keeps the full raw history in memory. This folded view is what
    goes back into the next model call, so noisy stdout does not crowd out the
    latest app state, policy evidence, source evidence, or selected actions.

    Args:
        history: Raw history entries. Entries whose ``role`` is
            ``"observation"``, ``"selected_action"``, ``"cawdex"`` or
            ``"action_repair"`` are folded; other roles are ignored. Entries
            that are not dicts are skipped (they still consume a turn number).
        profile: Echoed in the result and passed to ``_folding_discipline``.
        max_items: Maximum number of recent observations, actions and
            diagnostics to keep. Zero or a negative value keeps none.
        item_limit: Character limit applied to each summarised item.

    Returns:
        A dict with the format tag, counters, the latest observation/action,
        the recent windows, per-action counts and a discipline hint.
    """
    entries = history or []
    # A bare ``seq[-0:]`` returns the whole list, so normalise the window and
    # handle the "keep nothing" case explicitly.
    window = max(0, int(max_items))

    def _tail(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
        return items[-window:] if window else []

    observations: list[dict[str, Any]] = []
    actions: list[dict[str, Any]] = []
    diagnostics: list[dict[str, Any]] = []
    action_counts: dict[str, int] = {}

    for idx, item in enumerate(entries, start=1):
        if not isinstance(item, dict):
            # Same guard the history helpers in this module apply.
            continue
        role = str(item.get("role", ""))
        if role == "observation":
            # json_dumps already redacts and truncates to item_limit; a second
            # truncate() would replace the real omitted-character count with
            # the length of the first truncation marker.
            observations.append(
                {
                    "turn": idx,
                    "summary": json_dumps(item.get("content"), limit=item_limit),
                }
            )
        elif role == "selected_action":
            compact_actions = _compact_selected_actions(item.get("content"), item_limit=item_limit)
            for action in compact_actions:
                name = action.get("name") or "unknown"
                action_counts[name] = action_counts.get(name, 0) + 1
            actions.append({"turn": idx, "actions": compact_actions})
        elif role == "cawdex":
            diagnostic = _compact_cawdex_diagnostic(item, item_limit=item_limit)
            if diagnostic is not None:
                diagnostics.append({"turn": idx, **diagnostic})
        elif role == "action_repair":
            diagnostics.append(
                {
                    "turn": idx,
                    "kind": "action_repair",
                    "evidence": json_dumps(item.get("content"), limit=item_limit),
                }
            )

    latest_observation = observations[-1] if observations else None
    latest_action = actions[-1] if actions else None
    return {
        "format": "cawdex-exgentic-folded-history-v1",
        "profile": profile,
        "turns_seen": len(entries),
        "latest_observation": latest_observation,
        "latest_action": latest_action,
        "no_effect_repeat_actions": _recent_no_effect_action_names(entries),
        "recent_observations": _tail(observations),
        "recent_actions": _tail(actions),
        "diagnostics": _tail(diagnostics),
        "action_counts": action_counts,
        "discipline": _folding_discipline(profile),
    }
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
def repair_exgentic_action_payload(
    payload: ActionPayload,
    action_docs: list[dict[str, Any]],
    *,
    argument_hints: Any = None,
) -> ActionRepairResult:
    """Repair near-miss action names and argument keys before ActionType build.

    The repair is deterministic and deliberately conservative: it corrects
    typical model drift (camelCase names, case-only differences, argument-key
    casing or separator slips) and leaves a name it cannot resolve untouched,
    so the caller can still fail or fall back explicitly.

    Returns an ``ActionRepairResult`` whose diagnostics carry the status
    (``"unresolved_action_name"``, ``"repaired"`` or ``"unchanged"``), the
    original and repaired names, how the name was matched, and the argument
    repair report.
    """
    usable_docs = [
        entry
        for entry in (action_docs or [])
        if isinstance(entry, dict) and entry.get("name")
    ]
    doc, reason, score = _resolve_action_doc(payload.name, usable_docs)

    if doc:
        final_name = str(doc.get("name"))
        schema = doc.get("arguments_schema")
    else:
        final_name = payload.name
        schema = None

    final_args, arg_report = _repair_action_arguments(
        payload.arguments,
        schema,
        argument_hints=argument_hints,
    )

    was_changed = final_name != payload.name or final_args != payload.arguments
    if doc is None:
        status = "unresolved_action_name"
    else:
        status = "repaired" if was_changed else "unchanged"

    report: dict[str, Any] = {
        "status": status,
        "original_name": payload.name,
        "repaired_name": final_name,
        "name_match_reason": reason,
        "name_match_score": round(score, 3),
    }
    # Argument diagnostics are merged last, exactly like a trailing ** unpack.
    report.update(arg_report)

    return ActionRepairResult(
        payload=ActionPayload(name=final_name, arguments=final_args),
        changed=was_changed,
        diagnostics=report,
    )
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
def fallback_exgentic_action_payload(
    action_docs: list[dict[str, Any]],
    *,
    task: Any = None,
    context: Any = None,
    history: list[dict[str, Any]] | None = None,
    profile: str = "generic",
    reason: str = "no_valid_action_json",
) -> ActionRepairResult | None:
    """Select a conservative fallback action when the model emits no valid JSON.

    The old adapter bias was finish/message first. That is dangerous for
    multi-step benchmarks because a transient malformed response can become a
    premature stop. This selector reuses the same shortlist and exact required
    argument hints as the main prompt, preferring viable non-finish actions
    while the latest observation is not completion-ready.

    Args:
        action_docs: Available action descriptions; entries that are not dicts
            or have no ``name`` are ignored.
        task: Forwarded to ``shortlist_exgentic_actions``.
        context: Forwarded to the shortlist and used as an argument-hint source.
        history: Session history; the latest observation is used as an
            argument-hint source.
        profile: Forwarded to ``shortlist_exgentic_actions``.
        reason: Recorded in the diagnostics as ``fallback_reason``.

    Returns:
        An ``ActionRepairResult`` (with ``changed=True``) for the first viable
        candidate, or ``None`` when there are no usable docs or no candidate
        qualifies.
    """

    # Keep only dict entries that carry a non-empty "name".
    docs = [doc for doc in action_docs or [] if isinstance(doc, dict) and doc.get("name")]
    if not docs:
        return None

    history_items = history or []
    latest_observation = _latest_history_content(history_items, "observation")
    # Sources from which missing required arguments may be filled in.
    argument_hints = {
        "latest_observation": latest_observation,
        "context": context or {},
    }
    shortlist = shortlist_exgentic_actions(
        docs,
        task=task,
        context=context,
        history=history_items,
        profile=profile,
    )
    completion_ready = bool(shortlist.get("completion_ready"))
    # Names the shortlist flagged as recently repeated without effect.
    no_effect_repeat_actions = [
        str(name)
        for name in shortlist.get("avoid_no_effect_repeat_actions") or []
        if str(name)
    ]
    # NOTE(review): _fallback_candidate_names is defined outside this view;
    # presumably it returns candidate names in preference order — confirm.
    candidate_names = _fallback_candidate_names(
        shortlist,
        docs,
        completion_ready=completion_ready,
        avoid_names=no_effect_repeat_actions,
    )
    # Candidates rejected before one was selected, kept for the diagnostics.
    skipped: list[dict[str, Any]] = []

    for name in candidate_names:
        doc = _doc_by_name(docs, name)
        if doc is None:
            continue
        is_completion = bool(doc.get("is_finish") or doc.get("is_message"))
        # Never pick finish/message while completion is not indicated.
        if not completion_ready and is_completion:
            skipped.append({"name": name, "reason": "completion_not_ready"})
            continue
        # Start from empty arguments; the repair step fills required
        # arguments from the hints where it can.
        repair = repair_exgentic_action_payload(
            ActionPayload(name=name, arguments={}),
            docs,
            argument_hints=argument_hints,
        )
        missing = _missing_required_arguments(repair.payload.arguments, doc.get("arguments_schema"))
        # Missing required arguments disqualify a candidate, except for a
        # completion action once completion is ready.
        if missing and not (completion_ready and is_completion):
            skipped.append({"name": name, "reason": "missing_required_arguments", "missing": missing})
            continue
        diagnostics = {
            "status": "fallback_selected",
            "fallback_reason": reason,
            "selected_name": repair.payload.name,
            "completion_ready": completion_ready,
            "avoid_no_effect_repeat_actions": no_effect_repeat_actions,
            "candidate_names": candidate_names[:12],
            "skipped_candidates": skipped[:8],
            "shortlist": shortlist,
            "repair": repair.diagnostics,
        }
        return ActionRepairResult(payload=repair.payload, changed=True, diagnostics=diagnostics)

    # No candidate survived the checks above.
    return None
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
def shortlist_exgentic_actions(
    action_docs: list[dict[str, Any]],
    *,
    task: Any = None,
    context: Any = None,
    history: list[dict[str, Any]] | None = None,
    profile: str = "generic",
    limit: int = 8,
) -> dict[str, Any]:
    """Rank available actions into a compact shortlist for the next step.

    Exgentic still receives the full action schema list below this shortlist.
    The shortlist is a deterministic scaffold: it narrows attention to likely
    actions and finish/message timing without hiding benchmark capabilities.

    Args:
        action_docs: Available action descriptions; entries that are not dicts
            or have no ``name`` are ignored.
        task: Task description; its text is part of the matching target.
        context: Extra context; serialised into the matching target and used
            as an argument-hint source.
        history: Session history; supplies the latest observation, the latest
            selected action, and the error / no-effect signals.
        profile: Benchmark profile, used for the per-profile prior.
        limit: Requested shortlist size, clamped to the range 1..16
            (a falsy value means 8).

    Returns:
        A dict with the format tag, the ranked ``shortlisted_actions``, the
        ``completion_ready`` flag, completion actions that were deferred, the
        no-effect repeat names, and a discipline note for the model.
    """

    docs = [doc for doc in action_docs or [] if isinstance(doc, dict) and doc.get("name")]
    safe_limit = max(1, min(16, int(limit or 8)))
    latest_observation = _latest_history_content(history or [], "observation")
    latest_observation_text = json_dumps(latest_observation, limit=6000) if latest_observation is not None else ""
    argument_hints = {
        "latest_observation": latest_observation,
        "context": context or {},
    }
    # Lower-cased text that action docs and schema keys are matched against.
    target_text = " ".join(
        [
            str(task or ""),
            json_dumps(context or {}, limit=6000),
            latest_observation_text,
        ]
    ).lower()
    tokens = _keyword_tokens(target_text)
    completion_ready = _completion_ready(latest_observation_text)
    latest_action_name = _latest_selected_action_name(history or [])
    has_recent_error = _has_recent_action_error(history or [])
    no_effect_repeat_actions = _recent_no_effect_action_names(history or [])
    no_effect_repeat_set = {name.lower() for name in no_effect_repeat_actions}

    # One tuple per action:
    # (score, lower-cased name, doc, reasons, schema keys, required keys, required hints).
    scored: list[tuple[float, str, dict[str, Any], list[str], list[str], list[str], list[dict[str, str]]]] = []
    for doc in docs:
        name = str(doc.get("name") or "")
        action_text = _action_doc_text(doc)
        schema = doc.get("arguments_schema")
        schema_keys = _schema_property_keys(schema)
        required_keys = _schema_required_keys(schema)
        required_hints = _required_argument_hints(required_keys, argument_hints)
        score = 0.0
        reasons: list[str] = []

        # +2 per task/observation token found in the action text (at most 6 hits counted).
        token_hits = [token for token in tokens if token in action_text][:6]
        if token_hits:
            score += min(12, len(token_hits) * 2)
            reasons.append(f"matches task/observation tokens: {', '.join(token_hits)}")

        # +2 per schema key mentioned in the target text, capped at 10.
        schema_hits = [key for key in schema_keys if key.lower() in target_text][:6]
        if schema_hits:
            score += min(10, len(schema_hits) * 2)
            reasons.append(f"schema keys appear in current state: {', '.join(schema_hits)}")

        # +3 per required argument that has a hint available, capped at 8.
        if required_hints:
            score += min(8, len(required_hints) * 3)
            reasons.append(
                "required args available in current state: "
                + ", ".join(item["key"] for item in required_hints[:4])
            )

        # Profile-specific prior (defined outside this view).
        prior_score, prior_reason = _profile_action_prior(profile, name, action_text)
        if prior_score:
            score += prior_score
            reasons.append(prior_reason)

        # +8 when every name token (ignoring "action") appears in the latest observation.
        name_tokens = [token for token in _keyword_tokens(name.replace("_", " ")) if token not in {"action"}]
        if name_tokens and all(token in latest_observation_text.lower() for token in name_tokens):
            score += 8
            reasons.append("action name matches explicit latest-observation cue")

        # Finish/message actions are boosted only once completion is indicated.
        is_completion = bool(doc.get("is_finish") or doc.get("is_message"))
        if is_completion:
            if completion_ready:
                score += 8
                reasons.append("latest observation suggests completion is ready")
            else:
                score -= 7
                reasons.append("defer finish/message until benchmark-visible completion evidence")

        # Penalise repeating the previous action, more so after a recent error.
        if latest_action_name and name.lower() == latest_action_name.lower():
            score -= 2
            reasons.append("same as previous selected action")
            if has_recent_error:
                score -= 4
                reasons.append("avoid repeating after recent action/schema error")
        # Strong penalty for an action that just had no visible effect.
        if not completion_ready and name.lower() in no_effect_repeat_set:
            score -= 10
            reasons.append("avoid repeating no-effect action; latest observation did not change")

        scored.append((score, name.lower(), doc, reasons, schema_keys, required_keys, required_hints))

    # Highest score first; ties are broken by lower-cased name, ascending.
    scored.sort(key=lambda item: (-item[0], item[1]))
    shortlisted = [
        _shortlist_item(doc, score, reasons, schema_keys, required_keys, required_hints)
        for score, _name, doc, reasons, schema_keys, required_keys, required_hints in scored[:safe_limit]
    ]
    shortlisted_names = {str(item.get("name", "")).lower() for item in shortlisted}
    # Completion actions that did not make the shortlist while completion is not ready.
    deferred_completion = [
        str(doc.get("name"))
        for doc in docs
        if (doc.get("is_finish") or doc.get("is_message"))
        and str(doc.get("name", "")).lower() not in shortlisted_names
        and not completion_ready
    ]

    return {
        "format": "cawdex-exgentic-action-shortlist-v1",
        "profile": profile,
        "action_count": len(docs),
        "shortlist_limit": safe_limit,
        "completion_ready": completion_ready,
        "avoid_no_effect_repeat_actions": no_effect_repeat_actions,
        "shortlisted_actions": shortlisted,
        "deferred_completion_actions": deferred_completion,
        "discipline": "Prefer shortlisted actions when they fit the latest observation; use full schemas below if the current state clearly requires a non-shortlisted action. If avoid_no_effect_repeat_actions is non-empty, change strategy unless no other viable action has its required arguments.",
    }
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
def safe_id(value: Any, default: str = "session") -> str:
    """Turn ``value`` into an identifier made of ``A-Za-z0-9_.-`` only.

    A falsy ``value`` is replaced by ``default``. Every run of other
    characters collapses to a single ``-``, and leading/trailing dashes are
    removed. If nothing is left, ``default`` is returned.
    """
    source = str(value or default)
    collapsed = re.sub(r"[^A-Za-z0-9_.-]+", "-", source)
    trimmed = collapsed.strip("-")
    if trimmed:
        return trimmed
    return default
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
def _compact_selected_actions(value: Any, *, item_limit: int) -> list[dict[str, Any]]:
    """Summarise one history entry's selected action(s) as small dicts.

    ``value`` may be a single action or a list of actions. Each dict action
    yields its name, its sorted argument keys and a JSON rendering of the
    arguments limited to ``item_limit`` characters. Non-dict arguments are
    wrapped as ``{"value": ...}``. A non-dict action yields
    ``{"name": "unknown", "summary": <truncated text>}``.
    """
    actions = value if isinstance(value, list) else [value]
    compact: list[dict[str, Any]] = []
    for action in actions:
        if not isinstance(action, dict):
            compact.append({"name": "unknown", "summary": truncate(action, limit=item_limit)})
            continue
        raw_args = action.get("arguments", {})
        args = raw_args if isinstance(raw_args, dict) else {"value": raw_args}
        compact.append(
            {
                "name": str(action.get("name") or "unknown"),
                "argument_keys": sorted(str(key) for key in args),
                # json_dumps already redacts and truncates to item_limit. A
                # second truncate() here would cut the first truncation marker
                # and report its length as the omitted-character count.
                "arguments": json_dumps(args, limit=item_limit),
            }
        )
    return compact
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
def _compact_cawdex_diagnostic(item: dict[str, Any], *, item_limit: int) -> dict[str, Any] | None:
|
|
444
|
-
returncode = item.get("returncode")
|
|
445
|
-
stderr = str(item.get("stderr") or "")
|
|
446
|
-
stdout = str(item.get("stdout") or "")
|
|
447
|
-
text = "\n".join(part for part in [stderr, stdout] if part)
|
|
448
|
-
if returncode in (None, 0) and not re.search(
|
|
449
|
-
r"\b(error|invalid|unknown action|schema|malformed|permission|timed out|timeout|failed)\b",
|
|
450
|
-
text,
|
|
451
|
-
flags=re.IGNORECASE,
|
|
452
|
-
):
|
|
453
|
-
return None
|
|
454
|
-
return {
|
|
455
|
-
"returncode": returncode,
|
|
456
|
-
"evidence": truncate(text, limit=item_limit),
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
def _folding_discipline(profile: str) -> str:
|
|
461
|
-
if profile == "appworld":
|
|
462
|
-
return "Use latest_observation as authoritative app/API state; preserve IDs, dates, permissions, and record integrity."
|
|
463
|
-
if profile == "browsecomp":
|
|
464
|
-
return "Carry forward verified sources and unresolved search facets; do not treat snippets or stale single-source claims as final evidence."
|
|
465
|
-
if profile == "tau2":
|
|
466
|
-
return "Carry forward policy constraints, customer intent, tool results, and pending confirmations before selecting the next action."
|
|
467
|
-
if profile == "terminalworld":
|
|
468
|
-
return "TerminalWorld discipline: carry forward instruction.md/task artifact requirements, generated files/services, command outputs, verifier status, and any solve.sh/reference-solution avoidance before selecting the next action."
|
|
469
|
-
if profile == "webdevbench":
|
|
470
|
-
return "Carry forward canary requirements, frontend/backend state, integration evidence, and production/security gaps before selecting the next action."
|
|
471
|
-
if profile == "swe-cycle":
|
|
472
|
-
return "Carry forward lifecycle phase, bare-repo environment setup state, implementation requirements, generated/selected tests, judge commands, and unresolved phase gaps before selecting the next action."
|
|
473
|
-
if profile == "swe-ci":
|
|
474
|
-
return "Carry forward current/target commits, test gaps, inferred requirements, touched files, verifier deltas, and unresolved regressions before selecting the next action."
|
|
475
|
-
if profile == "swe-prbench":
|
|
476
|
-
return "Carry forward PR title/description, changed files, diff hunks, suspected findings, evidence gaps, and context-expansion reasons before selecting the next action."
|
|
477
|
-
if profile == "tml-bench":
|
|
478
|
-
return "Carry forward train/test/sample submission paths, ID/target columns, metric, validation split, leakage checks, model artifacts, submission path, and validity evidence before selecting the next action."
|
|
479
|
-
if profile == "pi-bench":
|
|
480
|
-
return "Carry forward user profile, current request, message/file/app context, available domain tools, hidden-intent hypotheses, clarification state, privacy risk, selected actions, and observable completion evidence before selecting the next action."
|
|
481
|
-
return "Use the folded ledger as orientation, then rely on the latest observation and available action schemas for the next action."
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
def _resolve_action_doc(
|
|
485
|
-
name: str,
|
|
486
|
-
docs: list[dict[str, Any]],
|
|
487
|
-
) -> tuple[dict[str, Any] | None, str, float]:
|
|
488
|
-
raw = str(name or "")
|
|
489
|
-
if not raw:
|
|
490
|
-
return None, "empty", 0.0
|
|
491
|
-
|
|
492
|
-
for doc in docs:
|
|
493
|
-
candidate = str(doc.get("name") or "")
|
|
494
|
-
if candidate == raw:
|
|
495
|
-
return doc, "exact", 1.0
|
|
496
|
-
|
|
497
|
-
lowered = raw.lower()
|
|
498
|
-
for doc in docs:
|
|
499
|
-
candidate = str(doc.get("name") or "")
|
|
500
|
-
if candidate.lower() == lowered:
|
|
501
|
-
return doc, "case_insensitive", 1.0
|
|
502
|
-
|
|
503
|
-
normalized = _normalized_identifier(raw)
|
|
504
|
-
for doc in docs:
|
|
505
|
-
candidate = str(doc.get("name") or "")
|
|
506
|
-
if _normalized_identifier(candidate) == normalized:
|
|
507
|
-
return doc, "normalized_identifier", 1.0
|
|
508
|
-
|
|
509
|
-
best_doc: dict[str, Any] | None = None
|
|
510
|
-
best_score = 0.0
|
|
511
|
-
second_score = 0.0
|
|
512
|
-
for doc in docs:
|
|
513
|
-
candidate = str(doc.get("name") or "")
|
|
514
|
-
candidate_norm = _normalized_identifier(candidate)
|
|
515
|
-
score = SequenceMatcher(None, normalized, candidate_norm).ratio() if normalized and candidate_norm else 0.0
|
|
516
|
-
if normalized and candidate_norm and (normalized in candidate_norm or candidate_norm in normalized):
|
|
517
|
-
score = max(score, 0.82)
|
|
518
|
-
if score > best_score:
|
|
519
|
-
second_score = best_score
|
|
520
|
-
best_score = score
|
|
521
|
-
best_doc = doc
|
|
522
|
-
elif score > second_score:
|
|
523
|
-
second_score = score
|
|
524
|
-
|
|
525
|
-
if best_doc is not None and best_score >= 0.82 and best_score - second_score >= 0.04:
|
|
526
|
-
return best_doc, "fuzzy_identifier", best_score
|
|
527
|
-
return None, "unresolved", best_score
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
def _repair_action_arguments(
    arguments: dict[str, Any],
    schema: Any,
    *,
    argument_hints: Any = None,
) -> tuple[dict[str, Any], dict[str, Any]]:
    """Map argument keys onto the schema's keys and fill required gaps.

    Keys that already match a schema key are kept. Keys that match only after
    identifier normalisation (case and separators ignored) are renamed to the
    schema key. Keys that match nothing are dropped. Missing required
    arguments are then filled from ``argument_hints`` where possible.

    A correctly spelled key always wins over a near-miss spelling of the same
    schema key, and the first near-miss wins over later ones; the losing keys
    are reported under ``dropped_argument_keys`` instead of silently
    overwriting the kept value.

    When the schema exposes no property keys, the arguments are returned as a
    shallow copy with empty diagnostics.

    Returns:
        ``(repaired_arguments, diagnostics)`` where diagnostics holds
        ``argument_key_repairs``, ``dropped_argument_keys``,
        ``filled_required_arguments`` and ``schema_keys`` (first 24).
    """
    args = dict(arguments or {})
    schema_keys = _schema_property_keys(schema)
    if not schema_keys:
        return args, {
            "argument_key_repairs": [],
            "dropped_argument_keys": [],
            "filled_required_arguments": [],
            "schema_keys": [],
        }

    schema_key_set = set(schema_keys)
    key_by_normalized = {_normalized_identifier(key): key for key in schema_keys}
    # Schema keys the caller spelled exactly; a near-miss key must not
    # replace their values regardless of dict order.
    exact_keys = {str(key) for key in args} & schema_key_set
    repaired: dict[str, Any] = {}
    key_repairs: list[dict[str, str]] = []
    dropped: list[str] = []

    for key, value in args.items():
        text_key = str(key)
        if text_key in schema_key_set:
            repaired[text_key] = value
            continue
        canonical = key_by_normalized.get(_normalized_identifier(text_key))
        if canonical is None or canonical in exact_keys or canonical in repaired:
            # Unknown key, or a duplicate spelling of a key that is already set.
            dropped.append(text_key)
            continue
        repaired[canonical] = value
        key_repairs.append({"from": text_key, "to": canonical})

    filled = _fill_required_arguments(repaired, schema, argument_hints)
    return repaired, {
        "argument_key_repairs": key_repairs,
        "dropped_argument_keys": dropped,
        "filled_required_arguments": filled,
        "schema_keys": schema_keys[:24],
    }
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
def _fill_required_arguments(
|
|
573
|
-
repaired: dict[str, Any],
|
|
574
|
-
schema: Any,
|
|
575
|
-
argument_hints: Any,
|
|
576
|
-
) -> list[dict[str, str]]:
|
|
577
|
-
if not isinstance(schema, dict):
|
|
578
|
-
return []
|
|
579
|
-
required = [str(key) for key in schema.get("required") or [] if str(key)]
|
|
580
|
-
if not required:
|
|
581
|
-
return []
|
|
582
|
-
hint_index = _argument_hint_index(argument_hints)
|
|
583
|
-
filled: list[dict[str, str]] = []
|
|
584
|
-
existing = {_normalized_identifier(key) for key in repaired.keys()}
|
|
585
|
-
|
|
586
|
-
for key in required:
|
|
587
|
-
norm = _normalized_identifier(key)
|
|
588
|
-
if not norm or norm in existing:
|
|
589
|
-
continue
|
|
590
|
-
match = hint_index.get(norm)
|
|
591
|
-
if match is None:
|
|
592
|
-
continue
|
|
593
|
-
value, source = match
|
|
594
|
-
if value is None:
|
|
595
|
-
continue
|
|
596
|
-
repaired[key] = value
|
|
597
|
-
existing.add(norm)
|
|
598
|
-
filled.append({"key": key, "source": source})
|
|
599
|
-
return filled
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
def _argument_hint_index(value: Any) -> dict[str, tuple[Any, str]]:
|
|
603
|
-
index: dict[str, tuple[Any, str]] = {}
|
|
604
|
-
|
|
605
|
-
def visit(item: Any, path: str) -> None:
|
|
606
|
-
if isinstance(item, dict):
|
|
607
|
-
for key, child in item.items():
|
|
608
|
-
key_text = str(key)
|
|
609
|
-
child_path = f"{path}.{key_text}" if path else key_text
|
|
610
|
-
norm = _normalized_identifier(key_text)
|
|
611
|
-
if norm and norm not in index and _hint_value_is_usable(child):
|
|
612
|
-
index[norm] = (child, child_path)
|
|
613
|
-
visit(child, child_path)
|
|
614
|
-
elif isinstance(item, list):
|
|
615
|
-
for idx, child in enumerate(item[:50]):
|
|
616
|
-
visit(child, f"{path}[{idx}]" if path else f"[{idx}]")
|
|
617
|
-
|
|
618
|
-
visit(value, "")
|
|
619
|
-
return index
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
def _hint_value_is_usable(value: Any) -> bool:
|
|
623
|
-
if value is None:
|
|
624
|
-
return False
|
|
625
|
-
if isinstance(value, str):
|
|
626
|
-
return bool(value.strip())
|
|
627
|
-
if isinstance(value, (bool, int, float)):
|
|
628
|
-
return True
|
|
629
|
-
if isinstance(value, (dict, list)):
|
|
630
|
-
return bool(value)
|
|
631
|
-
return True
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
def _normalized_identifier(value: Any) -> str:
|
|
635
|
-
return re.sub(r"[^a-z0-9]+", "", str(value or "").lower())
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
def _latest_history_content(history: list[dict[str, Any]], role: str) -> Any | None:
|
|
639
|
-
for item in reversed(history or []):
|
|
640
|
-
if isinstance(item, dict) and item.get("role") == role:
|
|
641
|
-
return item.get("content")
|
|
642
|
-
return None
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
def _latest_selected_action_name(history: list[dict[str, Any]]) -> str | None:
    """Return the first named action in the latest ``selected_action`` entry.

    The entry content may be a single action dict or a list of them. ``None``
    is returned when there is no such entry or no action has a truthy name.
    """
    content = _latest_history_content(history, "selected_action")
    candidates = content if isinstance(content, list) else [content]
    named = (
        str(candidate.get("name"))
        for candidate in candidates
        if isinstance(candidate, dict) and candidate.get("name")
    )
    return next(named, None)
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
def _recent_no_effect_action_names(history: list[dict[str, Any]]) -> list[str]:
    """List actions taken between the last two observations that changed nothing.

    The latest observation counts as "unchanged" when its fingerprint equals
    that of the previous observation, or when its text contains one of the
    no-change phrases matched below. If so, the names of every
    ``selected_action`` entry recorded between the two observations are
    returned (deduplicated, in order); otherwise the result is empty.
    """
    entries = history or []
    observation_positions = [
        position
        for position, entry in enumerate(entries)
        if isinstance(entry, dict) and entry.get("role") == "observation"
    ]
    # Two observations are needed to compare "before" and "after".
    if len(observation_positions) < 2:
        return []
    previous_position, latest_position = observation_positions[-2:]

    latest = entries[latest_position].get("content")
    previous = entries[previous_position].get("content")
    latest_text = json_dumps(latest, limit=4000).lower()

    same_fingerprint = _observation_fingerprint(latest) == _observation_fingerprint(previous)
    if not same_fingerprint:
        says_unchanged = re.search(
            r"\b(no change|no changes|unchanged|same state|nothing changed|no effect|still pending|"
            r"did not (?:change|update|move|complete|resolve)|not (?:changed|updated|completed|resolved))\b",
            latest_text,
        )
        if not says_unchanged:
            return []

    collected: list[str] = []
    for entry in entries[previous_position + 1:latest_position]:
        if isinstance(entry, dict) and entry.get("role") == "selected_action":
            for action_name in _selected_action_names(entry.get("content")):
                push_unique(collected, action_name)
    return collected
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
def _selected_action_names(value: Any) -> list[str]:
    """Collect the distinct action names found in *value*, in first-seen order.

    *value* may be one action dict or a list of them; anything that is not a
    dict with a truthy ``name`` is ignored.
    """
    candidates = value if isinstance(value, list) else [value]
    collected: list[str] = []
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        raw_name = candidate.get("name")
        if raw_name:
            push_unique(collected, str(raw_name))
    return collected
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
def _observation_fingerprint(value: Any) -> str:
    """Return a case- and whitespace-insensitive text form of *value*.

    The value is serialized with ``json_dumps`` (capped at 8000 chars),
    lower-cased, and every whitespace run is collapsed to one space.
    """
    serialized = json_dumps(value, limit=8000)
    collapsed = re.sub(r"\s+", " ", serialized.lower())
    return collapsed.strip()
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
def _has_recent_action_error(history: list[dict[str, Any]]) -> bool:
    """Report whether any of the last four entries is a ``cawdex`` diagnostic.

    An entry qualifies when its role is ``"cawdex"`` and
    ``_compact_cawdex_diagnostic`` returns something other than ``None``.
    """
    recent_entries = (history or [])[-4:]
    return any(
        isinstance(entry, dict)
        and entry.get("role") == "cawdex"
        and _compact_cawdex_diagnostic(entry, item_limit=600) is not None
        for entry in reversed(recent_entries)
    )
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
def _keyword_tokens(text: str) -> list[str]:
    """Extract up to 80 distinct lower-case keywords from *text*.

    Candidates start with a letter and are at least three characters long.
    Leading/trailing ``-`` and ``_`` are stripped; stopwords, duplicates and
    tokens shorter than three characters after stripping are dropped.
    """
    # A dict keeps insertion order and gives O(1) duplicate checks.
    ordered: dict[str, None] = {}
    for match in re.finditer(r"[A-Za-z][A-Za-z0-9_-]{2,}", text or ""):
        token = match.group(0).lower().strip("-_")
        if len(token) < 3 or token in STOPWORDS or token in ordered:
            continue
        ordered[token] = None
        if len(ordered) >= 80:
            break
    return list(ordered)
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
def _completion_ready(latest_observation_text: str) -> bool:
|
|
734
|
-
text = (latest_observation_text or "").lower()
|
|
735
|
-
if not text:
|
|
736
|
-
return False
|
|
737
|
-
if re.search(r"\b(pending|missing|need|needs|required|error|failed|invalid|not complete|unresolved)\b", text):
|
|
738
|
-
return False
|
|
739
|
-
return bool(re.search(r"\b(done|complete|completed|success|succeeded|confirmed|final answer|resolved)\b", text))
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
def _action_doc_text(doc: dict[str, Any]) -> str:
    """Build one lower-case search string describing an action document.

    The string joins the action name, its description, the property keys of
    its argument schema, and the serialized schema (capped at 4000 chars).
    """
    schema = doc.get("arguments_schema")
    name_text = str(doc.get("name") or "")
    description_text = str(doc.get("description") or "")
    key_text = " ".join(_schema_property_keys(schema))
    schema_text = json_dumps(schema or {}, limit=4000)
    return " ".join((name_text, description_text, key_text, schema_text)).lower()
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
def _schema_property_keys(schema: Any) -> list[str]:
|
|
753
|
-
if not isinstance(schema, dict):
|
|
754
|
-
return []
|
|
755
|
-
keys: list[str] = []
|
|
756
|
-
properties = schema.get("properties")
|
|
757
|
-
if isinstance(properties, dict):
|
|
758
|
-
keys.extend(str(key) for key in properties.keys())
|
|
759
|
-
for nested_key in ("$defs", "definitions"):
|
|
760
|
-
nested = schema.get(nested_key)
|
|
761
|
-
if isinstance(nested, dict):
|
|
762
|
-
for value in nested.values():
|
|
763
|
-
keys.extend(_schema_property_keys(value))
|
|
764
|
-
seen: set[str] = set()
|
|
765
|
-
deduped: list[str] = []
|
|
766
|
-
for key in keys:
|
|
767
|
-
lowered = key.lower()
|
|
768
|
-
if lowered in seen:
|
|
769
|
-
continue
|
|
770
|
-
seen.add(lowered)
|
|
771
|
-
deduped.append(key)
|
|
772
|
-
return deduped
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
def _schema_required_keys(schema: Any) -> list[str]:
|
|
776
|
-
if not isinstance(schema, dict):
|
|
777
|
-
return []
|
|
778
|
-
required = schema.get("required")
|
|
779
|
-
if not isinstance(required, list):
|
|
780
|
-
return []
|
|
781
|
-
seen: set[str] = set()
|
|
782
|
-
keys: list[str] = []
|
|
783
|
-
for key in required:
|
|
784
|
-
text = str(key or "").strip()
|
|
785
|
-
lowered = text.lower()
|
|
786
|
-
if not text or lowered in seen:
|
|
787
|
-
continue
|
|
788
|
-
seen.add(lowered)
|
|
789
|
-
keys.append(text)
|
|
790
|
-
return keys
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
def _required_argument_hints(required_keys: list[str], argument_hints: Any) -> list[dict[str, str]]:
    """Describe which required arguments can be filled from *argument_hints*.

    For each required key whose normalized form appears in the hint index
    with a usable value, one dict is produced with:

    * ``key`` - the required key as given,
    * ``source`` - the path inside *argument_hints* where the value was found,
    * ``value_preview`` - the serialized value, capped at 360 characters.

    Keys without a usable hint are omitted. An empty *required_keys* list
    short-circuits to ``[]`` without building the index.
    """
    if not required_keys:
        return []

    hint_index = _argument_hint_index(argument_hints)
    hints: list[dict[str, str]] = []
    for key in required_keys:
        match = hint_index.get(_normalized_identifier(key))
        if match is None:
            continue
        value, source = match
        if not _hint_value_is_usable(value):
            continue
        hints.append(
            {
                "key": key,
                "source": source,
                # json_dumps already redacts and truncates. Truncating its
                # output a second time replaced the marker with one that
                # reported the wrong number of omitted characters.
                "value_preview": json_dumps(value, limit=360),
            }
        )
    return hints
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
def _missing_required_arguments(arguments: dict[str, Any], schema: Any) -> list[str]:
    """Return the schema's required keys that *arguments* does not satisfy.

    A required key is satisfied when some argument key with the same
    normalized identifier holds a usable value.
    """
    required = _schema_required_keys(schema)
    if not required:
        return []

    satisfied: set[str] = set()
    for arg_key, arg_value in (arguments or {}).items():
        if _hint_value_is_usable(arg_value):
            satisfied.add(_normalized_identifier(arg_key))

    return [name for name in required if _normalized_identifier(name) not in satisfied]
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
def _fallback_candidate_names(
    shortlist: dict[str, Any],
    docs: list[dict[str, Any]],
    *,
    completion_ready: bool,
    avoid_names: list[str] | None = None,
) -> list[str]:
    """Order the action names to try when a fallback action must be chosen.

    When *completion_ready* is true, finish/message actions (and actions
    literally named finish/final/done) come first. Shortlisted actions
    follow. When not completion-ready, every non-finish/non-message action
    is added next. Finally all documented actions are appended if completion
    is ready or only one action exists. Names listed in *avoid_names* are
    deferred to the very end while completion is not ready.
    """
    ordered: list[str] = []
    deferred: list[str] = []
    avoided = {str(entry).lower() for entry in avoid_names or [] if str(entry)}

    def is_completion(doc: dict[str, Any]) -> Any:
        return doc.get("is_finish") or doc.get("is_message")

    def consider(raw: Any) -> None:
        candidate = str(raw or "")
        if not candidate:
            return
        # Recently ineffective actions are pushed to the back of the queue.
        target = deferred if (not completion_ready and candidate.lower() in avoided) else ordered
        push_unique(target, candidate)

    if completion_ready:
        for doc in docs:
            if is_completion(doc) and doc.get("name"):
                consider(doc.get("name"))
        for doc in docs:
            doc_name = str(doc.get("name") or "")
            if doc_name.lower() in {"finish", "final", "done"}:
                consider(doc_name)

    for entry in shortlist.get("shortlisted_actions") or []:
        if isinstance(entry, dict) and entry.get("name"):
            consider(entry.get("name"))

    if not completion_ready:
        for doc in docs:
            if not is_completion(doc) and doc.get("name"):
                consider(doc.get("name"))

    include_everything = completion_ready or len(docs) == 1
    for doc in docs:
        if include_everything and doc.get("name"):
            consider(doc.get("name"))

    for late_name in deferred:
        push_unique(ordered, late_name)
    return ordered
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
def _doc_by_name(docs: list[dict[str, Any]], name: str) -> dict[str, Any] | None:
|
|
874
|
-
for doc in docs:
|
|
875
|
-
if str(doc.get("name") or "") == name:
|
|
876
|
-
return doc
|
|
877
|
-
lowered = str(name or "").lower()
|
|
878
|
-
for doc in docs:
|
|
879
|
-
if str(doc.get("name") or "").lower() == lowered:
|
|
880
|
-
return doc
|
|
881
|
-
return None
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
def push_unique(values: list[str], value: str) -> None:
    """Append *value* to *values* in place unless it is already present."""
    if value in values:
        return
    values.append(value)
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
def _profile_action_prior(profile: str, name: str, action_text: str) -> tuple[float, str]:
|
|
890
|
-
text = f"{name} {action_text}".lower()
|
|
891
|
-
if profile == "appworld":
|
|
892
|
-
if re.search(r"\b(get|lookup|list|search|find|query|read|fetch|load|inspect)\b", text):
|
|
893
|
-
return 5, "AppWorld prior: inspect app/API state before mutating records"
|
|
894
|
-
if re.search(r"\b(create|update|set|delete|cancel|submit|send)\b", text):
|
|
895
|
-
return 3, "AppWorld prior: likely state-changing app action"
|
|
896
|
-
elif profile == "browsecomp":
|
|
897
|
-
if re.search(r"\b(search|query|browse|web|open|read|fetch|source|cite|visit)\b", text):
|
|
898
|
-
return 6, "BrowseComp prior: gather and verify source evidence"
|
|
899
|
-
if re.search(r"\b(answer|final|finish|message|respond)\b", text):
|
|
900
|
-
return 2, "BrowseComp prior: final answer action when evidence is sufficient"
|
|
901
|
-
elif profile == "tau2":
|
|
902
|
-
if re.search(r"\b(policy|lookup|search|get|list|read|check|verify|order|customer|account|ticket)\b", text):
|
|
903
|
-
return 5, "tau2 prior: check policy/customer/tool state before commitments"
|
|
904
|
-
if re.search(r"\b(update|create|cancel|refund|transfer|confirm|submit|send)\b", text):
|
|
905
|
-
return 3, "tau2 prior: policy-supported customer-service action"
|
|
906
|
-
elif profile == "webdevbench":
|
|
907
|
-
if re.search(r"\b(requirements?|canar(?:y|ies)|spec|product|plan|architecture|read|inspect|list|search|get|query)\b", text):
|
|
908
|
-
return 6, "WebDevBench prior: preserve product/canary requirements before building"
|
|
909
|
-
if re.search(r"\b(e2e|integration|api|browser|playwright|cypress|security|audit|build|deploy|migration|load|concurrency|health)\b", text):
|
|
910
|
-
return 5, "WebDevBench prior: verify full-stack, production, or security evidence"
|
|
911
|
-
if re.search(r"\b(create|update|modify|deploy|submit|send)\b", text):
|
|
912
|
-
return 3, "WebDevBench prior: app creation/modification action"
|
|
913
|
-
elif profile == "swe-cycle":
|
|
914
|
-
if re.search(r"\b(fullcycle|envsetup|codeimpl|testgen|phase|requirements?|issue|read|inspect|list|search|get|query|run_script|parsing_script|selected_test_files_to_run|environment_setup_commit|before_repo_set_cmd|image_name)\b", text):
|
|
915
|
-
return 6, "SWE-Cycle prior: identify lifecycle phase, harness fields, and issue requirements"
|
|
916
|
-
if re.search(r"\b(setup|install|bootstrap|dependencies|env|environment|import|collect|discover|build)\b", text):
|
|
917
|
-
return 6, "SWE-Cycle prior: reconstruct bare-repo environment before code/test edits"
|
|
918
|
-
if re.search(r"\b(testgen|test|tests|pytest|jest|vitest|selected|judge|swe[-_ ]?judge|static|dynamic|verify|check)\b", text):
|
|
919
|
-
return 5, "SWE-Cycle prior: generate/validate tests and preserve judge evidence"
|
|
920
|
-
if re.search(r"\b(codeimpl|modify|patch|edit|update|change|implement|repair)\b", text):
|
|
921
|
-
return 3, "SWE-Cycle prior: implementation action after lifecycle context is established"
|
|
922
|
-
elif profile == "swe-ci":
|
|
923
|
-
if re.search(r"\b(current|target|commit|sha|history|log|diff|status|read|inspect|list|search|get|query)\b", text):
|
|
924
|
-
return 6, "SWE-CI prior: establish current/target commits, test gaps, and repo evolution context"
|
|
925
|
-
if re.search(r"\b(run[_ -]?tests?|test|ci|verify|check|tox|nox|act|pytest|unittest)\b", text):
|
|
926
|
-
return 6, "SWE-CI prior: run the CI/test loop and preserve verifier deltas"
|
|
927
|
-
if re.search(r"\b(requirements?|define[_ -]?requirements?|test[_ -]?gap|failure|attribution|plan|locali[sz]e)\b", text):
|
|
928
|
-
return 5, "SWE-CI prior: derive requirements from CI/test gaps before modifying code"
|
|
929
|
-
if re.search(r"\b(modify[_ -]?code|patch|edit|update|change|implement|repair)\b", text):
|
|
930
|
-
return 3, "SWE-CI prior: incremental requirement-backed code modification"
|
|
931
|
-
elif profile == "swe-prbench":
|
|
932
|
-
if re.search(r"\b(pr|pull|request|diff|patch|hunk|changed|files?|review|comment|read|inspect|list|search|get|query)\b", text):
|
|
933
|
-
return 6, "SWE-PRBench prior: inspect PR metadata and changed diff before broad context"
|
|
934
|
-
if re.search(r"\b(test|verify|repro|run|check|typecheck|lint|unit)\b", text):
|
|
935
|
-
return 4, "SWE-PRBench prior: verify suspected review findings when feasible"
|
|
936
|
-
if re.search(r"\b(finish|message|answer|respond|final|review)\b", text):
|
|
937
|
-
return 3, "SWE-PRBench prior: deliver severity-rated review findings once evidence is sufficient"
|
|
938
|
-
if re.search(r"\b(edit|patch|modify|update|write|apply)\b", text):
|
|
939
|
-
return -3, "SWE-PRBench prior: defer code edits unless the review task explicitly asks for patches"
|
|
940
|
-
elif profile == "tml-bench":
|
|
941
|
-
if re.search(r"\b(data|dataset|train|test|sample[_ -]?submission|schema|columns?|target|id|metric|read|inspect|list|search|get|query)\b", text):
|
|
942
|
-
return 6, "TML-Bench prior: establish data contract and submission schema before modeling"
|
|
943
|
-
if re.search(r"\b(validate|validation|split|cv|cross[-_ ]?validation|leakage|baseline|score|metric|check)\b", text):
|
|
944
|
-
return 6, "TML-Bench prior: honest validation and leakage checks before submission"
|
|
945
|
-
if re.search(r"\b(train|fit|model|pipeline|preprocess|feature|predict)\b", text):
|
|
946
|
-
return 4, "TML-Bench prior: build a reliable tabular baseline before complex ensembling"
|
|
947
|
-
if re.search(r"\b(submit|submission|save|write|export|finish|answer|final)\b", text):
|
|
948
|
-
return 4, "TML-Bench prior: produce and validate a schema-compatible submission artifact"
|
|
949
|
-
elif profile == "pi-bench":
|
|
950
|
-
if re.search(r"\b(profile|user|history|message|file|workspace|app|context|state|read|inspect|list|search|get|query)\b", text):
|
|
951
|
-
return 6, "Pi-Bench prior: establish personal/workspace/app context before proactive action"
|
|
952
|
-
if re.search(r"\b(intent|implicit|hidden|latent|need|preference|constraint|policy|privacy|permission|clarif(?:y|ication)|ask)\b", text):
|
|
953
|
-
return 6, "Pi-Bench prior: resolve hidden intent, privacy, and permission uncertainty"
|
|
954
|
-
if re.search(r"\b(tool|action|schedule|send|update|create|modify|book|message|email|calendar|file)\b", text):
|
|
955
|
-
return 4, "Pi-Bench prior: take reversible proactive action only after context is grounded"
|
|
956
|
-
if re.search(r"\b(verify|confirm|observe|check|finish|answer|final|done)\b", text):
|
|
957
|
-
return 4, "Pi-Bench prior: verify observable completion and communicate concise outcome"
|
|
958
|
-
else:
|
|
959
|
-
if re.search(r"\b(observe|read|search|list|get|lookup|inspect|query)\b", text):
|
|
960
|
-
return 4, "generic prior: inspect available state before irreversible actions"
|
|
961
|
-
return 0, ""
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
def _shortlist_item(
|
|
965
|
-
doc: dict[str, Any],
|
|
966
|
-
score: float,
|
|
967
|
-
reasons: list[str],
|
|
968
|
-
schema_keys: list[str],
|
|
969
|
-
required_keys: list[str],
|
|
970
|
-
required_hints: list[dict[str, str]],
|
|
971
|
-
) -> dict[str, Any]:
|
|
972
|
-
return {
|
|
973
|
-
"name": str(doc.get("name") or ""),
|
|
974
|
-
"score": round(score, 2),
|
|
975
|
-
"reason": "; ".join(reasons[:4]) or "available action",
|
|
976
|
-
"argument_keys": schema_keys[:12],
|
|
977
|
-
"required_argument_keys": required_keys[:12],
|
|
978
|
-
"available_required_hints": required_hints[:8],
|
|
979
|
-
"is_finish": bool(doc.get("is_finish", False)),
|
|
980
|
-
"is_message": bool(doc.get("is_message", False)),
|
|
981
|
-
}
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
def extract_action_payload(text: str) -> ActionPayload | None:
    """Return the last valid action payload from cawdex output.

    Supported shapes:
    {"name": "finish", "arguments": {"answer": "..."}}
    {"action": "finish", "arguments": {"answer": "..."}}
    {"action": {"name": "finish", "arguments": {"answer": "..."}}}

    JSON candidates are examined from last to first; the first one that
    coerces into a payload is returned, or ``None`` if none does.
    """
    coerced = (
        _coerce_action_payload(candidate)
        for candidate in reversed(_json_candidates(text))
    )
    return next((payload for payload in coerced if payload is not None), None)
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
def _json_candidates(text: str) -> list[Any]:
|
|
1001
|
-
candidates: list[Any] = []
|
|
1002
|
-
|
|
1003
|
-
for block in re.findall(r"```(?:json|JSON)?\s*(.*?)```", text or "", flags=re.DOTALL):
|
|
1004
|
-
value = _parse_json(block.strip())
|
|
1005
|
-
if value is not None:
|
|
1006
|
-
candidates.append(value)
|
|
1007
|
-
|
|
1008
|
-
marker_re = re.compile(r"cawdex-exgentic action JSON\s*:\s*(\{.*?\})\s*$", re.IGNORECASE | re.DOTALL)
|
|
1009
|
-
marker = marker_re.search(text or "")
|
|
1010
|
-
if marker:
|
|
1011
|
-
value = _parse_json(marker.group(1))
|
|
1012
|
-
if value is not None:
|
|
1013
|
-
candidates.append(value)
|
|
1014
|
-
|
|
1015
|
-
decoder = json.JSONDecoder()
|
|
1016
|
-
for match in re.finditer(r"\{", text or ""):
|
|
1017
|
-
try:
|
|
1018
|
-
value, _ = decoder.raw_decode(text[match.start() :])
|
|
1019
|
-
except Exception:
|
|
1020
|
-
continue
|
|
1021
|
-
candidates.append(value)
|
|
1022
|
-
|
|
1023
|
-
return candidates
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
def _parse_json(text: str) -> Any | None:
|
|
1027
|
-
try:
|
|
1028
|
-
return json.loads(text)
|
|
1029
|
-
except Exception:
|
|
1030
|
-
return None
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
def _coerce_action_payload(value: Any) -> ActionPayload | None:
    """Turn a decoded JSON value into an ``ActionPayload`` when possible.

    The name is taken from ``name``, ``action`` or ``tool``; arguments from
    ``arguments``, ``args`` or ``action_input``. When ``action`` is itself a
    dict, only that nested dict is used (name from its ``name``/``action``/
    ``tool``, arguments from its ``arguments``/``args``). Missing arguments
    become ``{}`` and non-dict arguments are wrapped as ``{"value": ...}``.
    Returns ``None`` for non-dicts or when no non-blank string name exists.
    """
    if not isinstance(value, dict):
        return None

    def first_not_none(mapping: dict[str, Any], *keys: str) -> Any:
        # Only ``None`` counts as "absent"; falsy values like {} are kept.
        for key in keys:
            found = mapping.get(key)
            if found is not None:
                return found
        return None

    inner = value.get("action")
    if isinstance(inner, dict):
        value = {
            "name": inner.get("name") or inner.get("action") or inner.get("tool"),
            "arguments": first_not_none(inner, "arguments", "args"),
        }

    raw_name = value.get("name") or value.get("action") or value.get("tool")
    if not (isinstance(raw_name, str) and raw_name.strip()):
        return None

    raw_arguments = first_not_none(value, "arguments", "args", "action_input")
    if raw_arguments is None:
        raw_arguments = {}
    elif not isinstance(raw_arguments, dict):
        raw_arguments = {"value": raw_arguments}

    return ActionPayload(name=raw_name.strip(), arguments=raw_arguments)
|
|
1
|
+
"""Stdlib helpers for the cawdex Exgentic adapter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from difflib import SequenceMatcher
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# (compiled pattern, placeholder) pairs applied in order by ``redact``.
# The more specific ``sk-or-v1-`` pattern is listed before the generic ``sk-``
# pattern so that such tokens keep their own placeholder.
SECRET_REPLACEMENTS = [
    (re.compile(pattern), placeholder)
    for pattern, placeholder in (
        (r"sk-or-v1-[A-Za-z0-9_-]+", "sk-or-v1-[REDACTED]"),
        (r"sk-[A-Za-z0-9_-]{16,}", "sk-[REDACTED]"),
        (r"hf_[A-Za-z0-9]{16,}", "hf_[REDACTED]"),
        (r"KGAT_[A-Za-z0-9]{16,}", "KGAT_[REDACTED]"),
        (r"npm_[A-Za-z0-9]{16,}", "npm_[REDACTED]"),
    )
]
|
|
19
|
+
|
|
20
|
+
# Lower-case words that ``_keyword_tokens`` never reports as keywords.
STOPWORDS = set(
    """
    about after again also and any are available been before being can
    context could current does for from has have into latest need needs not
    observation only requested should task that the then this use user with
    you
    """.split()
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass(frozen=True)
class ActionPayload:
    """Machine-readable action selected by cawdex.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Name of the action to invoke.
    name: str
    # Keyword arguments for the action, keyed by argument name.
    arguments: dict[str, Any]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass(frozen=True)
class ActionRepairResult:
    """Deterministic repair result for benchmark action JSON.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # The action payload after repair (or fallback selection).
    payload: ActionPayload
    # Whether the returned payload differs from what was supplied.
    changed: bool
    # Free-form details describing what was matched, repaired or skipped.
    diagnostics: dict[str, Any]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def redact(value: Any) -> str:
    """Stringify *value* and mask every token matched by ``SECRET_REPLACEMENTS``.

    Falsy input (``None``, ``""``, ``0``, empty containers) becomes ``""``.
    """
    scrubbed = str(value or "")
    for matcher, placeholder in SECRET_REPLACEMENTS:
        scrubbed = matcher.sub(placeholder, scrubbed)
    return scrubbed
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def truncate(value: Any, limit: int = 80000) -> str:
    """Redact *value* and cut it to *limit* characters.

    When text is cut, a ``...[truncated N chars]`` note on a new line is
    appended (so the result is longer than *limit* by the note's length).
    """
    text = redact(value)
    overflow = len(text) - limit
    if overflow <= 0:
        return text
    return f"{text[:limit]}\n...[truncated {overflow} chars]"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def json_dumps(value: Any, *, limit: int = 80000) -> str:
    """Serialize *value* as pretty, key-sorted JSON, then redact and truncate.

    Objects JSON cannot encode are stringified via ``default=str``; if
    serialization still fails, ``str(value)`` is used instead.
    """
    try:
        rendered = json.dumps(
            value,
            ensure_ascii=False,
            indent=2,
            sort_keys=True,
            default=str,
        )
    except Exception:
        # e.g. keys that cannot be sorted against each other.
        rendered = str(value)
    return truncate(rendered, limit=limit)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def fold_exgentic_history(
    history: list[dict[str, Any]],
    *,
    profile: str = "generic",
    max_items: int = 16,
    item_limit: int = 1200,
) -> dict[str, Any]:
    """Build a compact task-relevant ledger for long Exgentic sessions.

    The adapter keeps the full raw history in memory. This folded view is what
    goes back into the next model call, so noisy stdout does not crowd out the
    latest app state, policy evidence, source evidence, or selected actions.

    Args:
        history: Raw history entries; each is expected to be a dict with a
            ``role`` and ``content``. Entries that are not dicts are skipped
            (they still count towards turn numbering and ``turns_seen``).
        profile: Benchmark profile name, echoed back and used to pick the
            discipline text.
        max_items: Maximum number of recent observations, actions and
            diagnostics to keep. Zero or a negative value keeps none.
        item_limit: Character cap for each serialized observation or repair
            record.

    Returns:
        A dict tagged ``cawdex-exgentic-folded-history-v1`` holding the latest
        observation/action, the recent windows, per-action counts and
        diagnostics.
    """

    observations: list[dict[str, Any]] = []
    actions: list[dict[str, Any]] = []
    diagnostics: list[dict[str, Any]] = []
    action_counts: dict[str, int] = {}

    def recent(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
        # ``items[-0:]`` would return the whole list, so handle <= 0 explicitly.
        return items[-max_items:] if max_items > 0 else []

    for idx, item in enumerate(history or [], start=1):
        # Sibling helpers tolerate malformed entries; do the same here
        # instead of failing on ``item.get``.
        if not isinstance(item, dict):
            continue
        role = str(item.get("role", ""))
        if role == "observation":
            observations.append(
                {
                    "turn": idx,
                    # json_dumps already redacts and truncates to item_limit;
                    # a second truncation only corrupted the omitted count.
                    "summary": json_dumps(item.get("content"), limit=item_limit),
                }
            )
        elif role == "selected_action":
            compact_actions = _compact_selected_actions(item.get("content"), item_limit=item_limit)
            for action in compact_actions:
                name = action.get("name") or "unknown"
                action_counts[name] = action_counts.get(name, 0) + 1
            actions.append({"turn": idx, "actions": compact_actions})
        elif role == "cawdex":
            diagnostic = _compact_cawdex_diagnostic(item, item_limit=item_limit)
            if diagnostic is not None:
                diagnostics.append({"turn": idx, **diagnostic})
        elif role == "action_repair":
            diagnostics.append(
                {
                    "turn": idx,
                    "kind": "action_repair",
                    "evidence": json_dumps(item.get("content"), limit=item_limit),
                }
            )

    latest_observation = observations[-1] if observations else None
    latest_action = actions[-1] if actions else None
    return {
        "format": "cawdex-exgentic-folded-history-v1",
        "profile": profile,
        "turns_seen": len(history or []),
        "latest_observation": latest_observation,
        "latest_action": latest_action,
        "no_effect_repeat_actions": _recent_no_effect_action_names(history or []),
        "recent_observations": recent(observations),
        "recent_actions": recent(actions),
        "diagnostics": recent(diagnostics),
        "action_counts": action_counts,
        "discipline": _folding_discipline(profile),
    }
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def repair_exgentic_action_payload(
    payload: ActionPayload,
    action_docs: list[dict[str, Any]],
    *,
    argument_hints: Any = None,
) -> ActionRepairResult:
    """Repair near-miss action names and argument keys before ActionType build.

    This is intentionally deterministic and conservative. It fixes common model
    output drift such as camelCase action names, case-only mismatches, and
    schema-key casing/separator mistakes, while leaving unresolved names intact
    so the caller can still fail or fallback explicitly.

    The returned diagnostics carry a ``status`` of ``unresolved_action_name``,
    ``repaired`` or ``unchanged`` plus the name-match details and whatever the
    argument repair step reports.
    """

    usable_docs = [
        entry
        for entry in action_docs or []
        if isinstance(entry, dict) and entry.get("name")
    ]
    matched_doc, match_reason, match_score = _resolve_action_doc(payload.name, usable_docs)

    if matched_doc:
        repaired_name = str(matched_doc.get("name"))
        schema = matched_doc.get("arguments_schema")
    else:
        # Leave the name untouched so the caller can see it never resolved.
        repaired_name = payload.name
        schema = None

    repaired_args, arg_diagnostics = _repair_action_arguments(
        payload.arguments,
        schema,
        argument_hints=argument_hints,
    )

    name_changed = repaired_name != payload.name
    changed = name_changed or repaired_args != payload.arguments

    status = "unchanged"
    if matched_doc is None:
        status = "unresolved_action_name"
    elif changed:
        status = "repaired"

    diagnostics = {
        "status": status,
        "original_name": payload.name,
        "repaired_name": repaired_name,
        "name_match_reason": match_reason,
        "name_match_score": round(match_score, 3),
        **arg_diagnostics,
    }
    repaired_payload = ActionPayload(name=repaired_name, arguments=repaired_args)
    return ActionRepairResult(payload=repaired_payload, changed=changed, diagnostics=diagnostics)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def fallback_exgentic_action_payload(
    action_docs: list[dict[str, Any]],
    *,
    task: Any = None,
    context: Any = None,
    history: list[dict[str, Any]] | None = None,
    profile: str = "generic",
    reason: str = "no_valid_action_json",
) -> ActionRepairResult | None:
    """Select a conservative fallback action when the model emits no valid JSON.

    The old adapter bias was finish/message first. That is dangerous for
    multi-step benchmarks because a transient malformed response can become a
    premature stop. This selector reuses the same shortlist and exact required
    argument hints as the main prompt, preferring viable non-finish actions
    while the latest observation is not completion-ready.

    Returns ``None`` when there are no named action docs or when every
    candidate is skipped (completion not ready, or required arguments that
    the hints cannot fill).
    """

    usable_docs = [
        entry
        for entry in action_docs or []
        if isinstance(entry, dict) and entry.get("name")
    ]
    if not usable_docs:
        return None

    history_items = history or []
    argument_hints = {
        "latest_observation": _latest_history_content(history_items, "observation"),
        "context": context or {},
    }
    shortlist = shortlist_exgentic_actions(
        usable_docs,
        task=task,
        context=context,
        history=history_items,
        profile=profile,
    )
    completion_ready = bool(shortlist.get("completion_ready"))
    no_effect_repeat_actions = [
        str(entry)
        for entry in shortlist.get("avoid_no_effect_repeat_actions") or []
        if str(entry)
    ]
    candidate_names = _fallback_candidate_names(
        shortlist,
        usable_docs,
        completion_ready=completion_ready,
        avoid_names=no_effect_repeat_actions,
    )

    skipped: list[dict[str, Any]] = []
    for candidate in candidate_names:
        doc = _doc_by_name(usable_docs, candidate)
        if doc is None:
            continue

        is_completion = bool(doc.get("is_finish") or doc.get("is_message"))
        if is_completion and not completion_ready:
            skipped.append({"name": candidate, "reason": "completion_not_ready"})
            continue

        # Start from empty arguments and let the repair step fill what the
        # hints can supply.
        repair = repair_exgentic_action_payload(
            ActionPayload(name=candidate, arguments={}),
            usable_docs,
            argument_hints=argument_hints,
        )
        missing = _missing_required_arguments(repair.payload.arguments, doc.get("arguments_schema"))
        completion_allowed = completion_ready and is_completion
        if missing and not completion_allowed:
            skipped.append({"name": candidate, "reason": "missing_required_arguments", "missing": missing})
            continue

        diagnostics = {
            "status": "fallback_selected",
            "fallback_reason": reason,
            "selected_name": repair.payload.name,
            "completion_ready": completion_ready,
            "avoid_no_effect_repeat_actions": no_effect_repeat_actions,
            "candidate_names": candidate_names[:12],
            "skipped_candidates": skipped[:8],
            "shortlist": shortlist,
            "repair": repair.diagnostics,
        }
        return ActionRepairResult(payload=repair.payload, changed=True, diagnostics=diagnostics)

    return None
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def shortlist_exgentic_actions(
    action_docs: list[dict[str, Any]],
    *,
    task: Any = None,
    context: Any = None,
    history: list[dict[str, Any]] | None = None,
    profile: str = "generic",
    limit: int = 8,
) -> dict[str, Any]:
    """Score the available actions and return the best few as a shortlist.

    Only dict entries of ``action_docs`` with a truthy ``name`` are scored.
    Each action earns or loses points from: keyword overlap with the
    task/context/latest observation, schema keys that appear in that text,
    required arguments that can be found in the current state, a
    profile-specific prior, the action name being spelled out in the latest
    observation, completion readiness, and penalties for repeating the
    previous or a no-effect action.

    Args:
        action_docs: Action descriptions (``name``, ``arguments_schema``, ...).
        task: Task description; stringified into the matching text.
        context: Extra state; serialized into the matching text and used as a
            source of argument hints.
        history: Conversation entries; the latest ``observation`` entry and
            the selected-action / cawdex entries are consulted.
        profile: Benchmark profile name forwarded to the prior helper.
        limit: Requested shortlist size, clamped to the range 1..16.

    Returns:
        A dict describing the shortlist (format tag, profile, counts,
        completion readiness, actions to avoid, the shortlisted items, the
        deferred completion actions and a fixed discipline note).
    """
    turns = history or []
    usable_docs = [
        entry for entry in action_docs or [] if isinstance(entry, dict) and entry.get("name")
    ]
    capped_limit = min(16, int(limit or 8))
    if capped_limit < 1:
        capped_limit = 1

    observation = _latest_history_content(turns, "observation")
    observation_text = "" if observation is None else json_dumps(observation, limit=6000)
    hints = {
        "latest_observation": observation,
        "context": context or {},
    }
    state_text = " ".join(
        (
            str(task or ""),
            json_dumps(context or {}, limit=6000),
            observation_text,
        )
    ).lower()
    state_tokens = _keyword_tokens(state_text)
    ready = _completion_ready(observation_text)
    previous_action = _latest_selected_action_name(turns)
    recent_error = _has_recent_action_error(turns)
    stale_actions = _recent_no_effect_action_names(turns)
    stale_lookup = {stale.lower() for stale in stale_actions}
    # Lower-cased once here; it is the same for every action in the loop.
    observation_lower = observation_text.lower()

    ranked: list[tuple[float, str, dict[str, Any], list[str], list[str], list[str], list[dict[str, str]]]] = []
    for entry in usable_docs:
        action_name = str(entry.get("name") or "")
        folded_name = action_name.lower()
        doc_text = _action_doc_text(entry)
        args_schema = entry.get("arguments_schema")
        property_keys = _schema_property_keys(args_schema)
        mandatory_keys = _schema_required_keys(args_schema)
        mandatory_hints = _required_argument_hints(mandatory_keys, hints)
        points = 0.0
        why: list[str] = []

        # Keyword overlap between the current state and the action text.
        matched_tokens = [word for word in state_tokens if word in doc_text][:6]
        if matched_tokens:
            points += min(12, len(matched_tokens) * 2)
            why.append(f"matches task/observation tokens: {', '.join(matched_tokens)}")

        # Schema property names that literally occur in the current state.
        matched_keys = [prop for prop in property_keys if prop.lower() in state_text][:6]
        if matched_keys:
            points += min(10, len(matched_keys) * 2)
            why.append(f"schema keys appear in current state: {', '.join(matched_keys)}")

        if mandatory_hints:
            points += min(8, len(mandatory_hints) * 3)
            available = ", ".join(hint["key"] for hint in mandatory_hints[:4])
            why.append("required args available in current state: " + available)

        prior_points, prior_text = _profile_action_prior(profile, action_name, doc_text)
        if prior_points:
            points += prior_points
            why.append(prior_text)

        # Every word of the action name (minus "action") is in the observation.
        name_words = [
            word for word in _keyword_tokens(action_name.replace("_", " ")) if word != "action"
        ]
        if name_words and all(word in observation_lower for word in name_words):
            points += 8
            why.append("action name matches explicit latest-observation cue")

        if bool(entry.get("is_finish") or entry.get("is_message")):
            if ready:
                points += 8
                why.append("latest observation suggests completion is ready")
            else:
                points -= 7
                why.append("defer finish/message until benchmark-visible completion evidence")

        if previous_action and folded_name == previous_action.lower():
            points -= 2
            why.append("same as previous selected action")
            if recent_error:
                points -= 4
                why.append("avoid repeating after recent action/schema error")

        if not ready and folded_name in stale_lookup:
            points -= 10
            why.append("avoid repeating no-effect action; latest observation did not change")

        ranked.append(
            (points, folded_name, entry, why, property_keys, mandatory_keys, mandatory_hints)
        )

    # Highest score first; ties broken by lower-cased name.
    ranked.sort(key=lambda row: (-row[0], row[1]))
    top_items = []
    for points, _folded, entry, why, property_keys, mandatory_keys, mandatory_hints in ranked[:capped_limit]:
        top_items.append(
            _shortlist_item(entry, points, why, property_keys, mandatory_keys, mandatory_hints)
        )

    top_names = {str(item.get("name", "")).lower() for item in top_items}
    held_back: list[str] = []
    for entry in usable_docs:
        if not (entry.get("is_finish") or entry.get("is_message")):
            continue
        if str(entry.get("name", "")).lower() in top_names:
            continue
        if ready:
            continue
        held_back.append(str(entry.get("name")))

    return {
        "format": "cawdex-exgentic-action-shortlist-v1",
        "profile": profile,
        "action_count": len(usable_docs),
        "shortlist_limit": capped_limit,
        "completion_ready": ready,
        "avoid_no_effect_repeat_actions": stale_actions,
        "shortlisted_actions": top_items,
        "deferred_completion_actions": held_back,
        "discipline": "Prefer shortlisted actions when they fit the latest observation; use full schemas below if the current state clearly requires a non-shortlisted action. If avoid_no_effect_repeat_actions is non-empty, change strategy unless no other viable action has its required arguments.",
    }
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def safe_id(value: Any, default: str = "session") -> str:
    """Turn ``value`` into an identifier made of ``A-Za-z0-9_.-`` only.

    A falsy ``value`` is replaced by ``default`` before cleaning. Each run of
    disallowed characters becomes a single ``-``, and leading/trailing ``-``
    are removed. If nothing is left, ``default`` is returned unchanged.
    """
    text = str(value) if value else str(default)
    cleaned = re.sub(r"[^A-Za-z0-9_.-]+", "-", text)
    cleaned = cleaned.strip("-")
    if cleaned:
        return cleaned
    return default
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def _compact_selected_actions(value: Any, *, item_limit: int) -> list[dict[str, Any]]:
    """Summarize one selected action, or a list of them, as small dicts.

    A non-list ``value`` is treated as a single action. Dict actions yield
    their name, sorted argument keys and a truncated JSON rendering of the
    arguments; anything else yields an ``"unknown"`` entry with a truncated
    summary. Non-dict ``arguments`` are wrapped as ``{"value": ...}``.
    """
    entries = value if isinstance(value, list) else [value]
    summaries: list[dict[str, Any]] = []
    for entry in entries:
        if isinstance(entry, dict):
            arguments = entry.get("arguments", {})
            if not isinstance(arguments, dict):
                arguments = {"value": arguments}
            summaries.append(
                {
                    "name": str(entry.get("name") or "unknown"),
                    "argument_keys": sorted(map(str, arguments)),
                    "arguments": truncate(json_dumps(arguments, limit=item_limit), limit=item_limit),
                }
            )
        else:
            summaries.append({"name": "unknown", "summary": truncate(entry, limit=item_limit)})
    return summaries
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
def _compact_cawdex_diagnostic(item: dict[str, Any], *, item_limit: int) -> dict[str, Any] | None:
    """Return a short diagnostic for a history item that looks like a failure.

    The item's ``stderr`` and ``stdout`` are joined (stderr first, empty parts
    skipped). When ``returncode`` is ``None`` or ``0`` and the joined text
    contains none of the error keywords, ``None`` is returned. Otherwise the
    result carries the return code and the truncated text as evidence.
    """
    exit_code = item.get("returncode")
    err_text = str(item.get("stderr") or "")
    out_text = str(item.get("stdout") or "")
    combined = "\n".join(filter(None, (err_text, out_text)))

    if exit_code in (None, 0):
        looks_bad = re.search(
            r"\b(error|invalid|unknown action|schema|malformed|permission|timed out|timeout|failed)\b",
            combined,
            flags=re.IGNORECASE,
        )
        if not looks_bad:
            return None

    return {
        "returncode": exit_code,
        "evidence": truncate(combined, limit=item_limit),
    }
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def _folding_discipline(profile: str) -> str:
|
|
461
|
+
if profile == "appworld":
|
|
462
|
+
return "Use latest_observation as authoritative app/API state; preserve IDs, dates, permissions, and record integrity."
|
|
463
|
+
if profile == "browsecomp":
|
|
464
|
+
return "Carry forward verified sources and unresolved search facets; do not treat snippets or stale single-source claims as final evidence."
|
|
465
|
+
if profile == "tau2":
|
|
466
|
+
return "Carry forward policy constraints, customer intent, tool results, and pending confirmations before selecting the next action."
|
|
467
|
+
if profile == "terminalworld":
|
|
468
|
+
return "TerminalWorld discipline: carry forward instruction.md/task artifact requirements, generated files/services, command outputs, verifier status, and any solve.sh/reference-solution avoidance before selecting the next action."
|
|
469
|
+
if profile == "webdevbench":
|
|
470
|
+
return "Carry forward canary requirements, frontend/backend state, integration evidence, and production/security gaps before selecting the next action."
|
|
471
|
+
if profile == "swe-cycle":
|
|
472
|
+
return "Carry forward lifecycle phase, bare-repo environment setup state, implementation requirements, generated/selected tests, judge commands, and unresolved phase gaps before selecting the next action."
|
|
473
|
+
if profile == "swe-ci":
|
|
474
|
+
return "Carry forward current/target commits, test gaps, inferred requirements, touched files, verifier deltas, and unresolved regressions before selecting the next action."
|
|
475
|
+
if profile == "swe-prbench":
|
|
476
|
+
return "Carry forward PR title/description, changed files, diff hunks, suspected findings, evidence gaps, and context-expansion reasons before selecting the next action."
|
|
477
|
+
if profile == "tml-bench":
|
|
478
|
+
return "Carry forward train/test/sample submission paths, ID/target columns, metric, validation split, leakage checks, model artifacts, submission path, and validity evidence before selecting the next action."
|
|
479
|
+
if profile == "pi-bench":
|
|
480
|
+
return "Carry forward user profile, current request, message/file/app context, available domain tools, hidden-intent hypotheses, clarification state, privacy risk, selected actions, and observable completion evidence before selecting the next action."
|
|
481
|
+
return "Use the folded ledger as orientation, then rely on the latest observation and available action schemas for the next action."
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def _resolve_action_doc(
    name: str,
    docs: list[dict[str, Any]],
) -> tuple[dict[str, Any] | None, str, float]:
    """Find the action doc that ``name`` most plausibly refers to.

    Matching is tried from strictest to loosest: exact name, case-insensitive
    name, equal normalized identifier, then a fuzzy comparison of normalized
    identifiers. The fuzzy match is accepted only when the best score is at
    least 0.82 and beats the runner-up by at least 0.04.

    Returns:
        ``(doc, how, score)`` where ``how`` is one of ``"empty"``,
        ``"exact"``, ``"case_insensitive"``, ``"normalized_identifier"``,
        ``"fuzzy_identifier"`` or ``"unresolved"``. ``doc`` is ``None`` for
        ``"empty"`` and ``"unresolved"``.
    """
    wanted = str(name or "")
    if not wanted:
        return None, "empty", 0.0

    def label_of(doc: dict[str, Any]) -> str:
        return str(doc.get("name") or "")

    hit = next((doc for doc in docs if label_of(doc) == wanted), None)
    if hit is not None:
        return hit, "exact", 1.0

    wanted_folded = wanted.lower()
    hit = next((doc for doc in docs if label_of(doc).lower() == wanted_folded), None)
    if hit is not None:
        return hit, "case_insensitive", 1.0

    wanted_norm = _normalized_identifier(wanted)
    hit = next((doc for doc in docs if _normalized_identifier(label_of(doc)) == wanted_norm), None)
    if hit is not None:
        return hit, "normalized_identifier", 1.0

    winner: dict[str, Any] | None = None
    top = 0.0
    runner_up = 0.0
    for doc in docs:
        other_norm = _normalized_identifier(label_of(doc))
        similarity = 0.0
        if wanted_norm and other_norm:
            similarity = SequenceMatcher(None, wanted_norm, other_norm).ratio()
            # One identifier containing the other counts as at least 0.82.
            if wanted_norm in other_norm or other_norm in wanted_norm:
                similarity = max(similarity, 0.82)
        if similarity > top:
            winner, top, runner_up = doc, similarity, top
        elif similarity > runner_up:
            runner_up = similarity

    clear_winner = winner is not None and top >= 0.82 and top - runner_up >= 0.04
    if clear_winner:
        return winner, "fuzzy_identifier", top
    return None, "unresolved", top
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def _repair_action_arguments(
    arguments: dict[str, Any],
    schema: Any,
    *,
    argument_hints: Any = None,
) -> tuple[dict[str, Any], dict[str, Any]]:
    """Map argument keys onto the schema's property names and fill gaps.

    Keys that already equal a schema property are kept. Other keys are
    matched through their normalized identifier and renamed to the schema
    key; keys with no counterpart are dropped. Afterwards, missing required
    arguments are filled from ``argument_hints`` where possible.

    An exactly-named argument always wins over an alias of the same schema
    key: with ``{"user_id": 1, "userId": 2}`` the result keeps ``1`` and
    reports ``userId`` as dropped, regardless of dict order.

    Args:
        arguments: Arguments as supplied; not mutated.
        schema: Arguments schema of the action (expected to be a dict).
        argument_hints: Nested state searched for required values.

    Returns:
        ``(repaired_arguments, diagnostics)``. When the schema declares no
        property keys, the arguments are returned as a plain copy and all
        diagnostic lists are empty.
    """
    args = dict(arguments or {})
    schema_keys = _schema_property_keys(schema)
    if not schema_keys:
        return args, {
            "argument_key_repairs": [],
            "dropped_argument_keys": [],
            "filled_required_arguments": [],
            "schema_keys": [],
        }

    known_keys = set(schema_keys)
    key_by_normalized = {_normalized_identifier(key): key for key in schema_keys}
    # Schema keys the caller supplied verbatim; aliases must not clobber them.
    exact_keys = {str(key) for key in args if str(key) in known_keys}

    repaired: dict[str, Any] = {}
    key_repairs: list[dict[str, str]] = []
    dropped: list[str] = []

    for key, value in args.items():
        text_key = str(key)
        if text_key in known_keys:
            repaired[text_key] = value
            continue
        canonical = key_by_normalized.get(_normalized_identifier(text_key))
        if canonical is None or canonical in exact_keys:
            # Unknown key, or an alias shadowed by an exactly-named argument.
            dropped.append(text_key)
            continue
        repaired[canonical] = value
        key_repairs.append({"from": text_key, "to": canonical})

    filled = _fill_required_arguments(repaired, schema, argument_hints)
    return repaired, {
        "argument_key_repairs": key_repairs,
        "dropped_argument_keys": dropped,
        "filled_required_arguments": filled,
        "schema_keys": schema_keys[:24],
    }
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def _fill_required_arguments(
    repaired: dict[str, Any],
    schema: Any,
    argument_hints: Any,
) -> list[dict[str, str]]:
    """Fill missing required arguments in ``repaired`` from ``argument_hints``.

    Required keys are read through ``_schema_required_keys`` so that a
    malformed ``required`` entry (not a list, or containing empty/``None``
    items) is ignored rather than misread as key names. This is the same
    parsing ``_missing_required_arguments`` uses.

    Args:
        repaired: Arguments to complete; mutated in place.
        schema: Arguments schema of the action.
        argument_hints: Nested state searched for values by normalized key.

    Returns:
        One ``{"key", "source"}`` entry per argument that was filled.
    """
    required = _schema_required_keys(schema)
    if not required:
        return []

    hint_index = _argument_hint_index(argument_hints)
    filled: list[dict[str, str]] = []
    # Compared by normalized identifier so spelling variants count as present.
    existing = {_normalized_identifier(key) for key in repaired}

    for key in required:
        norm = _normalized_identifier(key)
        if not norm or norm in existing:
            continue
        match = hint_index.get(norm)
        if match is None:
            continue
        value, source = match
        if value is None:
            continue
        repaired[key] = value
        existing.add(norm)
        filled.append({"key": key, "source": source})
    return filled
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
def _argument_hint_index(value: Any) -> dict[str, tuple[Any, str]]:
    """Index usable values in a nested dict/list structure by normalized key.

    The structure is walked depth-first. For each dict key, the first usable
    value seen under that normalized key is kept, together with a dotted /
    bracketed path to it. Only the first 50 items of any list are visited.
    """
    found: dict[str, tuple[Any, str]] = {}

    def walk(node: Any, trail: str) -> None:
        if isinstance(node, list):
            for position, element in enumerate(node[:50]):
                walk(element, f"{trail}[{position}]" if trail else f"[{position}]")
            return
        if not isinstance(node, dict):
            return
        for raw_key, element in node.items():
            label = str(raw_key)
            here = f"{trail}.{label}" if trail else label
            ident = _normalized_identifier(label)
            if ident and ident not in found and _hint_value_is_usable(element):
                found[ident] = (element, here)
            walk(element, here)

    walk(value, "")
    return found
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
def _hint_value_is_usable(value: Any) -> bool:
|
|
623
|
+
if value is None:
|
|
624
|
+
return False
|
|
625
|
+
if isinstance(value, str):
|
|
626
|
+
return bool(value.strip())
|
|
627
|
+
if isinstance(value, (bool, int, float)):
|
|
628
|
+
return True
|
|
629
|
+
if isinstance(value, (dict, list)):
|
|
630
|
+
return bool(value)
|
|
631
|
+
return True
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
def _normalized_identifier(value: Any) -> str:
|
|
635
|
+
return re.sub(r"[^a-z0-9]+", "", str(value or "").lower())
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
def _latest_history_content(history: list[dict[str, Any]], role: str) -> Any | None:
|
|
639
|
+
for item in reversed(history or []):
|
|
640
|
+
if isinstance(item, dict) and item.get("role") == role:
|
|
641
|
+
return item.get("content")
|
|
642
|
+
return None
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def _latest_selected_action_name(history: list[dict[str, Any]]) -> str | None:
    """Return the name of the first named action in the latest selection.

    The latest ``selected_action`` content may be one action or a list of
    them; the first dict with a truthy ``name`` wins. ``None`` if none has one.
    """
    latest = _latest_history_content(history, "selected_action")
    pool = latest if isinstance(latest, list) else [latest]
    named = (
        str(entry.get("name"))
        for entry in pool
        if isinstance(entry, dict) and entry.get("name")
    )
    return next(named, None)
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
def _recent_no_effect_action_names(history: list[dict[str, Any]]) -> list[str]:
    """List actions taken between the last two observations if nothing changed.

    "Nothing changed" means the two most recent observations have the same
    fingerprint, or the latest one contains a phrase such as "no change" or
    "still pending". With fewer than two observations, or when the state did
    change, the result is empty. Names are returned once each, in order.
    """
    entries = history or []

    # Positions of the two most recent observations, newest first.
    observation_positions: list[int] = []
    for position in range(len(entries) - 1, -1, -1):
        entry = entries[position]
        if isinstance(entry, dict) and entry.get("role") == "observation":
            observation_positions.append(position)
            if len(observation_positions) == 2:
                break
    if len(observation_positions) < 2:
        return []
    newest, older = observation_positions

    newest_content = entries[newest].get("content")
    older_content = entries[older].get("content")
    newest_text = json_dumps(newest_content, limit=4000).lower()

    same_state = _observation_fingerprint(newest_content) == _observation_fingerprint(older_content)
    if not same_state:
        says_unchanged = re.search(
            r"\b(no change|no changes|unchanged|same state|nothing changed|no effect|still pending|"
            r"did not (?:change|update|move|complete|resolve)|not (?:changed|updated|completed|resolved))\b",
            newest_text,
        )
        if not says_unchanged:
            return []

    collected: list[str] = []
    for entry in entries[older + 1:newest]:
        if isinstance(entry, dict) and entry.get("role") == "selected_action":
            for action_name in _selected_action_names(entry.get("content")):
                push_unique(collected, action_name)
    return collected
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
def _selected_action_names(value: Any) -> list[str]:
    """Collect the distinct action names from one action or a list of them.

    Entries that are not dicts, or whose ``name`` is falsy, are ignored.
    First-seen order is preserved.
    """
    pool = value if isinstance(value, list) else [value]
    collected: list[str] = []
    for entry in pool:
        if not isinstance(entry, dict):
            continue
        label = entry.get("name")
        if label:
            push_unique(collected, str(label))
    return collected
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def _observation_fingerprint(value: Any) -> str:
    """Return a comparison key for an observation.

    The observation is serialized (capped at 8000), lower-cased, and every
    whitespace run is collapsed to a single space with the ends trimmed.
    """
    serialized = json_dumps(value, limit=8000)
    collapsed = re.sub(r"\s+", " ", serialized.lower())
    return collapsed.strip()
|
|
707
|
+
|
|
708
|
+
|
|
709
|
+
def _has_recent_action_error(history: list[dict[str, Any]]) -> bool:
    """Tell whether any of the last four history entries is a failed cawdex run.

    Only dict entries with role ``"cawdex"`` are checked; an entry counts as
    failed when ``_compact_cawdex_diagnostic`` produces a diagnostic for it.
    """
    window = (history or [])[-4:]
    return any(
        isinstance(entry, dict)
        and entry.get("role") == "cawdex"
        and _compact_cawdex_diagnostic(entry, item_limit=600) is not None
        for entry in reversed(window)
    )
|
|
717
|
+
|
|
718
|
+
|
|
719
|
+
def _keyword_tokens(text: str) -> list[str]:
    """Extract up to 80 distinct lower-case keywords from ``text``.

    A candidate starts with a letter and has at least three characters from
    letters, digits, ``_`` and ``-``. Leading/trailing ``-``/``_`` are
    removed; stopwords, tokens shorter than three characters and repeats are
    skipped. First-seen order is preserved.
    """
    # Dict keys give both the uniqueness check and first-seen ordering.
    ordered: dict[str, None] = {}
    for found in re.finditer(r"[A-Za-z][A-Za-z0-9_-]{2,}", text or ""):
        word = found.group(0).lower().strip("-_")
        if len(word) < 3 or word in STOPWORDS or word in ordered:
            continue
        ordered[word] = None
        if len(ordered) >= 80:
            break
    return list(ordered)
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
def _completion_ready(latest_observation_text: str) -> bool:
|
|
734
|
+
text = (latest_observation_text or "").lower()
|
|
735
|
+
if not text:
|
|
736
|
+
return False
|
|
737
|
+
if re.search(r"\b(pending|missing|need|needs|required|error|failed|invalid|not complete|unresolved)\b", text):
|
|
738
|
+
return False
|
|
739
|
+
return bool(re.search(r"\b(done|complete|completed|success|succeeded|confirmed|final answer|resolved)\b", text))
|
|
740
|
+
|
|
741
|
+
|
|
742
|
+
def _action_doc_text(doc: dict[str, Any]) -> str:
    """Build the lower-case searchable text for one action doc.

    The text is the name, the description, the schema property keys and the
    serialized arguments schema (capped at 4000), joined by spaces.
    """
    schema = doc.get("arguments_schema")
    pieces = (
        str(doc.get("name") or ""),
        str(doc.get("description") or ""),
        " ".join(_schema_property_keys(schema)),
        json_dumps(schema or {}, limit=4000),
    )
    return " ".join(pieces).lower()
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
def _schema_property_keys(schema: Any) -> list[str]:
|
|
753
|
+
if not isinstance(schema, dict):
|
|
754
|
+
return []
|
|
755
|
+
keys: list[str] = []
|
|
756
|
+
properties = schema.get("properties")
|
|
757
|
+
if isinstance(properties, dict):
|
|
758
|
+
keys.extend(str(key) for key in properties.keys())
|
|
759
|
+
for nested_key in ("$defs", "definitions"):
|
|
760
|
+
nested = schema.get(nested_key)
|
|
761
|
+
if isinstance(nested, dict):
|
|
762
|
+
for value in nested.values():
|
|
763
|
+
keys.extend(_schema_property_keys(value))
|
|
764
|
+
seen: set[str] = set()
|
|
765
|
+
deduped: list[str] = []
|
|
766
|
+
for key in keys:
|
|
767
|
+
lowered = key.lower()
|
|
768
|
+
if lowered in seen:
|
|
769
|
+
continue
|
|
770
|
+
seen.add(lowered)
|
|
771
|
+
deduped.append(key)
|
|
772
|
+
return deduped
|
|
773
|
+
|
|
774
|
+
|
|
775
|
+
def _schema_required_keys(schema: Any) -> list[str]:
|
|
776
|
+
if not isinstance(schema, dict):
|
|
777
|
+
return []
|
|
778
|
+
required = schema.get("required")
|
|
779
|
+
if not isinstance(required, list):
|
|
780
|
+
return []
|
|
781
|
+
seen: set[str] = set()
|
|
782
|
+
keys: list[str] = []
|
|
783
|
+
for key in required:
|
|
784
|
+
text = str(key or "").strip()
|
|
785
|
+
lowered = text.lower()
|
|
786
|
+
if not text or lowered in seen:
|
|
787
|
+
continue
|
|
788
|
+
seen.add(lowered)
|
|
789
|
+
keys.append(text)
|
|
790
|
+
return keys
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
def _required_argument_hints(required_keys: list[str], argument_hints: Any) -> list[dict[str, str]]:
    """Report which required keys have a usable value in ``argument_hints``.

    Keys are looked up by normalized identifier. Each hit yields the key,
    the path of the value inside the hints, and a preview of the value
    capped at 360.
    """
    if not required_keys:
        return []

    lookup = _argument_hint_index(argument_hints)
    previews: list[dict[str, str]] = []
    for required in required_keys:
        found = lookup.get(_normalized_identifier(required))
        if found is None:
            continue
        hint_value, origin = found
        if _hint_value_is_usable(hint_value):
            previews.append(
                {
                    "key": required,
                    "source": origin,
                    "value_preview": truncate(json_dumps(hint_value, limit=360), limit=360),
                }
            )
    return previews
|
|
813
|
+
|
|
814
|
+
|
|
815
|
+
def _missing_required_arguments(arguments: dict[str, Any], schema: Any) -> list[str]:
    """List the schema's required keys that ``arguments`` does not satisfy.

    An argument counts as supplied only when its value is usable; keys are
    compared by normalized identifier.
    """
    needed = _schema_required_keys(schema)
    if not needed:
        return []

    supplied: set[str] = set()
    for key, value in (arguments or {}).items():
        if _hint_value_is_usable(value):
            supplied.add(_normalized_identifier(key))

    return [key for key in needed if _normalized_identifier(key) not in supplied]
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
def _fallback_candidate_names(
    shortlist: dict[str, Any],
    docs: list[dict[str, Any]],
    *,
    completion_ready: bool,
    avoid_names: list[str] | None = None,
) -> list[str]:
    """Order the action names to try when a fallback action is needed.

    When completion is ready: finish/message actions come first, then
    actions literally named finish/final/done, then the shortlisted
    actions, then every remaining named action.

    When completion is not ready: shortlisted actions come first, then all
    non-completion actions (and, if there is exactly one doc, that doc).
    Names listed in ``avoid_names`` are moved to the end of the result.

    Each name appears once, in first-offered order.
    """
    ordered: list[str] = []
    postponed: list[str] = []
    avoided = {str(entry).lower() for entry in avoid_names or [] if str(entry)}

    def offer(raw: Any) -> None:
        label = str(raw or "")
        if not label:
            return
        # Avoided names are only postponed while completion is not ready.
        postpone = not completion_ready and label.lower() in avoided
        push_unique(postponed if postpone else ordered, label)

    def completes(doc: dict[str, Any]) -> bool:
        return bool(doc.get("is_finish") or doc.get("is_message"))

    if completion_ready:
        for doc in docs:
            if completes(doc) and doc.get("name"):
                offer(doc.get("name"))
        for doc in docs:
            label = str(doc.get("name") or "")
            if label.lower() in {"finish", "final", "done"}:
                offer(label)

    for item in shortlist.get("shortlisted_actions") or []:
        if isinstance(item, dict) and item.get("name"):
            offer(item.get("name"))

    if not completion_ready:
        for doc in docs:
            if not completes(doc) and doc.get("name"):
                offer(doc.get("name"))

    if completion_ready or len(docs) == 1:
        for doc in docs:
            if doc.get("name"):
                offer(doc.get("name"))

    for label in postponed:
        push_unique(ordered, label)
    return ordered
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
def _doc_by_name(docs: list[dict[str, Any]], name: str) -> dict[str, Any] | None:
|
|
874
|
+
for doc in docs:
|
|
875
|
+
if str(doc.get("name") or "") == name:
|
|
876
|
+
return doc
|
|
877
|
+
lowered = str(name or "").lower()
|
|
878
|
+
for doc in docs:
|
|
879
|
+
if str(doc.get("name") or "").lower() == lowered:
|
|
880
|
+
return doc
|
|
881
|
+
return None
|
|
882
|
+
|
|
883
|
+
|
|
884
|
+
def push_unique(values: list[str], value: str) -> None:
    """Append ``value`` to ``values`` in place unless it is already there."""
    if value in values:
        return
    values.append(value)
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
def _profile_action_prior(profile: str, name: str, action_text: str) -> tuple[float, str]:
|
|
890
|
+
text = f"{name} {action_text}".lower()
|
|
891
|
+
if profile == "appworld":
|
|
892
|
+
if re.search(r"\b(get|lookup|list|search|find|query|read|fetch|load|inspect)\b", text):
|
|
893
|
+
return 5, "AppWorld prior: inspect app/API state before mutating records"
|
|
894
|
+
if re.search(r"\b(create|update|set|delete|cancel|submit|send)\b", text):
|
|
895
|
+
return 3, "AppWorld prior: likely state-changing app action"
|
|
896
|
+
elif profile == "browsecomp":
|
|
897
|
+
if re.search(r"\b(search|query|browse|web|open|read|fetch|source|cite|visit)\b", text):
|
|
898
|
+
return 6, "BrowseComp prior: gather and verify source evidence"
|
|
899
|
+
if re.search(r"\b(answer|final|finish|message|respond)\b", text):
|
|
900
|
+
return 2, "BrowseComp prior: final answer action when evidence is sufficient"
|
|
901
|
+
elif profile == "tau2":
|
|
902
|
+
if re.search(r"\b(policy|lookup|search|get|list|read|check|verify|order|customer|account|ticket)\b", text):
|
|
903
|
+
return 5, "tau2 prior: check policy/customer/tool state before commitments"
|
|
904
|
+
if re.search(r"\b(update|create|cancel|refund|transfer|confirm|submit|send)\b", text):
|
|
905
|
+
return 3, "tau2 prior: policy-supported customer-service action"
|
|
906
|
+
elif profile == "webdevbench":
|
|
907
|
+
if re.search(r"\b(requirements?|canar(?:y|ies)|spec|product|plan|architecture|read|inspect|list|search|get|query)\b", text):
|
|
908
|
+
return 6, "WebDevBench prior: preserve product/canary requirements before building"
|
|
909
|
+
if re.search(r"\b(e2e|integration|api|browser|playwright|cypress|security|audit|build|deploy|migration|load|concurrency|health)\b", text):
|
|
910
|
+
return 5, "WebDevBench prior: verify full-stack, production, or security evidence"
|
|
911
|
+
if re.search(r"\b(create|update|modify|deploy|submit|send)\b", text):
|
|
912
|
+
return 3, "WebDevBench prior: app creation/modification action"
|
|
913
|
+
elif profile == "swe-cycle":
|
|
914
|
+
if re.search(r"\b(fullcycle|envsetup|codeimpl|testgen|phase|requirements?|issue|read|inspect|list|search|get|query|run_script|parsing_script|selected_test_files_to_run|environment_setup_commit|before_repo_set_cmd|image_name)\b", text):
|
|
915
|
+
return 6, "SWE-Cycle prior: identify lifecycle phase, harness fields, and issue requirements"
|
|
916
|
+
if re.search(r"\b(setup|install|bootstrap|dependencies|env|environment|import|collect|discover|build)\b", text):
|
|
917
|
+
return 6, "SWE-Cycle prior: reconstruct bare-repo environment before code/test edits"
|
|
918
|
+
if re.search(r"\b(testgen|test|tests|pytest|jest|vitest|selected|judge|swe[-_ ]?judge|static|dynamic|verify|check)\b", text):
|
|
919
|
+
return 5, "SWE-Cycle prior: generate/validate tests and preserve judge evidence"
|
|
920
|
+
if re.search(r"\b(codeimpl|modify|patch|edit|update|change|implement|repair)\b", text):
|
|
921
|
+
return 3, "SWE-Cycle prior: implementation action after lifecycle context is established"
|
|
922
|
+
elif profile == "swe-ci":
|
|
923
|
+
if re.search(r"\b(current|target|commit|sha|history|log|diff|status|read|inspect|list|search|get|query)\b", text):
|
|
924
|
+
return 6, "SWE-CI prior: establish current/target commits, test gaps, and repo evolution context"
|
|
925
|
+
if re.search(r"\b(run[_ -]?tests?|test|ci|verify|check|tox|nox|act|pytest|unittest)\b", text):
|
|
926
|
+
return 6, "SWE-CI prior: run the CI/test loop and preserve verifier deltas"
|
|
927
|
+
if re.search(r"\b(requirements?|define[_ -]?requirements?|test[_ -]?gap|failure|attribution|plan|locali[sz]e)\b", text):
|
|
928
|
+
return 5, "SWE-CI prior: derive requirements from CI/test gaps before modifying code"
|
|
929
|
+
if re.search(r"\b(modify[_ -]?code|patch|edit|update|change|implement|repair)\b", text):
|
|
930
|
+
return 3, "SWE-CI prior: incremental requirement-backed code modification"
|
|
931
|
+
elif profile == "swe-prbench":
|
|
932
|
+
if re.search(r"\b(pr|pull|request|diff|patch|hunk|changed|files?|review|comment|read|inspect|list|search|get|query)\b", text):
|
|
933
|
+
return 6, "SWE-PRBench prior: inspect PR metadata and changed diff before broad context"
|
|
934
|
+
if re.search(r"\b(test|verify|repro|run|check|typecheck|lint|unit)\b", text):
|
|
935
|
+
return 4, "SWE-PRBench prior: verify suspected review findings when feasible"
|
|
936
|
+
if re.search(r"\b(finish|message|answer|respond|final|review)\b", text):
|
|
937
|
+
return 3, "SWE-PRBench prior: deliver severity-rated review findings once evidence is sufficient"
|
|
938
|
+
if re.search(r"\b(edit|patch|modify|update|write|apply)\b", text):
|
|
939
|
+
return -3, "SWE-PRBench prior: defer code edits unless the review task explicitly asks for patches"
|
|
940
|
+
elif profile == "tml-bench":
|
|
941
|
+
if re.search(r"\b(data|dataset|train|test|sample[_ -]?submission|schema|columns?|target|id|metric|read|inspect|list|search|get|query)\b", text):
|
|
942
|
+
return 6, "TML-Bench prior: establish data contract and submission schema before modeling"
|
|
943
|
+
if re.search(r"\b(validate|validation|split|cv|cross[-_ ]?validation|leakage|baseline|score|metric|check)\b", text):
|
|
944
|
+
return 6, "TML-Bench prior: honest validation and leakage checks before submission"
|
|
945
|
+
if re.search(r"\b(train|fit|model|pipeline|preprocess|feature|predict)\b", text):
|
|
946
|
+
return 4, "TML-Bench prior: build a reliable tabular baseline before complex ensembling"
|
|
947
|
+
if re.search(r"\b(submit|submission|save|write|export|finish|answer|final)\b", text):
|
|
948
|
+
return 4, "TML-Bench prior: produce and validate a schema-compatible submission artifact"
|
|
949
|
+
elif profile == "pi-bench":
|
|
950
|
+
if re.search(r"\b(profile|user|history|message|file|workspace|app|context|state|read|inspect|list|search|get|query)\b", text):
|
|
951
|
+
return 6, "Pi-Bench prior: establish personal/workspace/app context before proactive action"
|
|
952
|
+
if re.search(r"\b(intent|implicit|hidden|latent|need|preference|constraint|policy|privacy|permission|clarif(?:y|ication)|ask)\b", text):
|
|
953
|
+
return 6, "Pi-Bench prior: resolve hidden intent, privacy, and permission uncertainty"
|
|
954
|
+
if re.search(r"\b(tool|action|schedule|send|update|create|modify|book|message|email|calendar|file)\b", text):
|
|
955
|
+
return 4, "Pi-Bench prior: take reversible proactive action only after context is grounded"
|
|
956
|
+
if re.search(r"\b(verify|confirm|observe|check|finish|answer|final|done)\b", text):
|
|
957
|
+
return 4, "Pi-Bench prior: verify observable completion and communicate concise outcome"
|
|
958
|
+
else:
|
|
959
|
+
if re.search(r"\b(observe|read|search|list|get|lookup|inspect|query)\b", text):
|
|
960
|
+
return 4, "generic prior: inspect available state before irreversible actions"
|
|
961
|
+
return 0, ""
|
|
962
|
+
|
|
963
|
+
|
|
964
|
+
def _shortlist_item(
|
|
965
|
+
doc: dict[str, Any],
|
|
966
|
+
score: float,
|
|
967
|
+
reasons: list[str],
|
|
968
|
+
schema_keys: list[str],
|
|
969
|
+
required_keys: list[str],
|
|
970
|
+
required_hints: list[dict[str, str]],
|
|
971
|
+
) -> dict[str, Any]:
|
|
972
|
+
return {
|
|
973
|
+
"name": str(doc.get("name") or ""),
|
|
974
|
+
"score": round(score, 2),
|
|
975
|
+
"reason": "; ".join(reasons[:4]) or "available action",
|
|
976
|
+
"argument_keys": schema_keys[:12],
|
|
977
|
+
"required_argument_keys": required_keys[:12],
|
|
978
|
+
"available_required_hints": required_hints[:8],
|
|
979
|
+
"is_finish": bool(doc.get("is_finish", False)),
|
|
980
|
+
"is_message": bool(doc.get("is_message", False)),
|
|
981
|
+
}
|
|
982
|
+
|
|
983
|
+
|
|
984
|
+
def extract_action_payload(text: str) -> ActionPayload | None:
    """Pick the action payload out of cawdex output.

    Every JSON candidate found in ``text`` is tried from last to first, and
    the first one that coerces into an action payload is returned.

    Supported shapes:
      {"name": "finish", "arguments": {"answer": "..."}}
      {"action": "finish", "arguments": {"answer": "..."}}
      {"action": {"name": "finish", "arguments": {"answer": "..."}}}

    Returns:
        The matching payload, or ``None`` when no candidate is a valid action.
    """
    # Lazy generator: coercion stops as soon as one candidate is accepted.
    coerced = (
        _coerce_action_payload(candidate)
        for candidate in reversed(_json_candidates(text))
    )
    return next((payload for payload in coerced if payload is not None), None)
|
|
998
|
+
|
|
999
|
+
|
|
1000
|
+
def _json_candidates(text: str) -> list[Any]:
|
|
1001
|
+
candidates: list[Any] = []
|
|
1002
|
+
|
|
1003
|
+
for block in re.findall(r"```(?:json|JSON)?\s*(.*?)```", text or "", flags=re.DOTALL):
|
|
1004
|
+
value = _parse_json(block.strip())
|
|
1005
|
+
if value is not None:
|
|
1006
|
+
candidates.append(value)
|
|
1007
|
+
|
|
1008
|
+
marker_re = re.compile(r"cawdex-exgentic action JSON\s*:\s*(\{.*?\})\s*$", re.IGNORECASE | re.DOTALL)
|
|
1009
|
+
marker = marker_re.search(text or "")
|
|
1010
|
+
if marker:
|
|
1011
|
+
value = _parse_json(marker.group(1))
|
|
1012
|
+
if value is not None:
|
|
1013
|
+
candidates.append(value)
|
|
1014
|
+
|
|
1015
|
+
decoder = json.JSONDecoder()
|
|
1016
|
+
for match in re.finditer(r"\{", text or ""):
|
|
1017
|
+
try:
|
|
1018
|
+
value, _ = decoder.raw_decode(text[match.start() :])
|
|
1019
|
+
except Exception:
|
|
1020
|
+
continue
|
|
1021
|
+
candidates.append(value)
|
|
1022
|
+
|
|
1023
|
+
return candidates
|
|
1024
|
+
|
|
1025
|
+
|
|
1026
|
+
def _parse_json(text: str) -> Any | None:
|
|
1027
|
+
try:
|
|
1028
|
+
return json.loads(text)
|
|
1029
|
+
except Exception:
|
|
1030
|
+
return None
|
|
1031
|
+
|
|
1032
|
+
|
|
1033
|
+
def _coerce_action_payload(value: Any) -> ActionPayload | None:
    """Turn a decoded JSON value into an ``ActionPayload`` when possible.

    The action name is read from ``name``, ``action`` or ``tool`` (first
    truthy value wins). Arguments are read from ``arguments``, ``args`` or
    ``action_input`` (first value that is not ``None`` wins) and default to
    an empty dict. When ``action`` is itself a dict, the name and the
    ``arguments``/``args`` are taken from that inner dict instead.

    Returns:
        The payload with a stripped name, or ``None`` when ``value`` is not
        a dict or carries no non-blank string name. Non-dict arguments are
        wrapped as ``{"value": arguments}``.
    """
    if not isinstance(value, dict):
        return None

    wrapped = value.get("action")
    if isinstance(wrapped, dict):
        # Nested shape: flatten the inner dict so the shared lookups below
        # apply; outer-level keys are intentionally not consulted any more.
        inner_args = wrapped.get("arguments")
        value = {
            "name": wrapped.get("name") or wrapped.get("action") or wrapped.get("tool"),
            "arguments": wrapped.get("args") if inner_args is None else inner_args,
        }

    tool_name = value.get("name") or value.get("action") or value.get("tool")
    if not (isinstance(tool_name, str) and tool_name.strip()):
        return None

    arguments: Any = {}
    for key in ("arguments", "args", "action_input"):
        supplied = value.get(key)
        if supplied is not None:
            arguments = supplied
            break

    if not isinstance(arguments, dict):
        # Scalars and lists are kept, just boxed under a fixed key.
        arguments = {"value": arguments}

    return ActionPayload(name=tool_name.strip(), arguments=arguments)
|