cawdex 1.35.75 → 1.35.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/README.md +5 -5
  2. package/bin/anycode.js +2 -2
  3. package/bin/cawdex.js +408 -408
  4. package/bin/ecc-hooks.cjs +11 -11
  5. package/dist/agents-md.d.ts +31 -0
  6. package/dist/agents-md.js +340 -0
  7. package/dist/agents-md.js.map +1 -0
  8. package/dist/agents.js +1424 -1424
  9. package/dist/api.d.ts +1 -0
  10. package/dist/api.js +19 -14
  11. package/dist/api.js.map +1 -1
  12. package/dist/autonomous-loops.js +287 -287
  13. package/dist/benchmark-repos.d.ts +31 -0
  14. package/dist/benchmark-repos.js +234 -8
  15. package/dist/benchmark-repos.js.map +1 -1
  16. package/dist/command-palette.js +4 -2
  17. package/dist/command-palette.js.map +1 -1
  18. package/dist/compaction.js +8 -8
  19. package/dist/config.js +51 -36
  20. package/dist/config.js.map +1 -1
  21. package/dist/content-engine.js +543 -543
  22. package/dist/context-brief.d.ts +4 -0
  23. package/dist/context-brief.js +230 -0
  24. package/dist/context-brief.js.map +1 -0
  25. package/dist/cost-tracker.d.ts +33 -14
  26. package/dist/cost-tracker.js +81 -19
  27. package/dist/cost-tracker.js.map +1 -1
  28. package/dist/coverage.js +39 -39
  29. package/dist/docs-sync.js +98 -98
  30. package/dist/evaluation.js +452 -452
  31. package/dist/fixed-footer.d.ts +7 -1
  32. package/dist/fixed-footer.js +92 -18
  33. package/dist/fixed-footer.js.map +1 -1
  34. package/dist/git-workflow.js +49 -49
  35. package/dist/index.d.ts +2 -0
  36. package/dist/index.js +161 -63
  37. package/dist/index.js.map +1 -1
  38. package/dist/live-queue.js +1 -1
  39. package/dist/live-queue.js.map +1 -1
  40. package/dist/model-aliases.d.ts +37 -0
  41. package/dist/model-aliases.js +203 -0
  42. package/dist/model-aliases.js.map +1 -0
  43. package/dist/orchestration.js +15 -15
  44. package/dist/permissions.d.ts +6 -0
  45. package/dist/permissions.js +53 -0
  46. package/dist/permissions.js.map +1 -1
  47. package/dist/pm2-manager.js +26 -26
  48. package/dist/query.d.ts +0 -1
  49. package/dist/query.js +74 -39
  50. package/dist/query.js.map +1 -1
  51. package/dist/refactor.js +87 -87
  52. package/dist/repo-command.js +7 -1
  53. package/dist/repo-command.js.map +1 -1
  54. package/dist/search-first.js +92 -92
  55. package/dist/skill-create.js +100 -100
  56. package/dist/stitch.js +1 -1
  57. package/dist/system-prompt.d.ts +2 -1
  58. package/dist/system-prompt.js +10 -5
  59. package/dist/system-prompt.js.map +1 -1
  60. package/dist/tools/github-repo-digest.d.ts +1 -1
  61. package/dist/tools/github-repo-digest.js +38 -6
  62. package/dist/tools/github-repo-digest.js.map +1 -1
  63. package/dist/types.d.ts +3 -0
  64. package/dist/types.js.map +1 -1
  65. package/dist/verification.js +55 -55
  66. package/package.json +1 -1
  67. package/resources/__init__.py +1 -1
  68. package/resources/exgentic/cawdex_agent/README.md +114 -114
  69. package/resources/exgentic/cawdex_agent/__init__.py +5 -5
  70. package/resources/exgentic/cawdex_agent/agent.py +605 -605
  71. package/resources/exgentic/cawdex_agent/requirements.txt +2 -2
  72. package/resources/exgentic/cawdex_agent/setup.sh +21 -21
  73. package/resources/exgentic/cawdex_agent/utils.py +1061 -1061
  74. package/resources/hal/cawdex_agent/README.md +24 -24
  75. package/resources/hal/cawdex_agent/__init__.py +1 -1
  76. package/resources/hal/cawdex_agent/main.py +550 -550
  77. package/resources/hal/cawdex_agent/requirements.txt +2 -2
  78. package/resources/kbench/cawdex_agent/README.md +107 -107
  79. package/resources/kbench/cawdex_agent/adapter.manifest.json +19 -19
  80. package/resources/kbench/cawdex_agent/runner.mjs +753 -753
  81. package/resources/open_agent_leaderboard/cawdex-agent-card.md +119 -119
  82. package/resources/terminal_bench/__init__.py +1 -1
  83. package/resources/terminal_bench/cawdex_agent.py +174 -174
  84. package/resources/terminal_bench/setup.sh +121 -121
@@ -1,1061 +1,1061 @@
1
- """Stdlib helpers for the cawdex Exgentic adapter."""
2
-
3
- from __future__ import annotations
4
-
5
- import json
6
- import re
7
- from dataclasses import dataclass
8
- from difflib import SequenceMatcher
9
- from typing import Any
10
-
11
-
12
- SECRET_REPLACEMENTS = [
13
- (re.compile(r"sk-or-v1-[A-Za-z0-9_-]+"), "sk-or-v1-[REDACTED]"),
14
- (re.compile(r"sk-[A-Za-z0-9_-]{16,}"), "sk-[REDACTED]"),
15
- (re.compile(r"hf_[A-Za-z0-9]{16,}"), "hf_[REDACTED]"),
16
- (re.compile(r"KGAT_[A-Za-z0-9]{16,}"), "KGAT_[REDACTED]"),
17
- (re.compile(r"npm_[A-Za-z0-9]{16,}"), "npm_[REDACTED]"),
18
- ]
19
-
20
- STOPWORDS = {
21
- "about",
22
- "after",
23
- "again",
24
- "also",
25
- "and",
26
- "any",
27
- "are",
28
- "available",
29
- "been",
30
- "before",
31
- "being",
32
- "can",
33
- "context",
34
- "could",
35
- "current",
36
- "does",
37
- "for",
38
- "from",
39
- "has",
40
- "have",
41
- "into",
42
- "latest",
43
- "need",
44
- "needs",
45
- "not",
46
- "observation",
47
- "only",
48
- "requested",
49
- "should",
50
- "task",
51
- "that",
52
- "the",
53
- "then",
54
- "this",
55
- "use",
56
- "user",
57
- "with",
58
- "you",
59
- }
60
-
61
-
62
- @dataclass(frozen=True)
63
- class ActionPayload:
64
- """Machine-readable action selected by cawdex."""
65
-
66
- name: str
67
- arguments: dict[str, Any]
68
-
69
-
70
- @dataclass(frozen=True)
71
- class ActionRepairResult:
72
- """Deterministic repair result for benchmark action JSON."""
73
-
74
- payload: ActionPayload
75
- changed: bool
76
- diagnostics: dict[str, Any]
77
-
78
-
79
- def redact(value: Any) -> str:
80
- text = str(value or "")
81
- for pattern, replacement in SECRET_REPLACEMENTS:
82
- text = pattern.sub(replacement, text)
83
- return text
84
-
85
-
86
- def truncate(value: Any, limit: int = 80000) -> str:
87
- text = redact(value)
88
- if len(text) <= limit:
89
- return text
90
- omitted = len(text) - limit
91
- return text[:limit] + f"\n...[truncated {omitted} chars]"
92
-
93
-
94
- def json_dumps(value: Any, *, limit: int = 80000) -> str:
95
- try:
96
- text = json.dumps(value, ensure_ascii=False, indent=2, sort_keys=True, default=str)
97
- except Exception:
98
- text = str(value)
99
- return truncate(text, limit=limit)
100
-
101
-
102
- def fold_exgentic_history(
103
- history: list[dict[str, Any]],
104
- *,
105
- profile: str = "generic",
106
- max_items: int = 16,
107
- item_limit: int = 1200,
108
- ) -> dict[str, Any]:
109
- """Build a compact task-relevant ledger for long Exgentic sessions.
110
-
111
- The adapter keeps the full raw history in memory. This folded view is what
112
- goes back into the next model call, so noisy stdout does not crowd out the
113
- latest app state, policy evidence, source evidence, or selected actions.
114
- """
115
-
116
- observations: list[dict[str, Any]] = []
117
- actions: list[dict[str, Any]] = []
118
- diagnostics: list[dict[str, Any]] = []
119
- action_counts: dict[str, int] = {}
120
-
121
- for idx, item in enumerate(history or [], start=1):
122
- role = str(item.get("role", ""))
123
- if role == "observation":
124
- observations.append(
125
- {
126
- "turn": idx,
127
- "summary": truncate(json_dumps(item.get("content"), limit=item_limit), limit=item_limit),
128
- }
129
- )
130
- elif role == "selected_action":
131
- compact_actions = _compact_selected_actions(item.get("content"), item_limit=item_limit)
132
- for action in compact_actions:
133
- name = action.get("name") or "unknown"
134
- action_counts[name] = action_counts.get(name, 0) + 1
135
- actions.append({"turn": idx, "actions": compact_actions})
136
- elif role == "cawdex":
137
- diagnostic = _compact_cawdex_diagnostic(item, item_limit=item_limit)
138
- if diagnostic is not None:
139
- diagnostics.append({"turn": idx, **diagnostic})
140
- elif role == "action_repair":
141
- diagnostics.append(
142
- {
143
- "turn": idx,
144
- "kind": "action_repair",
145
- "evidence": truncate(json_dumps(item.get("content"), limit=item_limit), limit=item_limit),
146
- }
147
- )
148
-
149
- latest_observation = observations[-1] if observations else None
150
- latest_action = actions[-1] if actions else None
151
- return {
152
- "format": "cawdex-exgentic-folded-history-v1",
153
- "profile": profile,
154
- "turns_seen": len(history or []),
155
- "latest_observation": latest_observation,
156
- "latest_action": latest_action,
157
- "no_effect_repeat_actions": _recent_no_effect_action_names(history or []),
158
- "recent_observations": observations[-max_items:],
159
- "recent_actions": actions[-max_items:],
160
- "diagnostics": diagnostics[-max_items:],
161
- "action_counts": action_counts,
162
- "discipline": _folding_discipline(profile),
163
- }
164
-
165
-
166
- def repair_exgentic_action_payload(
167
- payload: ActionPayload,
168
- action_docs: list[dict[str, Any]],
169
- *,
170
- argument_hints: Any = None,
171
- ) -> ActionRepairResult:
172
- """Repair near-miss action names and argument keys before ActionType build.
173
-
174
- This is intentionally deterministic and conservative. It fixes common model
175
- output drift such as camelCase action names, case-only mismatches, and
176
- schema-key casing/separator mistakes, while leaving unresolved names intact
177
- so the caller can still fail or fallback explicitly.
178
- """
179
-
180
- docs = [doc for doc in action_docs or [] if isinstance(doc, dict) and doc.get("name")]
181
- matched_doc, match_reason, match_score = _resolve_action_doc(payload.name, docs)
182
- repaired_name = str(matched_doc.get("name")) if matched_doc else payload.name
183
- repaired_args, arg_diagnostics = _repair_action_arguments(
184
- payload.arguments,
185
- matched_doc.get("arguments_schema") if matched_doc else None,
186
- argument_hints=argument_hints,
187
- )
188
-
189
- changed = repaired_name != payload.name or repaired_args != payload.arguments
190
- if matched_doc is None:
191
- status = "unresolved_action_name"
192
- elif changed:
193
- status = "repaired"
194
- else:
195
- status = "unchanged"
196
-
197
- diagnostics = {
198
- "status": status,
199
- "original_name": payload.name,
200
- "repaired_name": repaired_name,
201
- "name_match_reason": match_reason,
202
- "name_match_score": round(match_score, 3),
203
- **arg_diagnostics,
204
- }
205
- return ActionRepairResult(
206
- payload=ActionPayload(name=repaired_name, arguments=repaired_args),
207
- changed=changed,
208
- diagnostics=diagnostics,
209
- )
210
-
211
-
212
- def fallback_exgentic_action_payload(
213
- action_docs: list[dict[str, Any]],
214
- *,
215
- task: Any = None,
216
- context: Any = None,
217
- history: list[dict[str, Any]] | None = None,
218
- profile: str = "generic",
219
- reason: str = "no_valid_action_json",
220
- ) -> ActionRepairResult | None:
221
- """Select a conservative fallback action when the model emits no valid JSON.
222
-
223
- The old adapter bias was finish/message first. That is dangerous for
224
- multi-step benchmarks because a transient malformed response can become a
225
- premature stop. This selector reuses the same shortlist and exact required
226
- argument hints as the main prompt, preferring viable non-finish actions
227
- while the latest observation is not completion-ready.
228
- """
229
-
230
- docs = [doc for doc in action_docs or [] if isinstance(doc, dict) and doc.get("name")]
231
- if not docs:
232
- return None
233
-
234
- history_items = history or []
235
- latest_observation = _latest_history_content(history_items, "observation")
236
- argument_hints = {
237
- "latest_observation": latest_observation,
238
- "context": context or {},
239
- }
240
- shortlist = shortlist_exgentic_actions(
241
- docs,
242
- task=task,
243
- context=context,
244
- history=history_items,
245
- profile=profile,
246
- )
247
- completion_ready = bool(shortlist.get("completion_ready"))
248
- no_effect_repeat_actions = [
249
- str(name)
250
- for name in shortlist.get("avoid_no_effect_repeat_actions") or []
251
- if str(name)
252
- ]
253
- candidate_names = _fallback_candidate_names(
254
- shortlist,
255
- docs,
256
- completion_ready=completion_ready,
257
- avoid_names=no_effect_repeat_actions,
258
- )
259
- skipped: list[dict[str, Any]] = []
260
-
261
- for name in candidate_names:
262
- doc = _doc_by_name(docs, name)
263
- if doc is None:
264
- continue
265
- is_completion = bool(doc.get("is_finish") or doc.get("is_message"))
266
- if not completion_ready and is_completion:
267
- skipped.append({"name": name, "reason": "completion_not_ready"})
268
- continue
269
- repair = repair_exgentic_action_payload(
270
- ActionPayload(name=name, arguments={}),
271
- docs,
272
- argument_hints=argument_hints,
273
- )
274
- missing = _missing_required_arguments(repair.payload.arguments, doc.get("arguments_schema"))
275
- if missing and not (completion_ready and is_completion):
276
- skipped.append({"name": name, "reason": "missing_required_arguments", "missing": missing})
277
- continue
278
- diagnostics = {
279
- "status": "fallback_selected",
280
- "fallback_reason": reason,
281
- "selected_name": repair.payload.name,
282
- "completion_ready": completion_ready,
283
- "avoid_no_effect_repeat_actions": no_effect_repeat_actions,
284
- "candidate_names": candidate_names[:12],
285
- "skipped_candidates": skipped[:8],
286
- "shortlist": shortlist,
287
- "repair": repair.diagnostics,
288
- }
289
- return ActionRepairResult(payload=repair.payload, changed=True, diagnostics=diagnostics)
290
-
291
- return None
292
-
293
-
294
- def shortlist_exgentic_actions(
295
- action_docs: list[dict[str, Any]],
296
- *,
297
- task: Any = None,
298
- context: Any = None,
299
- history: list[dict[str, Any]] | None = None,
300
- profile: str = "generic",
301
- limit: int = 8,
302
- ) -> dict[str, Any]:
303
- """Rank available actions into a compact shortlist for the next step.
304
-
305
- Exgentic still receives the full action schema list below this shortlist.
306
- The shortlist is a deterministic scaffold: it narrows attention to likely
307
- actions and finish/message timing without hiding benchmark capabilities.
308
- """
309
-
310
- docs = [doc for doc in action_docs or [] if isinstance(doc, dict) and doc.get("name")]
311
- safe_limit = max(1, min(16, int(limit or 8)))
312
- latest_observation = _latest_history_content(history or [], "observation")
313
- latest_observation_text = json_dumps(latest_observation, limit=6000) if latest_observation is not None else ""
314
- argument_hints = {
315
- "latest_observation": latest_observation,
316
- "context": context or {},
317
- }
318
- target_text = " ".join(
319
- [
320
- str(task or ""),
321
- json_dumps(context or {}, limit=6000),
322
- latest_observation_text,
323
- ]
324
- ).lower()
325
- tokens = _keyword_tokens(target_text)
326
- completion_ready = _completion_ready(latest_observation_text)
327
- latest_action_name = _latest_selected_action_name(history or [])
328
- has_recent_error = _has_recent_action_error(history or [])
329
- no_effect_repeat_actions = _recent_no_effect_action_names(history or [])
330
- no_effect_repeat_set = {name.lower() for name in no_effect_repeat_actions}
331
-
332
- scored: list[tuple[float, str, dict[str, Any], list[str], list[str], list[str], list[dict[str, str]]]] = []
333
- for doc in docs:
334
- name = str(doc.get("name") or "")
335
- action_text = _action_doc_text(doc)
336
- schema = doc.get("arguments_schema")
337
- schema_keys = _schema_property_keys(schema)
338
- required_keys = _schema_required_keys(schema)
339
- required_hints = _required_argument_hints(required_keys, argument_hints)
340
- score = 0.0
341
- reasons: list[str] = []
342
-
343
- token_hits = [token for token in tokens if token in action_text][:6]
344
- if token_hits:
345
- score += min(12, len(token_hits) * 2)
346
- reasons.append(f"matches task/observation tokens: {', '.join(token_hits)}")
347
-
348
- schema_hits = [key for key in schema_keys if key.lower() in target_text][:6]
349
- if schema_hits:
350
- score += min(10, len(schema_hits) * 2)
351
- reasons.append(f"schema keys appear in current state: {', '.join(schema_hits)}")
352
-
353
- if required_hints:
354
- score += min(8, len(required_hints) * 3)
355
- reasons.append(
356
- "required args available in current state: "
357
- + ", ".join(item["key"] for item in required_hints[:4])
358
- )
359
-
360
- prior_score, prior_reason = _profile_action_prior(profile, name, action_text)
361
- if prior_score:
362
- score += prior_score
363
- reasons.append(prior_reason)
364
-
365
- name_tokens = [token for token in _keyword_tokens(name.replace("_", " ")) if token not in {"action"}]
366
- if name_tokens and all(token in latest_observation_text.lower() for token in name_tokens):
367
- score += 8
368
- reasons.append("action name matches explicit latest-observation cue")
369
-
370
- is_completion = bool(doc.get("is_finish") or doc.get("is_message"))
371
- if is_completion:
372
- if completion_ready:
373
- score += 8
374
- reasons.append("latest observation suggests completion is ready")
375
- else:
376
- score -= 7
377
- reasons.append("defer finish/message until benchmark-visible completion evidence")
378
-
379
- if latest_action_name and name.lower() == latest_action_name.lower():
380
- score -= 2
381
- reasons.append("same as previous selected action")
382
- if has_recent_error:
383
- score -= 4
384
- reasons.append("avoid repeating after recent action/schema error")
385
- if not completion_ready and name.lower() in no_effect_repeat_set:
386
- score -= 10
387
- reasons.append("avoid repeating no-effect action; latest observation did not change")
388
-
389
- scored.append((score, name.lower(), doc, reasons, schema_keys, required_keys, required_hints))
390
-
391
- scored.sort(key=lambda item: (-item[0], item[1]))
392
- shortlisted = [
393
- _shortlist_item(doc, score, reasons, schema_keys, required_keys, required_hints)
394
- for score, _name, doc, reasons, schema_keys, required_keys, required_hints in scored[:safe_limit]
395
- ]
396
- shortlisted_names = {str(item.get("name", "")).lower() for item in shortlisted}
397
- deferred_completion = [
398
- str(doc.get("name"))
399
- for doc in docs
400
- if (doc.get("is_finish") or doc.get("is_message"))
401
- and str(doc.get("name", "")).lower() not in shortlisted_names
402
- and not completion_ready
403
- ]
404
-
405
- return {
406
- "format": "cawdex-exgentic-action-shortlist-v1",
407
- "profile": profile,
408
- "action_count": len(docs),
409
- "shortlist_limit": safe_limit,
410
- "completion_ready": completion_ready,
411
- "avoid_no_effect_repeat_actions": no_effect_repeat_actions,
412
- "shortlisted_actions": shortlisted,
413
- "deferred_completion_actions": deferred_completion,
414
- "discipline": "Prefer shortlisted actions when they fit the latest observation; use full schemas below if the current state clearly requires a non-shortlisted action. If avoid_no_effect_repeat_actions is non-empty, change strategy unless no other viable action has its required arguments.",
415
- }
416
-
417
-
418
- def safe_id(value: Any, default: str = "session") -> str:
419
- raw = str(value or default)
420
- safe = re.sub(r"[^A-Za-z0-9_.-]+", "-", raw).strip("-")
421
- return safe or default
422
-
423
-
424
- def _compact_selected_actions(value: Any, *, item_limit: int) -> list[dict[str, Any]]:
425
- actions = value if isinstance(value, list) else [value]
426
- compact: list[dict[str, Any]] = []
427
- for action in actions:
428
- if not isinstance(action, dict):
429
- compact.append({"name": "unknown", "summary": truncate(action, limit=item_limit)})
430
- continue
431
- raw_args = action.get("arguments", {})
432
- args = raw_args if isinstance(raw_args, dict) else {"value": raw_args}
433
- compact.append(
434
- {
435
- "name": str(action.get("name") or "unknown"),
436
- "argument_keys": sorted(str(key) for key in args.keys()),
437
- "arguments": truncate(json_dumps(args, limit=item_limit), limit=item_limit),
438
- }
439
- )
440
- return compact
441
-
442
-
443
- def _compact_cawdex_diagnostic(item: dict[str, Any], *, item_limit: int) -> dict[str, Any] | None:
444
- returncode = item.get("returncode")
445
- stderr = str(item.get("stderr") or "")
446
- stdout = str(item.get("stdout") or "")
447
- text = "\n".join(part for part in [stderr, stdout] if part)
448
- if returncode in (None, 0) and not re.search(
449
- r"\b(error|invalid|unknown action|schema|malformed|permission|timed out|timeout|failed)\b",
450
- text,
451
- flags=re.IGNORECASE,
452
- ):
453
- return None
454
- return {
455
- "returncode": returncode,
456
- "evidence": truncate(text, limit=item_limit),
457
- }
458
-
459
-
460
- def _folding_discipline(profile: str) -> str:
461
- if profile == "appworld":
462
- return "Use latest_observation as authoritative app/API state; preserve IDs, dates, permissions, and record integrity."
463
- if profile == "browsecomp":
464
- return "Carry forward verified sources and unresolved search facets; do not treat snippets or stale single-source claims as final evidence."
465
- if profile == "tau2":
466
- return "Carry forward policy constraints, customer intent, tool results, and pending confirmations before selecting the next action."
467
- if profile == "terminalworld":
468
- return "TerminalWorld discipline: carry forward instruction.md/task artifact requirements, generated files/services, command outputs, verifier status, and any solve.sh/reference-solution avoidance before selecting the next action."
469
- if profile == "webdevbench":
470
- return "Carry forward canary requirements, frontend/backend state, integration evidence, and production/security gaps before selecting the next action."
471
- if profile == "swe-cycle":
472
- return "Carry forward lifecycle phase, bare-repo environment setup state, implementation requirements, generated/selected tests, judge commands, and unresolved phase gaps before selecting the next action."
473
- if profile == "swe-ci":
474
- return "Carry forward current/target commits, test gaps, inferred requirements, touched files, verifier deltas, and unresolved regressions before selecting the next action."
475
- if profile == "swe-prbench":
476
- return "Carry forward PR title/description, changed files, diff hunks, suspected findings, evidence gaps, and context-expansion reasons before selecting the next action."
477
- if profile == "tml-bench":
478
- return "Carry forward train/test/sample submission paths, ID/target columns, metric, validation split, leakage checks, model artifacts, submission path, and validity evidence before selecting the next action."
479
- if profile == "pi-bench":
480
- return "Carry forward user profile, current request, message/file/app context, available domain tools, hidden-intent hypotheses, clarification state, privacy risk, selected actions, and observable completion evidence before selecting the next action."
481
- return "Use the folded ledger as orientation, then rely on the latest observation and available action schemas for the next action."
482
-
483
-
484
- def _resolve_action_doc(
485
- name: str,
486
- docs: list[dict[str, Any]],
487
- ) -> tuple[dict[str, Any] | None, str, float]:
488
- raw = str(name or "")
489
- if not raw:
490
- return None, "empty", 0.0
491
-
492
- for doc in docs:
493
- candidate = str(doc.get("name") or "")
494
- if candidate == raw:
495
- return doc, "exact", 1.0
496
-
497
- lowered = raw.lower()
498
- for doc in docs:
499
- candidate = str(doc.get("name") or "")
500
- if candidate.lower() == lowered:
501
- return doc, "case_insensitive", 1.0
502
-
503
- normalized = _normalized_identifier(raw)
504
- for doc in docs:
505
- candidate = str(doc.get("name") or "")
506
- if _normalized_identifier(candidate) == normalized:
507
- return doc, "normalized_identifier", 1.0
508
-
509
- best_doc: dict[str, Any] | None = None
510
- best_score = 0.0
511
- second_score = 0.0
512
- for doc in docs:
513
- candidate = str(doc.get("name") or "")
514
- candidate_norm = _normalized_identifier(candidate)
515
- score = SequenceMatcher(None, normalized, candidate_norm).ratio() if normalized and candidate_norm else 0.0
516
- if normalized and candidate_norm and (normalized in candidate_norm or candidate_norm in normalized):
517
- score = max(score, 0.82)
518
- if score > best_score:
519
- second_score = best_score
520
- best_score = score
521
- best_doc = doc
522
- elif score > second_score:
523
- second_score = score
524
-
525
- if best_doc is not None and best_score >= 0.82 and best_score - second_score >= 0.04:
526
- return best_doc, "fuzzy_identifier", best_score
527
- return None, "unresolved", best_score
528
-
529
-
530
- def _repair_action_arguments(
531
- arguments: dict[str, Any],
532
- schema: Any,
533
- *,
534
- argument_hints: Any = None,
535
- ) -> tuple[dict[str, Any], dict[str, Any]]:
536
- args = dict(arguments or {})
537
- schema_keys = _schema_property_keys(schema)
538
- if not schema_keys:
539
- return args, {
540
- "argument_key_repairs": [],
541
- "dropped_argument_keys": [],
542
- "filled_required_arguments": [],
543
- "schema_keys": [],
544
- }
545
-
546
- key_by_normalized = {_normalized_identifier(key): key for key in schema_keys}
547
- repaired: dict[str, Any] = {}
548
- key_repairs: list[dict[str, str]] = []
549
- dropped: list[str] = []
550
-
551
- for key, value in args.items():
552
- text_key = str(key)
553
- if text_key in schema_keys:
554
- repaired[text_key] = value
555
- continue
556
- canonical = key_by_normalized.get(_normalized_identifier(text_key))
557
- if canonical is not None:
558
- repaired[canonical] = value
559
- key_repairs.append({"from": text_key, "to": canonical})
560
- else:
561
- dropped.append(text_key)
562
-
563
- filled = _fill_required_arguments(repaired, schema, argument_hints)
564
- return repaired, {
565
- "argument_key_repairs": key_repairs,
566
- "dropped_argument_keys": dropped,
567
- "filled_required_arguments": filled,
568
- "schema_keys": schema_keys[:24],
569
- }
570
-
571
-
572
- def _fill_required_arguments(
573
- repaired: dict[str, Any],
574
- schema: Any,
575
- argument_hints: Any,
576
- ) -> list[dict[str, str]]:
577
- if not isinstance(schema, dict):
578
- return []
579
- required = [str(key) for key in schema.get("required") or [] if str(key)]
580
- if not required:
581
- return []
582
- hint_index = _argument_hint_index(argument_hints)
583
- filled: list[dict[str, str]] = []
584
- existing = {_normalized_identifier(key) for key in repaired.keys()}
585
-
586
- for key in required:
587
- norm = _normalized_identifier(key)
588
- if not norm or norm in existing:
589
- continue
590
- match = hint_index.get(norm)
591
- if match is None:
592
- continue
593
- value, source = match
594
- if value is None:
595
- continue
596
- repaired[key] = value
597
- existing.add(norm)
598
- filled.append({"key": key, "source": source})
599
- return filled
600
-
601
-
602
- def _argument_hint_index(value: Any) -> dict[str, tuple[Any, str]]:
603
- index: dict[str, tuple[Any, str]] = {}
604
-
605
- def visit(item: Any, path: str) -> None:
606
- if isinstance(item, dict):
607
- for key, child in item.items():
608
- key_text = str(key)
609
- child_path = f"{path}.{key_text}" if path else key_text
610
- norm = _normalized_identifier(key_text)
611
- if norm and norm not in index and _hint_value_is_usable(child):
612
- index[norm] = (child, child_path)
613
- visit(child, child_path)
614
- elif isinstance(item, list):
615
- for idx, child in enumerate(item[:50]):
616
- visit(child, f"{path}[{idx}]" if path else f"[{idx}]")
617
-
618
- visit(value, "")
619
- return index
620
-
621
-
622
- def _hint_value_is_usable(value: Any) -> bool:
623
- if value is None:
624
- return False
625
- if isinstance(value, str):
626
- return bool(value.strip())
627
- if isinstance(value, (bool, int, float)):
628
- return True
629
- if isinstance(value, (dict, list)):
630
- return bool(value)
631
- return True
632
-
633
-
634
- def _normalized_identifier(value: Any) -> str:
635
- return re.sub(r"[^a-z0-9]+", "", str(value or "").lower())
636
-
637
-
638
- def _latest_history_content(history: list[dict[str, Any]], role: str) -> Any | None:
639
- for item in reversed(history or []):
640
- if isinstance(item, dict) and item.get("role") == role:
641
- return item.get("content")
642
- return None
643
-
644
-
645
- def _latest_selected_action_name(history: list[dict[str, Any]]) -> str | None:
646
- content = _latest_history_content(history, "selected_action")
647
- actions = content if isinstance(content, list) else [content]
648
- for action in actions:
649
- if isinstance(action, dict) and action.get("name"):
650
- return str(action.get("name"))
651
- return None
652
-
653
-
654
- def _recent_no_effect_action_names(history: list[dict[str, Any]]) -> list[str]:
655
- latest_observation_idx: int | None = None
656
- previous_observation_idx: int | None = None
657
- for idx in range(len(history or []) - 1, -1, -1):
658
- item = history[idx]
659
- if not isinstance(item, dict) or item.get("role") != "observation":
660
- continue
661
- if latest_observation_idx is None:
662
- latest_observation_idx = idx
663
- else:
664
- previous_observation_idx = idx
665
- break
666
-
667
- if latest_observation_idx is None or previous_observation_idx is None:
668
- return []
669
-
670
- latest = history[latest_observation_idx].get("content")
671
- previous = history[previous_observation_idx].get("content")
672
- latest_text = json_dumps(latest, limit=4000).lower()
673
- unchanged = (
674
- _observation_fingerprint(latest) == _observation_fingerprint(previous)
675
- or bool(
676
- re.search(
677
- r"\b(no change|no changes|unchanged|same state|nothing changed|no effect|still pending|"
678
- r"did not (?:change|update|move|complete|resolve)|not (?:changed|updated|completed|resolved))\b",
679
- latest_text,
680
- )
681
- )
682
- )
683
- if not unchanged:
684
- return []
685
-
686
- names: list[str] = []
687
- for item in history[previous_observation_idx + 1:latest_observation_idx]:
688
- if not isinstance(item, dict) or item.get("role") != "selected_action":
689
- continue
690
- for name in _selected_action_names(item.get("content")):
691
- push_unique(names, name)
692
- return names
693
-
694
-
695
- def _selected_action_names(value: Any) -> list[str]:
696
- actions = value if isinstance(value, list) else [value]
697
- names: list[str] = []
698
- for action in actions:
699
- if isinstance(action, dict) and action.get("name"):
700
- push_unique(names, str(action.get("name")))
701
- return names
702
-
703
-
704
- def _observation_fingerprint(value: Any) -> str:
705
- text = json_dumps(value, limit=8000).lower()
706
- return re.sub(r"\s+", " ", text).strip()
707
-
708
-
709
- def _has_recent_action_error(history: list[dict[str, Any]]) -> bool:
710
- for item in reversed((history or [])[-4:]):
711
- if not isinstance(item, dict) or item.get("role") != "cawdex":
712
- continue
713
- diagnostic = _compact_cawdex_diagnostic(item, item_limit=600)
714
- if diagnostic is not None:
715
- return True
716
- return False
717
-
718
-
719
- def _keyword_tokens(text: str) -> list[str]:
720
- seen: set[str] = set()
721
- tokens: list[str] = []
722
- for raw in re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,}", text or ""):
723
- token = raw.lower().strip("-_")
724
- if token in STOPWORDS or len(token) < 3 or token in seen:
725
- continue
726
- seen.add(token)
727
- tokens.append(token)
728
- if len(tokens) >= 80:
729
- break
730
- return tokens
731
-
732
-
733
- def _completion_ready(latest_observation_text: str) -> bool:
734
- text = (latest_observation_text or "").lower()
735
- if not text:
736
- return False
737
- if re.search(r"\b(pending|missing|need|needs|required|error|failed|invalid|not complete|unresolved)\b", text):
738
- return False
739
- return bool(re.search(r"\b(done|complete|completed|success|succeeded|confirmed|final answer|resolved)\b", text))
740
-
741
-
742
- def _action_doc_text(doc: dict[str, Any]) -> str:
743
- parts = [
744
- str(doc.get("name") or ""),
745
- str(doc.get("description") or ""),
746
- " ".join(_schema_property_keys(doc.get("arguments_schema"))),
747
- json_dumps(doc.get("arguments_schema") or {}, limit=4000),
748
- ]
749
- return " ".join(parts).lower()
750
-
751
-
752
- def _schema_property_keys(schema: Any) -> list[str]:
753
- if not isinstance(schema, dict):
754
- return []
755
- keys: list[str] = []
756
- properties = schema.get("properties")
757
- if isinstance(properties, dict):
758
- keys.extend(str(key) for key in properties.keys())
759
- for nested_key in ("$defs", "definitions"):
760
- nested = schema.get(nested_key)
761
- if isinstance(nested, dict):
762
- for value in nested.values():
763
- keys.extend(_schema_property_keys(value))
764
- seen: set[str] = set()
765
- deduped: list[str] = []
766
- for key in keys:
767
- lowered = key.lower()
768
- if lowered in seen:
769
- continue
770
- seen.add(lowered)
771
- deduped.append(key)
772
- return deduped
773
-
774
-
775
- def _schema_required_keys(schema: Any) -> list[str]:
776
- if not isinstance(schema, dict):
777
- return []
778
- required = schema.get("required")
779
- if not isinstance(required, list):
780
- return []
781
- seen: set[str] = set()
782
- keys: list[str] = []
783
- for key in required:
784
- text = str(key or "").strip()
785
- lowered = text.lower()
786
- if not text or lowered in seen:
787
- continue
788
- seen.add(lowered)
789
- keys.append(text)
790
- return keys
791
-
792
-
793
- def _required_argument_hints(required_keys: list[str], argument_hints: Any) -> list[dict[str, str]]:
794
- if not required_keys:
795
- return []
796
- hint_index = _argument_hint_index(argument_hints)
797
- hints: list[dict[str, str]] = []
798
- for key in required_keys:
799
- match = hint_index.get(_normalized_identifier(key))
800
- if match is None:
801
- continue
802
- value, source = match
803
- if not _hint_value_is_usable(value):
804
- continue
805
- hints.append(
806
- {
807
- "key": key,
808
- "source": source,
809
- "value_preview": truncate(json_dumps(value, limit=360), limit=360),
810
- }
811
- )
812
- return hints
813
-
814
-
815
- def _missing_required_arguments(arguments: dict[str, Any], schema: Any) -> list[str]:
816
- required = _schema_required_keys(schema)
817
- if not required:
818
- return []
819
- present = {
820
- _normalized_identifier(key)
821
- for key, value in (arguments or {}).items()
822
- if _hint_value_is_usable(value)
823
- }
824
- return [key for key in required if _normalized_identifier(key) not in present]
825
-
826
-
827
- def _fallback_candidate_names(
828
- shortlist: dict[str, Any],
829
- docs: list[dict[str, Any]],
830
- *,
831
- completion_ready: bool,
832
- avoid_names: list[str] | None = None,
833
- ) -> list[str]:
834
- names: list[str] = []
835
- delayed: list[str] = []
836
- avoid = {str(name).lower() for name in avoid_names or [] if str(name)}
837
-
838
- def add_candidate(value: Any) -> None:
839
- name = str(value or "")
840
- if not name:
841
- return
842
- if not completion_ready and name.lower() in avoid:
843
- push_unique(delayed, name)
844
- return
845
- push_unique(names, name)
846
-
847
- if completion_ready:
848
- for doc in docs:
849
- if (doc.get("is_finish") or doc.get("is_message")) and doc.get("name"):
850
- add_candidate(doc.get("name"))
851
- for doc in docs:
852
- name = str(doc.get("name") or "")
853
- if name.lower() in {"finish", "final", "done"}:
854
- add_candidate(name)
855
-
856
- for item in shortlist.get("shortlisted_actions") or []:
857
- if isinstance(item, dict) and item.get("name"):
858
- add_candidate(item.get("name"))
859
-
860
- if not completion_ready:
861
- for doc in docs:
862
- if not (doc.get("is_finish") or doc.get("is_message")) and doc.get("name"):
863
- add_candidate(doc.get("name"))
864
-
865
- for doc in docs:
866
- if (completion_ready or len(docs) == 1) and doc.get("name"):
867
- add_candidate(doc.get("name"))
868
- for name in delayed:
869
- push_unique(names, name)
870
- return names
871
-
872
-
873
- def _doc_by_name(docs: list[dict[str, Any]], name: str) -> dict[str, Any] | None:
874
- for doc in docs:
875
- if str(doc.get("name") or "") == name:
876
- return doc
877
- lowered = str(name or "").lower()
878
- for doc in docs:
879
- if str(doc.get("name") or "").lower() == lowered:
880
- return doc
881
- return None
882
-
883
-
884
- def push_unique(values: list[str], value: str) -> None:
885
- if value not in values:
886
- values.append(value)
887
-
888
-
889
- def _profile_action_prior(profile: str, name: str, action_text: str) -> tuple[float, str]:
890
- text = f"{name} {action_text}".lower()
891
- if profile == "appworld":
892
- if re.search(r"\b(get|lookup|list|search|find|query|read|fetch|load|inspect)\b", text):
893
- return 5, "AppWorld prior: inspect app/API state before mutating records"
894
- if re.search(r"\b(create|update|set|delete|cancel|submit|send)\b", text):
895
- return 3, "AppWorld prior: likely state-changing app action"
896
- elif profile == "browsecomp":
897
- if re.search(r"\b(search|query|browse|web|open|read|fetch|source|cite|visit)\b", text):
898
- return 6, "BrowseComp prior: gather and verify source evidence"
899
- if re.search(r"\b(answer|final|finish|message|respond)\b", text):
900
- return 2, "BrowseComp prior: final answer action when evidence is sufficient"
901
- elif profile == "tau2":
902
- if re.search(r"\b(policy|lookup|search|get|list|read|check|verify|order|customer|account|ticket)\b", text):
903
- return 5, "tau2 prior: check policy/customer/tool state before commitments"
904
- if re.search(r"\b(update|create|cancel|refund|transfer|confirm|submit|send)\b", text):
905
- return 3, "tau2 prior: policy-supported customer-service action"
906
- elif profile == "webdevbench":
907
- if re.search(r"\b(requirements?|canar(?:y|ies)|spec|product|plan|architecture|read|inspect|list|search|get|query)\b", text):
908
- return 6, "WebDevBench prior: preserve product/canary requirements before building"
909
- if re.search(r"\b(e2e|integration|api|browser|playwright|cypress|security|audit|build|deploy|migration|load|concurrency|health)\b", text):
910
- return 5, "WebDevBench prior: verify full-stack, production, or security evidence"
911
- if re.search(r"\b(create|update|modify|deploy|submit|send)\b", text):
912
- return 3, "WebDevBench prior: app creation/modification action"
913
- elif profile == "swe-cycle":
914
- if re.search(r"\b(fullcycle|envsetup|codeimpl|testgen|phase|requirements?|issue|read|inspect|list|search|get|query|run_script|parsing_script|selected_test_files_to_run|environment_setup_commit|before_repo_set_cmd|image_name)\b", text):
915
- return 6, "SWE-Cycle prior: identify lifecycle phase, harness fields, and issue requirements"
916
- if re.search(r"\b(setup|install|bootstrap|dependencies|env|environment|import|collect|discover|build)\b", text):
917
- return 6, "SWE-Cycle prior: reconstruct bare-repo environment before code/test edits"
918
- if re.search(r"\b(testgen|test|tests|pytest|jest|vitest|selected|judge|swe[-_ ]?judge|static|dynamic|verify|check)\b", text):
919
- return 5, "SWE-Cycle prior: generate/validate tests and preserve judge evidence"
920
- if re.search(r"\b(codeimpl|modify|patch|edit|update|change|implement|repair)\b", text):
921
- return 3, "SWE-Cycle prior: implementation action after lifecycle context is established"
922
- elif profile == "swe-ci":
923
- if re.search(r"\b(current|target|commit|sha|history|log|diff|status|read|inspect|list|search|get|query)\b", text):
924
- return 6, "SWE-CI prior: establish current/target commits, test gaps, and repo evolution context"
925
- if re.search(r"\b(run[_ -]?tests?|test|ci|verify|check|tox|nox|act|pytest|unittest)\b", text):
926
- return 6, "SWE-CI prior: run the CI/test loop and preserve verifier deltas"
927
- if re.search(r"\b(requirements?|define[_ -]?requirements?|test[_ -]?gap|failure|attribution|plan|locali[sz]e)\b", text):
928
- return 5, "SWE-CI prior: derive requirements from CI/test gaps before modifying code"
929
- if re.search(r"\b(modify[_ -]?code|patch|edit|update|change|implement|repair)\b", text):
930
- return 3, "SWE-CI prior: incremental requirement-backed code modification"
931
- elif profile == "swe-prbench":
932
- if re.search(r"\b(pr|pull|request|diff|patch|hunk|changed|files?|review|comment|read|inspect|list|search|get|query)\b", text):
933
- return 6, "SWE-PRBench prior: inspect PR metadata and changed diff before broad context"
934
- if re.search(r"\b(test|verify|repro|run|check|typecheck|lint|unit)\b", text):
935
- return 4, "SWE-PRBench prior: verify suspected review findings when feasible"
936
- if re.search(r"\b(finish|message|answer|respond|final|review)\b", text):
937
- return 3, "SWE-PRBench prior: deliver severity-rated review findings once evidence is sufficient"
938
- if re.search(r"\b(edit|patch|modify|update|write|apply)\b", text):
939
- return -3, "SWE-PRBench prior: defer code edits unless the review task explicitly asks for patches"
940
- elif profile == "tml-bench":
941
- if re.search(r"\b(data|dataset|train|test|sample[_ -]?submission|schema|columns?|target|id|metric|read|inspect|list|search|get|query)\b", text):
942
- return 6, "TML-Bench prior: establish data contract and submission schema before modeling"
943
- if re.search(r"\b(validate|validation|split|cv|cross[-_ ]?validation|leakage|baseline|score|metric|check)\b", text):
944
- return 6, "TML-Bench prior: honest validation and leakage checks before submission"
945
- if re.search(r"\b(train|fit|model|pipeline|preprocess|feature|predict)\b", text):
946
- return 4, "TML-Bench prior: build a reliable tabular baseline before complex ensembling"
947
- if re.search(r"\b(submit|submission|save|write|export|finish|answer|final)\b", text):
948
- return 4, "TML-Bench prior: produce and validate a schema-compatible submission artifact"
949
- elif profile == "pi-bench":
950
- if re.search(r"\b(profile|user|history|message|file|workspace|app|context|state|read|inspect|list|search|get|query)\b", text):
951
- return 6, "Pi-Bench prior: establish personal/workspace/app context before proactive action"
952
- if re.search(r"\b(intent|implicit|hidden|latent|need|preference|constraint|policy|privacy|permission|clarif(?:y|ication)|ask)\b", text):
953
- return 6, "Pi-Bench prior: resolve hidden intent, privacy, and permission uncertainty"
954
- if re.search(r"\b(tool|action|schedule|send|update|create|modify|book|message|email|calendar|file)\b", text):
955
- return 4, "Pi-Bench prior: take reversible proactive action only after context is grounded"
956
- if re.search(r"\b(verify|confirm|observe|check|finish|answer|final|done)\b", text):
957
- return 4, "Pi-Bench prior: verify observable completion and communicate concise outcome"
958
- else:
959
- if re.search(r"\b(observe|read|search|list|get|lookup|inspect|query)\b", text):
960
- return 4, "generic prior: inspect available state before irreversible actions"
961
- return 0, ""
962
-
963
-
964
- def _shortlist_item(
965
- doc: dict[str, Any],
966
- score: float,
967
- reasons: list[str],
968
- schema_keys: list[str],
969
- required_keys: list[str],
970
- required_hints: list[dict[str, str]],
971
- ) -> dict[str, Any]:
972
- return {
973
- "name": str(doc.get("name") or ""),
974
- "score": round(score, 2),
975
- "reason": "; ".join(reasons[:4]) or "available action",
976
- "argument_keys": schema_keys[:12],
977
- "required_argument_keys": required_keys[:12],
978
- "available_required_hints": required_hints[:8],
979
- "is_finish": bool(doc.get("is_finish", False)),
980
- "is_message": bool(doc.get("is_message", False)),
981
- }
982
-
983
-
984
- def extract_action_payload(text: str) -> ActionPayload | None:
985
- """Return the last valid action payload from cawdex output.
986
-
987
- Supported shapes:
988
- {"name": "finish", "arguments": {"answer": "..."}}
989
- {"action": "finish", "arguments": {"answer": "..."}}
990
- {"action": {"name": "finish", "arguments": {"answer": "..."}}}
991
- """
992
-
993
- for candidate in reversed(_json_candidates(text)):
994
- payload = _coerce_action_payload(candidate)
995
- if payload is not None:
996
- return payload
997
- return None
998
-
999
-
1000
- def _json_candidates(text: str) -> list[Any]:
1001
- candidates: list[Any] = []
1002
-
1003
- for block in re.findall(r"```(?:json|JSON)?\s*(.*?)```", text or "", flags=re.DOTALL):
1004
- value = _parse_json(block.strip())
1005
- if value is not None:
1006
- candidates.append(value)
1007
-
1008
- marker_re = re.compile(r"cawdex-exgentic action JSON\s*:\s*(\{.*?\})\s*$", re.IGNORECASE | re.DOTALL)
1009
- marker = marker_re.search(text or "")
1010
- if marker:
1011
- value = _parse_json(marker.group(1))
1012
- if value is not None:
1013
- candidates.append(value)
1014
-
1015
- decoder = json.JSONDecoder()
1016
- for match in re.finditer(r"\{", text or ""):
1017
- try:
1018
- value, _ = decoder.raw_decode(text[match.start() :])
1019
- except Exception:
1020
- continue
1021
- candidates.append(value)
1022
-
1023
- return candidates
1024
-
1025
-
1026
- def _parse_json(text: str) -> Any | None:
1027
- try:
1028
- return json.loads(text)
1029
- except Exception:
1030
- return None
1031
-
1032
-
1033
- def _coerce_action_payload(value: Any) -> ActionPayload | None:
1034
- if not isinstance(value, dict):
1035
- return None
1036
-
1037
- nested = value.get("action")
1038
- if isinstance(nested, dict):
1039
- nested_args = nested.get("arguments")
1040
- if nested_args is None:
1041
- nested_args = nested.get("args")
1042
- value = {
1043
- "name": nested.get("name") or nested.get("action") or nested.get("tool"),
1044
- "arguments": nested_args,
1045
- }
1046
-
1047
- name = value.get("name") or value.get("action") or value.get("tool")
1048
- if not isinstance(name, str) or not name.strip():
1049
- return None
1050
-
1051
- arguments = value.get("arguments")
1052
- if arguments is None:
1053
- arguments = value.get("args")
1054
- if arguments is None:
1055
- arguments = value.get("action_input")
1056
- if arguments is None:
1057
- arguments = {}
1058
- if not isinstance(arguments, dict):
1059
- arguments = {"value": arguments}
1060
-
1061
- return ActionPayload(name=name.strip(), arguments=arguments)
1
+ """Stdlib helpers for the cawdex Exgentic adapter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from dataclasses import dataclass
8
+ from difflib import SequenceMatcher
9
+ from typing import Any
10
+
11
+
12
+ SECRET_REPLACEMENTS = [
13
+ (re.compile(r"sk-or-v1-[A-Za-z0-9_-]+"), "sk-or-v1-[REDACTED]"),
14
+ (re.compile(r"sk-[A-Za-z0-9_-]{16,}"), "sk-[REDACTED]"),
15
+ (re.compile(r"hf_[A-Za-z0-9]{16,}"), "hf_[REDACTED]"),
16
+ (re.compile(r"KGAT_[A-Za-z0-9]{16,}"), "KGAT_[REDACTED]"),
17
+ (re.compile(r"npm_[A-Za-z0-9]{16,}"), "npm_[REDACTED]"),
18
+ ]
19
+
20
+ STOPWORDS = {
21
+ "about",
22
+ "after",
23
+ "again",
24
+ "also",
25
+ "and",
26
+ "any",
27
+ "are",
28
+ "available",
29
+ "been",
30
+ "before",
31
+ "being",
32
+ "can",
33
+ "context",
34
+ "could",
35
+ "current",
36
+ "does",
37
+ "for",
38
+ "from",
39
+ "has",
40
+ "have",
41
+ "into",
42
+ "latest",
43
+ "need",
44
+ "needs",
45
+ "not",
46
+ "observation",
47
+ "only",
48
+ "requested",
49
+ "should",
50
+ "task",
51
+ "that",
52
+ "the",
53
+ "then",
54
+ "this",
55
+ "use",
56
+ "user",
57
+ "with",
58
+ "you",
59
+ }
60
+
61
+
62
+ @dataclass(frozen=True)
63
+ class ActionPayload:
64
+ """Machine-readable action selected by cawdex."""
65
+
66
+ name: str
67
+ arguments: dict[str, Any]
68
+
69
+
70
+ @dataclass(frozen=True)
71
+ class ActionRepairResult:
72
+ """Deterministic repair result for benchmark action JSON."""
73
+
74
+ payload: ActionPayload
75
+ changed: bool
76
+ diagnostics: dict[str, Any]
77
+
78
+
79
+ def redact(value: Any) -> str:
80
+ text = str(value or "")
81
+ for pattern, replacement in SECRET_REPLACEMENTS:
82
+ text = pattern.sub(replacement, text)
83
+ return text
84
+
85
+
86
+ def truncate(value: Any, limit: int = 80000) -> str:
87
+ text = redact(value)
88
+ if len(text) <= limit:
89
+ return text
90
+ omitted = len(text) - limit
91
+ return text[:limit] + f"\n...[truncated {omitted} chars]"
92
+
93
+
94
+ def json_dumps(value: Any, *, limit: int = 80000) -> str:
95
+ try:
96
+ text = json.dumps(value, ensure_ascii=False, indent=2, sort_keys=True, default=str)
97
+ except Exception:
98
+ text = str(value)
99
+ return truncate(text, limit=limit)
100
+
101
+
102
+ def fold_exgentic_history(
103
+ history: list[dict[str, Any]],
104
+ *,
105
+ profile: str = "generic",
106
+ max_items: int = 16,
107
+ item_limit: int = 1200,
108
+ ) -> dict[str, Any]:
109
+ """Build a compact task-relevant ledger for long Exgentic sessions.
110
+
111
+ The adapter keeps the full raw history in memory. This folded view is what
112
+ goes back into the next model call, so noisy stdout does not crowd out the
113
+ latest app state, policy evidence, source evidence, or selected actions.
114
+ """
115
+
116
+ observations: list[dict[str, Any]] = []
117
+ actions: list[dict[str, Any]] = []
118
+ diagnostics: list[dict[str, Any]] = []
119
+ action_counts: dict[str, int] = {}
120
+
121
+ for idx, item in enumerate(history or [], start=1):
122
+ role = str(item.get("role", ""))
123
+ if role == "observation":
124
+ observations.append(
125
+ {
126
+ "turn": idx,
127
+ "summary": truncate(json_dumps(item.get("content"), limit=item_limit), limit=item_limit),
128
+ }
129
+ )
130
+ elif role == "selected_action":
131
+ compact_actions = _compact_selected_actions(item.get("content"), item_limit=item_limit)
132
+ for action in compact_actions:
133
+ name = action.get("name") or "unknown"
134
+ action_counts[name] = action_counts.get(name, 0) + 1
135
+ actions.append({"turn": idx, "actions": compact_actions})
136
+ elif role == "cawdex":
137
+ diagnostic = _compact_cawdex_diagnostic(item, item_limit=item_limit)
138
+ if diagnostic is not None:
139
+ diagnostics.append({"turn": idx, **diagnostic})
140
+ elif role == "action_repair":
141
+ diagnostics.append(
142
+ {
143
+ "turn": idx,
144
+ "kind": "action_repair",
145
+ "evidence": truncate(json_dumps(item.get("content"), limit=item_limit), limit=item_limit),
146
+ }
147
+ )
148
+
149
+ latest_observation = observations[-1] if observations else None
150
+ latest_action = actions[-1] if actions else None
151
+ return {
152
+ "format": "cawdex-exgentic-folded-history-v1",
153
+ "profile": profile,
154
+ "turns_seen": len(history or []),
155
+ "latest_observation": latest_observation,
156
+ "latest_action": latest_action,
157
+ "no_effect_repeat_actions": _recent_no_effect_action_names(history or []),
158
+ "recent_observations": observations[-max_items:],
159
+ "recent_actions": actions[-max_items:],
160
+ "diagnostics": diagnostics[-max_items:],
161
+ "action_counts": action_counts,
162
+ "discipline": _folding_discipline(profile),
163
+ }
164
+
165
+
166
+ def repair_exgentic_action_payload(
167
+ payload: ActionPayload,
168
+ action_docs: list[dict[str, Any]],
169
+ *,
170
+ argument_hints: Any = None,
171
+ ) -> ActionRepairResult:
172
+ """Repair near-miss action names and argument keys before ActionType build.
173
+
174
+ This is intentionally deterministic and conservative. It fixes common model
175
+ output drift such as camelCase action names, case-only mismatches, and
176
+ schema-key casing/separator mistakes, while leaving unresolved names intact
177
+ so the caller can still fail or fallback explicitly.
178
+ """
179
+
180
+ docs = [doc for doc in action_docs or [] if isinstance(doc, dict) and doc.get("name")]
181
+ matched_doc, match_reason, match_score = _resolve_action_doc(payload.name, docs)
182
+ repaired_name = str(matched_doc.get("name")) if matched_doc else payload.name
183
+ repaired_args, arg_diagnostics = _repair_action_arguments(
184
+ payload.arguments,
185
+ matched_doc.get("arguments_schema") if matched_doc else None,
186
+ argument_hints=argument_hints,
187
+ )
188
+
189
+ changed = repaired_name != payload.name or repaired_args != payload.arguments
190
+ if matched_doc is None:
191
+ status = "unresolved_action_name"
192
+ elif changed:
193
+ status = "repaired"
194
+ else:
195
+ status = "unchanged"
196
+
197
+ diagnostics = {
198
+ "status": status,
199
+ "original_name": payload.name,
200
+ "repaired_name": repaired_name,
201
+ "name_match_reason": match_reason,
202
+ "name_match_score": round(match_score, 3),
203
+ **arg_diagnostics,
204
+ }
205
+ return ActionRepairResult(
206
+ payload=ActionPayload(name=repaired_name, arguments=repaired_args),
207
+ changed=changed,
208
+ diagnostics=diagnostics,
209
+ )
210
+
211
+
212
+ def fallback_exgentic_action_payload(
213
+ action_docs: list[dict[str, Any]],
214
+ *,
215
+ task: Any = None,
216
+ context: Any = None,
217
+ history: list[dict[str, Any]] | None = None,
218
+ profile: str = "generic",
219
+ reason: str = "no_valid_action_json",
220
+ ) -> ActionRepairResult | None:
221
+ """Select a conservative fallback action when the model emits no valid JSON.
222
+
223
+ The old adapter bias was finish/message first. That is dangerous for
224
+ multi-step benchmarks because a transient malformed response can become a
225
+ premature stop. This selector reuses the same shortlist and exact required
226
+ argument hints as the main prompt, preferring viable non-finish actions
227
+ while the latest observation is not completion-ready.
228
+ """
229
+
230
+ docs = [doc for doc in action_docs or [] if isinstance(doc, dict) and doc.get("name")]
231
+ if not docs:
232
+ return None
233
+
234
+ history_items = history or []
235
+ latest_observation = _latest_history_content(history_items, "observation")
236
+ argument_hints = {
237
+ "latest_observation": latest_observation,
238
+ "context": context or {},
239
+ }
240
+ shortlist = shortlist_exgentic_actions(
241
+ docs,
242
+ task=task,
243
+ context=context,
244
+ history=history_items,
245
+ profile=profile,
246
+ )
247
+ completion_ready = bool(shortlist.get("completion_ready"))
248
+ no_effect_repeat_actions = [
249
+ str(name)
250
+ for name in shortlist.get("avoid_no_effect_repeat_actions") or []
251
+ if str(name)
252
+ ]
253
+ candidate_names = _fallback_candidate_names(
254
+ shortlist,
255
+ docs,
256
+ completion_ready=completion_ready,
257
+ avoid_names=no_effect_repeat_actions,
258
+ )
259
+ skipped: list[dict[str, Any]] = []
260
+
261
+ for name in candidate_names:
262
+ doc = _doc_by_name(docs, name)
263
+ if doc is None:
264
+ continue
265
+ is_completion = bool(doc.get("is_finish") or doc.get("is_message"))
266
+ if not completion_ready and is_completion:
267
+ skipped.append({"name": name, "reason": "completion_not_ready"})
268
+ continue
269
+ repair = repair_exgentic_action_payload(
270
+ ActionPayload(name=name, arguments={}),
271
+ docs,
272
+ argument_hints=argument_hints,
273
+ )
274
+ missing = _missing_required_arguments(repair.payload.arguments, doc.get("arguments_schema"))
275
+ if missing and not (completion_ready and is_completion):
276
+ skipped.append({"name": name, "reason": "missing_required_arguments", "missing": missing})
277
+ continue
278
+ diagnostics = {
279
+ "status": "fallback_selected",
280
+ "fallback_reason": reason,
281
+ "selected_name": repair.payload.name,
282
+ "completion_ready": completion_ready,
283
+ "avoid_no_effect_repeat_actions": no_effect_repeat_actions,
284
+ "candidate_names": candidate_names[:12],
285
+ "skipped_candidates": skipped[:8],
286
+ "shortlist": shortlist,
287
+ "repair": repair.diagnostics,
288
+ }
289
+ return ActionRepairResult(payload=repair.payload, changed=True, diagnostics=diagnostics)
290
+
291
+ return None
292
+
293
+
294
+ def shortlist_exgentic_actions(
295
+ action_docs: list[dict[str, Any]],
296
+ *,
297
+ task: Any = None,
298
+ context: Any = None,
299
+ history: list[dict[str, Any]] | None = None,
300
+ profile: str = "generic",
301
+ limit: int = 8,
302
+ ) -> dict[str, Any]:
303
+ """Rank available actions into a compact shortlist for the next step.
304
+
305
+ Exgentic still receives the full action schema list below this shortlist.
306
+ The shortlist is a deterministic scaffold: it narrows attention to likely
307
+ actions and finish/message timing without hiding benchmark capabilities.
308
+ """
309
+
310
+ docs = [doc for doc in action_docs or [] if isinstance(doc, dict) and doc.get("name")]
311
+ safe_limit = max(1, min(16, int(limit or 8)))
312
+ latest_observation = _latest_history_content(history or [], "observation")
313
+ latest_observation_text = json_dumps(latest_observation, limit=6000) if latest_observation is not None else ""
314
+ argument_hints = {
315
+ "latest_observation": latest_observation,
316
+ "context": context or {},
317
+ }
318
+ target_text = " ".join(
319
+ [
320
+ str(task or ""),
321
+ json_dumps(context or {}, limit=6000),
322
+ latest_observation_text,
323
+ ]
324
+ ).lower()
325
+ tokens = _keyword_tokens(target_text)
326
+ completion_ready = _completion_ready(latest_observation_text)
327
+ latest_action_name = _latest_selected_action_name(history or [])
328
+ has_recent_error = _has_recent_action_error(history or [])
329
+ no_effect_repeat_actions = _recent_no_effect_action_names(history or [])
330
+ no_effect_repeat_set = {name.lower() for name in no_effect_repeat_actions}
331
+
332
+ scored: list[tuple[float, str, dict[str, Any], list[str], list[str], list[str], list[dict[str, str]]]] = []
333
+ for doc in docs:
334
+ name = str(doc.get("name") or "")
335
+ action_text = _action_doc_text(doc)
336
+ schema = doc.get("arguments_schema")
337
+ schema_keys = _schema_property_keys(schema)
338
+ required_keys = _schema_required_keys(schema)
339
+ required_hints = _required_argument_hints(required_keys, argument_hints)
340
+ score = 0.0
341
+ reasons: list[str] = []
342
+
343
+ token_hits = [token for token in tokens if token in action_text][:6]
344
+ if token_hits:
345
+ score += min(12, len(token_hits) * 2)
346
+ reasons.append(f"matches task/observation tokens: {', '.join(token_hits)}")
347
+
348
+ schema_hits = [key for key in schema_keys if key.lower() in target_text][:6]
349
+ if schema_hits:
350
+ score += min(10, len(schema_hits) * 2)
351
+ reasons.append(f"schema keys appear in current state: {', '.join(schema_hits)}")
352
+
353
+ if required_hints:
354
+ score += min(8, len(required_hints) * 3)
355
+ reasons.append(
356
+ "required args available in current state: "
357
+ + ", ".join(item["key"] for item in required_hints[:4])
358
+ )
359
+
360
+ prior_score, prior_reason = _profile_action_prior(profile, name, action_text)
361
+ if prior_score:
362
+ score += prior_score
363
+ reasons.append(prior_reason)
364
+
365
+ name_tokens = [token for token in _keyword_tokens(name.replace("_", " ")) if token not in {"action"}]
366
+ if name_tokens and all(token in latest_observation_text.lower() for token in name_tokens):
367
+ score += 8
368
+ reasons.append("action name matches explicit latest-observation cue")
369
+
370
+ is_completion = bool(doc.get("is_finish") or doc.get("is_message"))
371
+ if is_completion:
372
+ if completion_ready:
373
+ score += 8
374
+ reasons.append("latest observation suggests completion is ready")
375
+ else:
376
+ score -= 7
377
+ reasons.append("defer finish/message until benchmark-visible completion evidence")
378
+
379
+ if latest_action_name and name.lower() == latest_action_name.lower():
380
+ score -= 2
381
+ reasons.append("same as previous selected action")
382
+ if has_recent_error:
383
+ score -= 4
384
+ reasons.append("avoid repeating after recent action/schema error")
385
+ if not completion_ready and name.lower() in no_effect_repeat_set:
386
+ score -= 10
387
+ reasons.append("avoid repeating no-effect action; latest observation did not change")
388
+
389
+ scored.append((score, name.lower(), doc, reasons, schema_keys, required_keys, required_hints))
390
+
391
+ scored.sort(key=lambda item: (-item[0], item[1]))
392
+ shortlisted = [
393
+ _shortlist_item(doc, score, reasons, schema_keys, required_keys, required_hints)
394
+ for score, _name, doc, reasons, schema_keys, required_keys, required_hints in scored[:safe_limit]
395
+ ]
396
+ shortlisted_names = {str(item.get("name", "")).lower() for item in shortlisted}
397
+ deferred_completion = [
398
+ str(doc.get("name"))
399
+ for doc in docs
400
+ if (doc.get("is_finish") or doc.get("is_message"))
401
+ and str(doc.get("name", "")).lower() not in shortlisted_names
402
+ and not completion_ready
403
+ ]
404
+
405
+ return {
406
+ "format": "cawdex-exgentic-action-shortlist-v1",
407
+ "profile": profile,
408
+ "action_count": len(docs),
409
+ "shortlist_limit": safe_limit,
410
+ "completion_ready": completion_ready,
411
+ "avoid_no_effect_repeat_actions": no_effect_repeat_actions,
412
+ "shortlisted_actions": shortlisted,
413
+ "deferred_completion_actions": deferred_completion,
414
+ "discipline": "Prefer shortlisted actions when they fit the latest observation; use full schemas below if the current state clearly requires a non-shortlisted action. If avoid_no_effect_repeat_actions is non-empty, change strategy unless no other viable action has its required arguments.",
415
+ }
416
+
417
+
418
+ def safe_id(value: Any, default: str = "session") -> str:
419
+ raw = str(value or default)
420
+ safe = re.sub(r"[^A-Za-z0-9_.-]+", "-", raw).strip("-")
421
+ return safe or default
422
+
423
+
424
+ def _compact_selected_actions(value: Any, *, item_limit: int) -> list[dict[str, Any]]:
425
+ actions = value if isinstance(value, list) else [value]
426
+ compact: list[dict[str, Any]] = []
427
+ for action in actions:
428
+ if not isinstance(action, dict):
429
+ compact.append({"name": "unknown", "summary": truncate(action, limit=item_limit)})
430
+ continue
431
+ raw_args = action.get("arguments", {})
432
+ args = raw_args if isinstance(raw_args, dict) else {"value": raw_args}
433
+ compact.append(
434
+ {
435
+ "name": str(action.get("name") or "unknown"),
436
+ "argument_keys": sorted(str(key) for key in args.keys()),
437
+ "arguments": truncate(json_dumps(args, limit=item_limit), limit=item_limit),
438
+ }
439
+ )
440
+ return compact
441
+
442
+
443
+ def _compact_cawdex_diagnostic(item: dict[str, Any], *, item_limit: int) -> dict[str, Any] | None:
444
+ returncode = item.get("returncode")
445
+ stderr = str(item.get("stderr") or "")
446
+ stdout = str(item.get("stdout") or "")
447
+ text = "\n".join(part for part in [stderr, stdout] if part)
448
+ if returncode in (None, 0) and not re.search(
449
+ r"\b(error|invalid|unknown action|schema|malformed|permission|timed out|timeout|failed)\b",
450
+ text,
451
+ flags=re.IGNORECASE,
452
+ ):
453
+ return None
454
+ return {
455
+ "returncode": returncode,
456
+ "evidence": truncate(text, limit=item_limit),
457
+ }
458
+
459
+
460
+ def _folding_discipline(profile: str) -> str:
461
+ if profile == "appworld":
462
+ return "Use latest_observation as authoritative app/API state; preserve IDs, dates, permissions, and record integrity."
463
+ if profile == "browsecomp":
464
+ return "Carry forward verified sources and unresolved search facets; do not treat snippets or stale single-source claims as final evidence."
465
+ if profile == "tau2":
466
+ return "Carry forward policy constraints, customer intent, tool results, and pending confirmations before selecting the next action."
467
+ if profile == "terminalworld":
468
+ return "TerminalWorld discipline: carry forward instruction.md/task artifact requirements, generated files/services, command outputs, verifier status, and any solve.sh/reference-solution avoidance before selecting the next action."
469
+ if profile == "webdevbench":
470
+ return "Carry forward canary requirements, frontend/backend state, integration evidence, and production/security gaps before selecting the next action."
471
+ if profile == "swe-cycle":
472
+ return "Carry forward lifecycle phase, bare-repo environment setup state, implementation requirements, generated/selected tests, judge commands, and unresolved phase gaps before selecting the next action."
473
+ if profile == "swe-ci":
474
+ return "Carry forward current/target commits, test gaps, inferred requirements, touched files, verifier deltas, and unresolved regressions before selecting the next action."
475
+ if profile == "swe-prbench":
476
+ return "Carry forward PR title/description, changed files, diff hunks, suspected findings, evidence gaps, and context-expansion reasons before selecting the next action."
477
+ if profile == "tml-bench":
478
+ return "Carry forward train/test/sample submission paths, ID/target columns, metric, validation split, leakage checks, model artifacts, submission path, and validity evidence before selecting the next action."
479
+ if profile == "pi-bench":
480
+ return "Carry forward user profile, current request, message/file/app context, available domain tools, hidden-intent hypotheses, clarification state, privacy risk, selected actions, and observable completion evidence before selecting the next action."
481
+ return "Use the folded ledger as orientation, then rely on the latest observation and available action schemas for the next action."
482
+
483
+
484
+ def _resolve_action_doc(
485
+ name: str,
486
+ docs: list[dict[str, Any]],
487
+ ) -> tuple[dict[str, Any] | None, str, float]:
488
+ raw = str(name or "")
489
+ if not raw:
490
+ return None, "empty", 0.0
491
+
492
+ for doc in docs:
493
+ candidate = str(doc.get("name") or "")
494
+ if candidate == raw:
495
+ return doc, "exact", 1.0
496
+
497
+ lowered = raw.lower()
498
+ for doc in docs:
499
+ candidate = str(doc.get("name") or "")
500
+ if candidate.lower() == lowered:
501
+ return doc, "case_insensitive", 1.0
502
+
503
+ normalized = _normalized_identifier(raw)
504
+ for doc in docs:
505
+ candidate = str(doc.get("name") or "")
506
+ if _normalized_identifier(candidate) == normalized:
507
+ return doc, "normalized_identifier", 1.0
508
+
509
+ best_doc: dict[str, Any] | None = None
510
+ best_score = 0.0
511
+ second_score = 0.0
512
+ for doc in docs:
513
+ candidate = str(doc.get("name") or "")
514
+ candidate_norm = _normalized_identifier(candidate)
515
+ score = SequenceMatcher(None, normalized, candidate_norm).ratio() if normalized and candidate_norm else 0.0
516
+ if normalized and candidate_norm and (normalized in candidate_norm or candidate_norm in normalized):
517
+ score = max(score, 0.82)
518
+ if score > best_score:
519
+ second_score = best_score
520
+ best_score = score
521
+ best_doc = doc
522
+ elif score > second_score:
523
+ second_score = score
524
+
525
+ if best_doc is not None and best_score >= 0.82 and best_score - second_score >= 0.04:
526
+ return best_doc, "fuzzy_identifier", best_score
527
+ return None, "unresolved", best_score
528
+
529
+
530
def _repair_action_arguments(
    arguments: dict[str, Any],
    schema: Any,
    *,
    argument_hints: Any = None,
) -> tuple[dict[str, Any], dict[str, Any]]:
    """Align argument keys with the schema's property names.

    Keys that already match a schema property are kept. Keys that match only
    after ``_normalized_identifier`` are renamed to the schema spelling. All
    other keys are dropped. Missing required arguments are then filled from
    ``argument_hints`` via ``_fill_required_arguments``.

    Args:
        arguments: Proposed arguments; the mapping itself is not modified.
        schema: Arguments schema handed to ``_schema_property_keys``.
        argument_hints: Optional data searched for required values.

    Returns:
        ``(repaired_arguments, report)``. The report lists the key renames,
        dropped keys, filled required arguments and up to 24 schema keys.
        When the schema exposes no property keys, a copy of the arguments is
        returned unchanged with an empty report.
    """
    args = dict(arguments or {})
    schema_keys = _schema_property_keys(schema)
    if not schema_keys:
        return args, {
            "argument_key_repairs": [],
            "dropped_argument_keys": [],
            "filled_required_arguments": [],
            "schema_keys": [],
        }

    schema_key_set = set(schema_keys)
    key_by_normalized = {_normalized_identifier(key): key for key in schema_keys}
    # Keys the caller spelled exactly as the schema does. An alias such as
    # "userId" must never overwrite the value given under "user_id",
    # whichever of the two comes first in the mapping.
    exact_keys = {str(key) for key in args if str(key) in schema_key_set}

    repaired: dict[str, Any] = {}
    key_repairs: list[dict[str, str]] = []
    dropped: list[str] = []

    for key, value in args.items():
        text_key = str(key)
        if text_key in schema_key_set:
            repaired[text_key] = value
            continue
        normalized = _normalized_identifier(text_key)
        # A key without alphanumerics carries no identity to alias on.
        canonical = key_by_normalized.get(normalized) if normalized else None
        if canonical is None or canonical in exact_keys:
            dropped.append(text_key)
            continue
        repaired[canonical] = value
        key_repairs.append({"from": text_key, "to": canonical})

    filled = _fill_required_arguments(repaired, schema, argument_hints)
    return repaired, {
        "argument_key_repairs": key_repairs,
        "dropped_argument_keys": dropped,
        "filled_required_arguments": filled,
        "schema_keys": schema_keys[:24],
    }
570
+
571
+
572
+ def _fill_required_arguments(
573
+ repaired: dict[str, Any],
574
+ schema: Any,
575
+ argument_hints: Any,
576
+ ) -> list[dict[str, str]]:
577
+ if not isinstance(schema, dict):
578
+ return []
579
+ required = [str(key) for key in schema.get("required") or [] if str(key)]
580
+ if not required:
581
+ return []
582
+ hint_index = _argument_hint_index(argument_hints)
583
+ filled: list[dict[str, str]] = []
584
+ existing = {_normalized_identifier(key) for key in repaired.keys()}
585
+
586
+ for key in required:
587
+ norm = _normalized_identifier(key)
588
+ if not norm or norm in existing:
589
+ continue
590
+ match = hint_index.get(norm)
591
+ if match is None:
592
+ continue
593
+ value, source = match
594
+ if value is None:
595
+ continue
596
+ repaired[key] = value
597
+ existing.add(norm)
598
+ filled.append({"key": key, "source": source})
599
+ return filled
600
+
601
+
602
+ def _argument_hint_index(value: Any) -> dict[str, tuple[Any, str]]:
603
+ index: dict[str, tuple[Any, str]] = {}
604
+
605
+ def visit(item: Any, path: str) -> None:
606
+ if isinstance(item, dict):
607
+ for key, child in item.items():
608
+ key_text = str(key)
609
+ child_path = f"{path}.{key_text}" if path else key_text
610
+ norm = _normalized_identifier(key_text)
611
+ if norm and norm not in index and _hint_value_is_usable(child):
612
+ index[norm] = (child, child_path)
613
+ visit(child, child_path)
614
+ elif isinstance(item, list):
615
+ for idx, child in enumerate(item[:50]):
616
+ visit(child, f"{path}[{idx}]" if path else f"[{idx}]")
617
+
618
+ visit(value, "")
619
+ return index
620
+
621
+
622
+ def _hint_value_is_usable(value: Any) -> bool:
623
+ if value is None:
624
+ return False
625
+ if isinstance(value, str):
626
+ return bool(value.strip())
627
+ if isinstance(value, (bool, int, float)):
628
+ return True
629
+ if isinstance(value, (dict, list)):
630
+ return bool(value)
631
+ return True
632
+
633
+
634
+ def _normalized_identifier(value: Any) -> str:
635
+ return re.sub(r"[^a-z0-9]+", "", str(value or "").lower())
636
+
637
+
638
+ def _latest_history_content(history: list[dict[str, Any]], role: str) -> Any | None:
639
+ for item in reversed(history or []):
640
+ if isinstance(item, dict) and item.get("role") == role:
641
+ return item.get("content")
642
+ return None
643
+
644
+
645
def _latest_selected_action_name(history: list[dict[str, Any]]) -> str | None:
    """Return the name of the most recently selected action, if there is one.

    The latest ``"selected_action"`` content may be one action or a list of
    actions; the first dict with a truthy ``"name"`` supplies the result.
    """
    content = _latest_history_content(history, "selected_action")
    candidates = content if isinstance(content, list) else [content]
    named = (
        str(candidate.get("name"))
        for candidate in candidates
        if isinstance(candidate, dict) and candidate.get("name")
    )
    return next(named, None)
652
+
653
+
654
+ def _recent_no_effect_action_names(history: list[dict[str, Any]]) -> list[str]:
655
+ latest_observation_idx: int | None = None
656
+ previous_observation_idx: int | None = None
657
+ for idx in range(len(history or []) - 1, -1, -1):
658
+ item = history[idx]
659
+ if not isinstance(item, dict) or item.get("role") != "observation":
660
+ continue
661
+ if latest_observation_idx is None:
662
+ latest_observation_idx = idx
663
+ else:
664
+ previous_observation_idx = idx
665
+ break
666
+
667
+ if latest_observation_idx is None or previous_observation_idx is None:
668
+ return []
669
+
670
+ latest = history[latest_observation_idx].get("content")
671
+ previous = history[previous_observation_idx].get("content")
672
+ latest_text = json_dumps(latest, limit=4000).lower()
673
+ unchanged = (
674
+ _observation_fingerprint(latest) == _observation_fingerprint(previous)
675
+ or bool(
676
+ re.search(
677
+ r"\b(no change|no changes|unchanged|same state|nothing changed|no effect|still pending|"
678
+ r"did not (?:change|update|move|complete|resolve)|not (?:changed|updated|completed|resolved))\b",
679
+ latest_text,
680
+ )
681
+ )
682
+ )
683
+ if not unchanged:
684
+ return []
685
+
686
+ names: list[str] = []
687
+ for item in history[previous_observation_idx + 1:latest_observation_idx]:
688
+ if not isinstance(item, dict) or item.get("role") != "selected_action":
689
+ continue
690
+ for name in _selected_action_names(item.get("content")):
691
+ push_unique(names, name)
692
+ return names
693
+
694
+
695
+ def _selected_action_names(value: Any) -> list[str]:
696
+ actions = value if isinstance(value, list) else [value]
697
+ names: list[str] = []
698
+ for action in actions:
699
+ if isinstance(action, dict) and action.get("name"):
700
+ push_unique(names, str(action.get("name")))
701
+ return names
702
+
703
+
704
def _observation_fingerprint(value: Any) -> str:
    """Return a comparison key for an observation.

    The value is serialised with ``json_dumps`` (limit 8000), lower-cased,
    whitespace runs are collapsed to single spaces, and the ends trimmed.
    """
    serialized = json_dumps(value, limit=8000)
    collapsed = re.sub(r"\s+", " ", serialized.lower())
    return collapsed.strip()
707
+
708
+
709
+ def _has_recent_action_error(history: list[dict[str, Any]]) -> bool:
710
+ for item in reversed((history or [])[-4:]):
711
+ if not isinstance(item, dict) or item.get("role") != "cawdex":
712
+ continue
713
+ diagnostic = _compact_cawdex_diagnostic(item, item_limit=600)
714
+ if diagnostic is not None:
715
+ return True
716
+ return False
717
+
718
+
719
+ def _keyword_tokens(text: str) -> list[str]:
720
+ seen: set[str] = set()
721
+ tokens: list[str] = []
722
+ for raw in re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,}", text or ""):
723
+ token = raw.lower().strip("-_")
724
+ if token in STOPWORDS or len(token) < 3 or token in seen:
725
+ continue
726
+ seen.add(token)
727
+ tokens.append(token)
728
+ if len(tokens) >= 80:
729
+ break
730
+ return tokens
731
+
732
+
733
+ def _completion_ready(latest_observation_text: str) -> bool:
734
+ text = (latest_observation_text or "").lower()
735
+ if not text:
736
+ return False
737
+ if re.search(r"\b(pending|missing|need|needs|required|error|failed|invalid|not complete|unresolved)\b", text):
738
+ return False
739
+ return bool(re.search(r"\b(done|complete|completed|success|succeeded|confirmed|final answer|resolved)\b", text))
740
+
741
+
742
def _action_doc_text(doc: dict[str, Any]) -> str:
    """Build the lower-cased searchable text for an action doc.

    The text joins, with spaces, the doc's name, its description, the
    property keys of its arguments schema, and the serialised schema
    (``json_dumps`` with limit 4000).
    """
    schema = doc.get("arguments_schema")
    fragments = (
        str(doc.get("name") or ""),
        str(doc.get("description") or ""),
        " ".join(_schema_property_keys(schema)),
        json_dumps(schema or {}, limit=4000),
    )
    return " ".join(fragments).lower()
750
+
751
+
752
+ def _schema_property_keys(schema: Any) -> list[str]:
753
+ if not isinstance(schema, dict):
754
+ return []
755
+ keys: list[str] = []
756
+ properties = schema.get("properties")
757
+ if isinstance(properties, dict):
758
+ keys.extend(str(key) for key in properties.keys())
759
+ for nested_key in ("$defs", "definitions"):
760
+ nested = schema.get(nested_key)
761
+ if isinstance(nested, dict):
762
+ for value in nested.values():
763
+ keys.extend(_schema_property_keys(value))
764
+ seen: set[str] = set()
765
+ deduped: list[str] = []
766
+ for key in keys:
767
+ lowered = key.lower()
768
+ if lowered in seen:
769
+ continue
770
+ seen.add(lowered)
771
+ deduped.append(key)
772
+ return deduped
773
+
774
+
775
+ def _schema_required_keys(schema: Any) -> list[str]:
776
+ if not isinstance(schema, dict):
777
+ return []
778
+ required = schema.get("required")
779
+ if not isinstance(required, list):
780
+ return []
781
+ seen: set[str] = set()
782
+ keys: list[str] = []
783
+ for key in required:
784
+ text = str(key or "").strip()
785
+ lowered = text.lower()
786
+ if not text or lowered in seen:
787
+ continue
788
+ seen.add(lowered)
789
+ keys.append(text)
790
+ return keys
791
+
792
+
793
+ def _required_argument_hints(required_keys: list[str], argument_hints: Any) -> list[dict[str, str]]:
794
+ if not required_keys:
795
+ return []
796
+ hint_index = _argument_hint_index(argument_hints)
797
+ hints: list[dict[str, str]] = []
798
+ for key in required_keys:
799
+ match = hint_index.get(_normalized_identifier(key))
800
+ if match is None:
801
+ continue
802
+ value, source = match
803
+ if not _hint_value_is_usable(value):
804
+ continue
805
+ hints.append(
806
+ {
807
+ "key": key,
808
+ "source": source,
809
+ "value_preview": truncate(json_dumps(value, limit=360), limit=360),
810
+ }
811
+ )
812
+ return hints
813
+
814
+
815
def _missing_required_arguments(arguments: dict[str, Any], schema: Any) -> list[str]:
    """List required schema keys that lack a usable argument value.

    Keys are compared by normalised identifier. An argument whose value is
    unusable (per ``_hint_value_is_usable``) does not count as supplied.
    """
    required = _schema_required_keys(schema)
    if not required:
        return []

    supplied: set[str] = set()
    for key, value in (arguments or {}).items():
        if _hint_value_is_usable(value):
            supplied.add(_normalized_identifier(key))
    return [key for key in required if _normalized_identifier(key) not in supplied]
825
+
826
+
827
+ def _fallback_candidate_names(
828
+ shortlist: dict[str, Any],
829
+ docs: list[dict[str, Any]],
830
+ *,
831
+ completion_ready: bool,
832
+ avoid_names: list[str] | None = None,
833
+ ) -> list[str]:
834
+ names: list[str] = []
835
+ delayed: list[str] = []
836
+ avoid = {str(name).lower() for name in avoid_names or [] if str(name)}
837
+
838
+ def add_candidate(value: Any) -> None:
839
+ name = str(value or "")
840
+ if not name:
841
+ return
842
+ if not completion_ready and name.lower() in avoid:
843
+ push_unique(delayed, name)
844
+ return
845
+ push_unique(names, name)
846
+
847
+ if completion_ready:
848
+ for doc in docs:
849
+ if (doc.get("is_finish") or doc.get("is_message")) and doc.get("name"):
850
+ add_candidate(doc.get("name"))
851
+ for doc in docs:
852
+ name = str(doc.get("name") or "")
853
+ if name.lower() in {"finish", "final", "done"}:
854
+ add_candidate(name)
855
+
856
+ for item in shortlist.get("shortlisted_actions") or []:
857
+ if isinstance(item, dict) and item.get("name"):
858
+ add_candidate(item.get("name"))
859
+
860
+ if not completion_ready:
861
+ for doc in docs:
862
+ if not (doc.get("is_finish") or doc.get("is_message")) and doc.get("name"):
863
+ add_candidate(doc.get("name"))
864
+
865
+ for doc in docs:
866
+ if (completion_ready or len(docs) == 1) and doc.get("name"):
867
+ add_candidate(doc.get("name"))
868
+ for name in delayed:
869
+ push_unique(names, name)
870
+ return names
871
+
872
+
873
+ def _doc_by_name(docs: list[dict[str, Any]], name: str) -> dict[str, Any] | None:
874
+ for doc in docs:
875
+ if str(doc.get("name") or "") == name:
876
+ return doc
877
+ lowered = str(name or "").lower()
878
+ for doc in docs:
879
+ if str(doc.get("name") or "").lower() == lowered:
880
+ return doc
881
+ return None
882
+
883
+
884
def push_unique(values: list[str], value: str) -> None:
    """Append ``value`` to ``values`` in place unless it is already there."""
    if value in values:
        return
    values.append(value)
887
+
888
+
889
+ def _profile_action_prior(profile: str, name: str, action_text: str) -> tuple[float, str]:
890
+ text = f"{name} {action_text}".lower()
891
+ if profile == "appworld":
892
+ if re.search(r"\b(get|lookup|list|search|find|query|read|fetch|load|inspect)\b", text):
893
+ return 5, "AppWorld prior: inspect app/API state before mutating records"
894
+ if re.search(r"\b(create|update|set|delete|cancel|submit|send)\b", text):
895
+ return 3, "AppWorld prior: likely state-changing app action"
896
+ elif profile == "browsecomp":
897
+ if re.search(r"\b(search|query|browse|web|open|read|fetch|source|cite|visit)\b", text):
898
+ return 6, "BrowseComp prior: gather and verify source evidence"
899
+ if re.search(r"\b(answer|final|finish|message|respond)\b", text):
900
+ return 2, "BrowseComp prior: final answer action when evidence is sufficient"
901
+ elif profile == "tau2":
902
+ if re.search(r"\b(policy|lookup|search|get|list|read|check|verify|order|customer|account|ticket)\b", text):
903
+ return 5, "tau2 prior: check policy/customer/tool state before commitments"
904
+ if re.search(r"\b(update|create|cancel|refund|transfer|confirm|submit|send)\b", text):
905
+ return 3, "tau2 prior: policy-supported customer-service action"
906
+ elif profile == "webdevbench":
907
+ if re.search(r"\b(requirements?|canar(?:y|ies)|spec|product|plan|architecture|read|inspect|list|search|get|query)\b", text):
908
+ return 6, "WebDevBench prior: preserve product/canary requirements before building"
909
+ if re.search(r"\b(e2e|integration|api|browser|playwright|cypress|security|audit|build|deploy|migration|load|concurrency|health)\b", text):
910
+ return 5, "WebDevBench prior: verify full-stack, production, or security evidence"
911
+ if re.search(r"\b(create|update|modify|deploy|submit|send)\b", text):
912
+ return 3, "WebDevBench prior: app creation/modification action"
913
+ elif profile == "swe-cycle":
914
+ if re.search(r"\b(fullcycle|envsetup|codeimpl|testgen|phase|requirements?|issue|read|inspect|list|search|get|query|run_script|parsing_script|selected_test_files_to_run|environment_setup_commit|before_repo_set_cmd|image_name)\b", text):
915
+ return 6, "SWE-Cycle prior: identify lifecycle phase, harness fields, and issue requirements"
916
+ if re.search(r"\b(setup|install|bootstrap|dependencies|env|environment|import|collect|discover|build)\b", text):
917
+ return 6, "SWE-Cycle prior: reconstruct bare-repo environment before code/test edits"
918
+ if re.search(r"\b(testgen|test|tests|pytest|jest|vitest|selected|judge|swe[-_ ]?judge|static|dynamic|verify|check)\b", text):
919
+ return 5, "SWE-Cycle prior: generate/validate tests and preserve judge evidence"
920
+ if re.search(r"\b(codeimpl|modify|patch|edit|update|change|implement|repair)\b", text):
921
+ return 3, "SWE-Cycle prior: implementation action after lifecycle context is established"
922
+ elif profile == "swe-ci":
923
+ if re.search(r"\b(current|target|commit|sha|history|log|diff|status|read|inspect|list|search|get|query)\b", text):
924
+ return 6, "SWE-CI prior: establish current/target commits, test gaps, and repo evolution context"
925
+ if re.search(r"\b(run[_ -]?tests?|test|ci|verify|check|tox|nox|act|pytest|unittest)\b", text):
926
+ return 6, "SWE-CI prior: run the CI/test loop and preserve verifier deltas"
927
+ if re.search(r"\b(requirements?|define[_ -]?requirements?|test[_ -]?gap|failure|attribution|plan|locali[sz]e)\b", text):
928
+ return 5, "SWE-CI prior: derive requirements from CI/test gaps before modifying code"
929
+ if re.search(r"\b(modify[_ -]?code|patch|edit|update|change|implement|repair)\b", text):
930
+ return 3, "SWE-CI prior: incremental requirement-backed code modification"
931
+ elif profile == "swe-prbench":
932
+ if re.search(r"\b(pr|pull|request|diff|patch|hunk|changed|files?|review|comment|read|inspect|list|search|get|query)\b", text):
933
+ return 6, "SWE-PRBench prior: inspect PR metadata and changed diff before broad context"
934
+ if re.search(r"\b(test|verify|repro|run|check|typecheck|lint|unit)\b", text):
935
+ return 4, "SWE-PRBench prior: verify suspected review findings when feasible"
936
+ if re.search(r"\b(finish|message|answer|respond|final|review)\b", text):
937
+ return 3, "SWE-PRBench prior: deliver severity-rated review findings once evidence is sufficient"
938
+ if re.search(r"\b(edit|patch|modify|update|write|apply)\b", text):
939
+ return -3, "SWE-PRBench prior: defer code edits unless the review task explicitly asks for patches"
940
+ elif profile == "tml-bench":
941
+ if re.search(r"\b(data|dataset|train|test|sample[_ -]?submission|schema|columns?|target|id|metric|read|inspect|list|search|get|query)\b", text):
942
+ return 6, "TML-Bench prior: establish data contract and submission schema before modeling"
943
+ if re.search(r"\b(validate|validation|split|cv|cross[-_ ]?validation|leakage|baseline|score|metric|check)\b", text):
944
+ return 6, "TML-Bench prior: honest validation and leakage checks before submission"
945
+ if re.search(r"\b(train|fit|model|pipeline|preprocess|feature|predict)\b", text):
946
+ return 4, "TML-Bench prior: build a reliable tabular baseline before complex ensembling"
947
+ if re.search(r"\b(submit|submission|save|write|export|finish|answer|final)\b", text):
948
+ return 4, "TML-Bench prior: produce and validate a schema-compatible submission artifact"
949
+ elif profile == "pi-bench":
950
+ if re.search(r"\b(profile|user|history|message|file|workspace|app|context|state|read|inspect|list|search|get|query)\b", text):
951
+ return 6, "Pi-Bench prior: establish personal/workspace/app context before proactive action"
952
+ if re.search(r"\b(intent|implicit|hidden|latent|need|preference|constraint|policy|privacy|permission|clarif(?:y|ication)|ask)\b", text):
953
+ return 6, "Pi-Bench prior: resolve hidden intent, privacy, and permission uncertainty"
954
+ if re.search(r"\b(tool|action|schedule|send|update|create|modify|book|message|email|calendar|file)\b", text):
955
+ return 4, "Pi-Bench prior: take reversible proactive action only after context is grounded"
956
+ if re.search(r"\b(verify|confirm|observe|check|finish|answer|final|done)\b", text):
957
+ return 4, "Pi-Bench prior: verify observable completion and communicate concise outcome"
958
+ else:
959
+ if re.search(r"\b(observe|read|search|list|get|lookup|inspect|query)\b", text):
960
+ return 4, "generic prior: inspect available state before irreversible actions"
961
+ return 0, ""
962
+
963
+
964
+ def _shortlist_item(
965
+ doc: dict[str, Any],
966
+ score: float,
967
+ reasons: list[str],
968
+ schema_keys: list[str],
969
+ required_keys: list[str],
970
+ required_hints: list[dict[str, str]],
971
+ ) -> dict[str, Any]:
972
+ return {
973
+ "name": str(doc.get("name") or ""),
974
+ "score": round(score, 2),
975
+ "reason": "; ".join(reasons[:4]) or "available action",
976
+ "argument_keys": schema_keys[:12],
977
+ "required_argument_keys": required_keys[:12],
978
+ "available_required_hints": required_hints[:8],
979
+ "is_finish": bool(doc.get("is_finish", False)),
980
+ "is_message": bool(doc.get("is_message", False)),
981
+ }
982
+
983
+
984
def extract_action_payload(text: str) -> ActionPayload | None:
    """Return the last valid action payload found in cawdex output.

    JSON candidates are examined from last to first, and the first one that
    coerces into a payload is returned; ``None`` when none does.

    Supported shapes:
      {"name": "finish", "arguments": {"answer": "..."}}
      {"action": "finish", "arguments": {"answer": "..."}}
      {"action": {"name": "finish", "arguments": {"answer": "..."}}}
    """
    newest_first = reversed(_json_candidates(text))
    payloads = (_coerce_action_payload(candidate) for candidate in newest_first)
    return next((payload for payload in payloads if payload is not None), None)
998
+
999
+
1000
+ def _json_candidates(text: str) -> list[Any]:
1001
+ candidates: list[Any] = []
1002
+
1003
+ for block in re.findall(r"```(?:json|JSON)?\s*(.*?)```", text or "", flags=re.DOTALL):
1004
+ value = _parse_json(block.strip())
1005
+ if value is not None:
1006
+ candidates.append(value)
1007
+
1008
+ marker_re = re.compile(r"cawdex-exgentic action JSON\s*:\s*(\{.*?\})\s*$", re.IGNORECASE | re.DOTALL)
1009
+ marker = marker_re.search(text or "")
1010
+ if marker:
1011
+ value = _parse_json(marker.group(1))
1012
+ if value is not None:
1013
+ candidates.append(value)
1014
+
1015
+ decoder = json.JSONDecoder()
1016
+ for match in re.finditer(r"\{", text or ""):
1017
+ try:
1018
+ value, _ = decoder.raw_decode(text[match.start() :])
1019
+ except Exception:
1020
+ continue
1021
+ candidates.append(value)
1022
+
1023
+ return candidates
1024
+
1025
+
1026
+ def _parse_json(text: str) -> Any | None:
1027
+ try:
1028
+ return json.loads(text)
1029
+ except Exception:
1030
+ return None
1031
+
1032
+
1033
+ def _coerce_action_payload(value: Any) -> ActionPayload | None:
1034
+ if not isinstance(value, dict):
1035
+ return None
1036
+
1037
+ nested = value.get("action")
1038
+ if isinstance(nested, dict):
1039
+ nested_args = nested.get("arguments")
1040
+ if nested_args is None:
1041
+ nested_args = nested.get("args")
1042
+ value = {
1043
+ "name": nested.get("name") or nested.get("action") or nested.get("tool"),
1044
+ "arguments": nested_args,
1045
+ }
1046
+
1047
+ name = value.get("name") or value.get("action") or value.get("tool")
1048
+ if not isinstance(name, str) or not name.strip():
1049
+ return None
1050
+
1051
+ arguments = value.get("arguments")
1052
+ if arguments is None:
1053
+ arguments = value.get("args")
1054
+ if arguments is None:
1055
+ arguments = value.get("action_input")
1056
+ if arguments is None:
1057
+ arguments = {}
1058
+ if not isinstance(arguments, dict):
1059
+ arguments = {"value": arguments}
1060
+
1061
+ return ActionPayload(name=name.strip(), arguments=arguments)