open-agents-ai 0.187.498 → 0.187.500
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +915 -10
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -255394,14 +255394,26 @@ function buildScaffoldedPrompt(args) {
|
|
|
255394
255394
|
lines.push(bulletList(args.features));
|
|
255395
255395
|
lines.push("");
|
|
255396
255396
|
}
|
|
255397
|
-
|
|
255398
|
-
|
|
255399
|
-
|
|
255400
|
-
|
|
255401
|
-
|
|
255402
|
-
|
|
255403
|
-
|
|
255404
|
-
|
|
255397
|
+
if (args.stack && args.stack.length > 0) {
|
|
255398
|
+
lines.push(`## Tech stack — POSITIVE constraints (use these)`);
|
|
255399
|
+
lines.push(bulletList(args.stack));
|
|
255400
|
+
lines.push("");
|
|
255401
|
+
} else {
|
|
255402
|
+
lines.push(`## Tech stack — POSITIVE constraints`);
|
|
255403
|
+
lines.push(`The caller did not specify positive tech-stack constraints. The spec MUST declare its own tech stack explicitly in section 1 (Overview) — language, runtime, frameworks, persistence, build/test tools — so the downstream implementer has unambiguous targets. Do NOT leave the stack as an open choice.`);
|
|
255404
|
+
lines.push("");
|
|
255405
|
+
}
|
|
255406
|
+
if (args.anti_stack && args.anti_stack.length > 0) {
|
|
255407
|
+
lines.push(`## Tech stack — NEGATIVE constraints (do NOT use these)`);
|
|
255408
|
+
lines.push(`These prohibitions are load-bearing. Default training data favors popular alternatives; you must respect the explicit "no" here even when a forbidden choice would be conventional.`);
|
|
255409
|
+
lines.push("");
|
|
255410
|
+
lines.push(bulletList(args.anti_stack));
|
|
255411
|
+
lines.push("");
|
|
255412
|
+
} else {
|
|
255413
|
+
lines.push(`## Tech stack — NEGATIVE constraints`);
|
|
255414
|
+
lines.push(`No explicit prohibitions supplied. The spec is free to choose any tooling consistent with the positive constraints, but the spec itself SHOULD include a brief "anti-requirements" section (see section 11) listing the patterns/libraries explicitly excluded — even if the caller did not pre-specify them.`);
|
|
255415
|
+
lines.push("");
|
|
255416
|
+
}
|
|
255405
255417
|
if (args.notes && args.notes.trim().length > 0) {
|
|
255406
255418
|
lines.push(`## Additional constraints / notes`);
|
|
255407
255419
|
lines.push(args.notes.trim());
|
|
@@ -514617,6 +514629,23 @@ function renderCriticPrompt(inputs) {
|
|
|
514617
514629
|
lines.push(`9. **Unresolved failures**: stems with attempts ≥ 3 that never cleared.`);
|
|
514618
514630
|
lines.push(`10. **Generic-vs-specific drift**: code claims to be generic but contains`);
|
|
514619
514631
|
lines.push(` framework- or vendor-specific keywords.`);
|
|
514632
|
+
lines.push(`11. **Backtest evidence for new code (CRITICAL — recurring failure mode)**: For any`);
|
|
514633
|
+
lines.push(` NEWLY ADDED tool, module, public function, or prompt-generation feature,`);
|
|
514634
|
+
lines.push(` the implementer MUST have invoked the new code with realistic input and`);
|
|
514635
|
+
lines.push(` inspected the actual rendered/returned output. Unit-test pass is necessary`);
|
|
514636
|
+
lines.push(` but NOT sufficient — unit tests verify the implementer's own assertions,`);
|
|
514637
|
+
lines.push(` not whether the output is human-usable. Look for evidence in the recent`);
|
|
514638
|
+
lines.push(` tool calls of: a node/python/shell invocation that exercises the new code`);
|
|
514639
|
+
lines.push(` with realistic args, OR a manual-inspection step (cat / file_read of the`);
|
|
514640
|
+
lines.push(` output, head/tail of generated content). If you find ONLY unit-test`);
|
|
514641
|
+
lines.push(` evidence and no realistic-invocation evidence for new code, this is a`);
|
|
514642
|
+
lines.push(` \`request_changes\` regardless of test count. Tests passing while output is`);
|
|
514643
|
+
lines.push(` nonsense is a recurring class of negligence this gate exists to catch.`);
|
|
514644
|
+
lines.push(`12. **Empty-section / contradiction sweep**: For any output the implementer`);
|
|
514645
|
+
lines.push(` generates programmatically (prompts, configs, docs), check for sections`);
|
|
514646
|
+
lines.push(` that say "(none specified)" / "(no items)" / "(empty)" alongside framing`);
|
|
514647
|
+
lines.push(` text that asserts the section IS load-bearing. These contradictions`);
|
|
514648
|
+
lines.push(` indicate the generator wasn't tested with the empty / minimal-input case.`);
|
|
514620
514649
|
lines.push(``);
|
|
514621
514650
|
lines.push(`Do NOT flag:`);
|
|
514622
514651
|
lines.push(`- Stylistic choices (formatting, naming) unless they hide a real bug.`);
|
|
@@ -514910,6 +514939,472 @@ var init_backward_pass_runner = __esm({
|
|
|
514910
514939
|
}
|
|
514911
514940
|
});
|
|
514912
514941
|
|
|
514942
|
+
// packages/orchestrator/dist/stuck-meta-analyzer.js
|
|
514943
|
+
/**
 * Assemble the prompt handed to the stuck-state meta-analysis sub-agent.
 *
 * Reads `goal`, `workingDir`, `triggerReason`, `turn`, `planStatus`,
 * `recentFailures` and `recentToolCalls` from `inputs`; `workspaceSummary`
 * and `availableTools` are optional and their sections are left out when
 * absent/empty. Long free-text fields are truncated with `slice` so the
 * prompt stays bounded.
 *
 * @param {object} inputs - Snapshot of the implementer's state.
 * @returns {string} The prompt, one logical line per "\n"-separated row.
 */
function renderAnalyzerPrompt(inputs) {
  // Per-row formatters for the three list sections.
  const describePlanItem = (item) =>
    `  [${item.reconciled}] ${item.content.slice(0, 100)} — ${item.rationale.slice(0, 120)}`;
  const describeFailure = (failure) =>
    `  - ${failure.stem} (attempts=${failure.attempts}): ${failure.preview.slice(0, 200)}`;
  const describeCall = (call) => {
    // Only an explicit `false` counts as a failure; undefined is reported as OK.
    const outcome = call.success === false ? "FAIL" : "OK";
    const argsPart = call.argsKey ? ` ${call.argsKey}` : "";
    const previewPart = call.outputPreview ? ` → "${call.outputPreview.slice(0, 100)}"` : "";
    return `  ${call.name}${argsPart} [${outcome}]${previewPart}`;
  };

  const out = [
    `# STUCK-STATE META-ANALYSIS`,
    ``,
    `You are a META-ANALYSIS sub-agent. Another agent (the implementer) is`,
    `stuck in an unproductive tool-call loop and the runtime's structural`,
    `stuck-detector has fired. Your job: examine the loop + state below and`,
    `return ONE specific next tool call that will unblock the implementer.`,
    ``,
    `## Context`,
    `Goal: ${inputs.goal.slice(0, 600)}`,
    `Working directory: ${inputs.workingDir}`,
    `Trigger: ${inputs.triggerReason} (turn ${inputs.turn})`
  ];

  if (inputs.workspaceSummary) {
    out.push(``, `## Workspace summary`, inputs.workspaceSummary.slice(0, 1500));
  }

  out.push(``, `## Plan status (reconciled against disk)`);
  if (inputs.planStatus.length === 0) {
    out.push(`(no plan items declared)`);
  } else {
    // Cap at the first 12 plan items.
    for (const item of inputs.planStatus.slice(0, 12)) {
      out.push(describePlanItem(item));
    }
  }

  out.push(``, `## Recent unresolved failures`);
  if (inputs.recentFailures.length === 0) {
    out.push(`(none)`);
  } else {
    for (const failure of inputs.recentFailures.slice(0, 5)) {
      out.push(describeFailure(failure));
    }
  }

  out.push(``, `## The loop pattern (recent tool calls, oldest first)`);
  if (inputs.recentToolCalls.length === 0) {
    out.push(`(no recent calls)`);
  } else {
    // Keep only the 30 most recent calls (tail of the list).
    for (const call of inputs.recentToolCalls.slice(-30)) {
      out.push(describeCall(call));
    }
  }
  out.push(``);

  if (inputs.availableTools && inputs.availableTools.length > 0) {
    out.push(
      `## Tools available to the implementer`,
      inputs.availableTools.slice(0, 60).join(", "),
      ``
    );
  }

  // Fixed instruction + output-format tail; none of it depends on `inputs`.
  out.push(
    `## Your task`,
    ``,
    `Diagnose the loop in 1 sentence (what specific category of un-`,
    `productive activity is happening?). Then emit ONE concrete next`,
    `tool call the implementer should make. Do NOT emit a list of`,
    `alternatives. Do NOT emit categories like "PRODUCE" or "EDIT" —`,
    `emit the actual tool name and the actual args (with concrete`,
    `paths and a content seed when applicable).`,
    ``,
    `Universal rules for the directive:`,
    `- Use only tools the implementer has access to.`,
    `- The next_action MUST produce new state on disk (file_write,`,
    `  file_edit, batch_edit, file_patch, shell mutation, or similar).`,
    `  If the loop is read-heavy, the unblocker is virtually always a`,
    `  write of some kind.`,
    `- The args_seed must contain enough content that the implementer`,
    `  can apply or refine it directly. For file writes, the args_seed`,
    `  MUST include a 'content' field with at least skeleton text`,
    `  (function signatures, imports, key structures). For shell calls,`,
    `  include the exact command.`,
    `- The anti_pattern must name the SPECIFIC repeated activity to stop`,
    `  (e.g. "list_directory of /tests/* repeatedly with no writes"),`,
    `  not just "stop being stuck".`,
    `- The verification must be a concrete check (a tool call OR an`,
    `  expected state change) the implementer runs after the action.`,
    ``,
    `## Output format`,
    ``,
    `Reason briefly (1-3 sentences) about the loop, then emit a SINGLE`,
    `JSON code block with this exact shape:`,
    ``,
    "```json",
    `{`,
    `  "diagnosis": "<1-sentence root cause>",`,
    `  "next_action": {`,
    `    "tool": "<exact tool name from the available list>",`,
    `    "args_seed": { /* concrete args; for writes, include 'path' + 'content' seed */ },`,
    `    "rationale": "<why this unblocks>"`,
    `  },`,
    `  "anti_pattern": "<the specific loop activity to stop>",`,
    `  "verification": "<concrete check after the action>"`,
    `}`,
    "```",
    ``,
    `Be SPECIFIC. Vague directives are useless to a stuck implementer.`
  );

  return out.join("\n");
}
|
|
515045
|
+
/**
 * Parse the meta-analyzer's raw LLM response into a directive object.
 *
 * Extraction strategy: every fenced code block (``` or ```json) is tried in
 * order, then the span from the first "{" to the last "}" of the whole
 * response. The first candidate that parses to an object carrying all
 * required fields wins, so a stray non-JSON code fence in the model's
 * reasoning no longer hides the real JSON block.
 *
 * Required fields (must be non-blank strings): `diagnosis`,
 * `next_action.tool`, `anti_pattern`, `verification`. `next_action.rationale`
 * is optional and `next_action.args_seed` defaults to `{}` unless it is a
 * plain (non-array) object.
 *
 * Never throws. On any failure a placeholder directive with
 * `parseFallback: true` is returned; its `diagnosis` carries the reason for
 * the first candidate that failed.
 *
 * @param {unknown} rawResponse - Text returned by the model.
 * @returns {object} Directive `{ diagnosis, next_action, anti_pattern,
 *   verification, raw }`, plus `parseFallback: true` on failure.
 */
function parseDirective(rawResponse) {
  const fallback = (msg) => ({
    diagnosis: `(meta-analyzer parse failed: ${msg})`,
    next_action: {
      tool: "(unknown)",
      args_seed: {},
      rationale: "Parser fell back; directive should not be injected."
    },
    anti_pattern: "(unknown)",
    verification: "(unknown)",
    raw: rawResponse,
    parseFallback: true
  });
  if (typeof rawResponse !== "string" || rawResponse.trim().length === 0) {
    return fallback("empty response");
  }

  // Collect JSON candidates: all fenced blocks first, then the outermost brace span.
  const candidates = [];
  for (const fence of rawResponse.matchAll(/```(?:json)?\s*\n([\s\S]*?)\n```/g)) {
    const fenced = fence[1].trim();
    if (fenced.length > 0 && !candidates.includes(fenced)) {
      candidates.push(fenced);
    }
  }
  const firstBrace = rawResponse.indexOf("{");
  const lastBrace = rawResponse.lastIndexOf("}");
  if (firstBrace !== -1 && lastBrace > firstBrace) {
    const braceSpan = rawResponse.slice(firstBrace, lastBrace + 1);
    if (!candidates.includes(braceSpan)) {
      candidates.push(braceSpan);
    }
  }
  if (candidates.length === 0) {
    return fallback("no JSON block found");
  }

  // Blank / non-string values normalise to "" so every required field is
  // validated the same way (whitespace-only text is not a usable directive).
  const cappedText = (value, max) =>
    typeof value === "string" && value.trim().length > 0 ? value.slice(0, max) : "";
  const isPlainObject = (value) =>
    value !== null && typeof value === "object" && !Array.isArray(value);

  let firstError = null;
  const noteError = (msg) => {
    if (firstError === null) firstError = msg;
  };

  for (const candidate of candidates) {
    let parsed;
    try {
      parsed = JSON.parse(candidate);
    } catch (err) {
      noteError(`JSON parse: ${err instanceof Error ? err.message : String(err)}`);
      continue;
    }
    if (!isPlainObject(parsed)) {
      noteError("not an object");
      continue;
    }
    const next = isPlainObject(parsed.next_action) ? parsed.next_action : null;
    const diagnosis = cappedText(parsed.diagnosis, 400);
    const tool = next && typeof next.tool === "string" ? next.tool.trim() : "";
    // Tool arguments must be a keyed object; arrays and scalars are discarded.
    const args_seed = next && isPlainObject(next.args_seed) ? next.args_seed : {};
    const rationale = next ? cappedText(next.rationale, 400) : "";
    const anti_pattern = cappedText(parsed.anti_pattern, 400);
    const verification = cappedText(parsed.verification, 400);
    if (!diagnosis || !tool || !anti_pattern || !verification) {
      noteError("missing required fields");
      continue;
    }
    return {
      diagnosis,
      next_action: { tool, args_seed, rationale },
      anti_pattern,
      verification,
      raw: rawResponse
    };
  }
  return fallback(firstError);
}
|
|
515099
|
+
/**
 * Render a parsed meta-analyzer directive as the text message injected into
 * the implementer's conversation.
 *
 * Returns "" for a fallback directive (`parseFallback` set) so nothing is
 * injected when parsing failed. The optional rationale line is omitted when
 * the rationale is blank, so the message never contains an empty
 * "Rationale:" row, and a missing `args_seed` is rendered as `{}` instead of
 * throwing.
 *
 * @param {object} d2 - Directive as produced by the directive parser.
 * @returns {string} Multi-line message, or "" when nothing should be injected.
 */
function renderDirectiveAsMessage(d2) {
  if (d2.parseFallback) {
    return "";
  }
  const lines = [];
  lines.push(`[STUCK-STATE META-ANALYZER — REG-49]`);
  lines.push(``);
  lines.push(`A meta-analyzer sub-agent reviewed the recent tool-call pattern, the`);
  lines.push(`current world state, and the plan; it produced a single concrete`);
  lines.push(`unblocking action for you to take.`);
  lines.push(``);
  lines.push(`DIAGNOSIS: ${d2.diagnosis}`);
  lines.push(``);
  lines.push(`STOP DOING (anti-pattern): ${d2.anti_pattern}`);
  lines.push(``);
  lines.push(`DO NEXT:`);
  lines.push(`  Tool: ${d2.next_action.tool}`);
  // JSON.stringify(undefined) yields undefined, so default a missing seed to {}.
  const argsJson = JSON.stringify(d2.next_action.args_seed ?? {}, null, 2);
  lines.push(`  Args:`);
  for (const ln of argsJson.split("\n")) {
    lines.push(`    ${ln}`);
  }
  const rationale = d2.next_action.rationale;
  if (typeof rationale === "string" && rationale.trim().length > 0) {
    lines.push(`  Rationale: ${rationale}`);
  }
  lines.push(``);
  lines.push(`AFTER THE ACTION, verify with: ${d2.verification}`);
  lines.push(``);
  lines.push(`This directive comes from a meta-analysis of YOUR recent activity. The`);
  lines.push(`args above are a SEED — refine them as needed (filenames, content) but`);
  lines.push(`emit a tool call of this kind on your next response. Do NOT emit`);
  lines.push(`another instance of the anti-pattern; that loop has been blocked.`);
  return lines.join("\n");
}
|
|
515130
|
+
/**
 * Run one stuck-state meta-analysis round trip: render the prompt, call the
 * model, parse the reply, and render the message to inject.
 *
 * Best-effort by design: a rejected/throwing `opts.callable` or a reply that
 * is not a string is treated as an empty response, which the parser turns
 * into a fallback directive and an empty injection. Without the string
 * check, `Buffer.byteLength` would throw on `undefined`/`null`/object
 * replies and defeat the best-effort contract.
 *
 * @param {{ inputs: object, callable: (prompt: string) => Promise<string> }} opts
 * @returns {Promise<{ directive: object, injection: string, promptBytes: number,
 *   responseBytes: number, durationMs: number }>}
 */
async function runStuckAnalyzer(opts) {
  const startMs = Date.now();
  const prompt = renderAnalyzerPrompt(opts.inputs);
  const promptBytes = Buffer.byteLength(prompt, "utf-8");
  let raw = "";
  try {
    const response = await opts.callable(prompt);
    // Only string replies are usable downstream; anything else counts as empty.
    raw = typeof response === "string" ? response : "";
  } catch {
    // Deliberately swallowed: the analyzer is advisory and must never break
    // the caller. The empty response surfaces as a parseFallback directive.
    raw = "";
  }
  const responseBytes = Buffer.byteLength(raw, "utf-8");
  const directive = parseDirective(raw);
  const injection = renderDirectiveAsMessage(directive);
  return {
    directive,
    injection,
    promptBytes,
    responseBytes,
    durationMs: Date.now() - startMs
  };
}
|
|
515151
|
+
// Bundler-generated initializer for the stuck-meta-analyzer module. The
// wrapped module body contains only the "use strict" directive, i.e. the
// module has no top-level work to run; its functions are plain hoisted
// declarations in this file.
// NOTE(review): `__esm` is defined outside this view — presumably the
// bundler's run-once lazy-init helper; confirm before relying on that.
var init_stuck_meta_analyzer = __esm({
  "packages/orchestrator/dist/stuck-meta-analyzer.js"() {
    "use strict";
  }
});
|
|
515156
|
+
|
|
515157
|
+
// packages/orchestrator/dist/problem-frame-validator.js
|
|
515158
|
+
/**
 * Assemble the prompt for the problem-frame validator sub-agent, which is
 * asked whether the implementer is solving the right problem rather than
 * for a next tool call.
 *
 * Reads `goal`, `workingDir`, `currentSubtask`, `planStatus`,
 * `recentActivitySummary`, `triggerReason`, `recentStuckCount` and `turn`
 * from `inputs`. Free-text fields are truncated with `slice`; a blank
 * sub-task and an empty plan each get an explicit placeholder line.
 *
 * @param {object} inputs - Snapshot of the implementer's state.
 * @returns {string} The prompt, rows joined with "\n".
 */
function renderFrameValidatorPrompt(inputs) {
  // Fixed preamble: role description and the three evaluation questions.
  const preamble = [
    `# PROBLEM-FRAME VALIDATION`,
    ``,
    `You are a META-STRATEGY VALIDATOR. Another agent (the implementer)`,
    `is working on a task. You are NOT here to suggest a next tool call.`,
    `You are here to ask the deeper question:`,
    ``,
    `  > "Is the implementer solving the right problem?"`,
    ``,
    `Specifically, evaluate three things:`,
    ``,
    `  1. **Goal-vs-subtask alignment**: does the current sub-task`,
    `     genuinely contribute to the original goal, or has the agent`,
    `     drifted into a side-quest that doesn't matter?`,
    ``,
    `  2. **Frame ambition**: is the current frame too ambitious for`,
    `     the implementer's context? Is there a simpler version of`,
    `     the same sub-task that would still satisfy the goal?`,
    ``,
    `  3. **Productive blockedness**: if the agent has been stuck on`,
    `     this sub-task for a while, is there a fundamentally`,
    `     different approach that would unblock progress, OR is`,
    `     the right move to declare this specific sub-task blocked`,
    `     and move on / escalate to the user?`,
    ``
  ];

  const subtaskRow =
    inputs.currentSubtask.trim().length > 0
      ? inputs.currentSubtask.slice(0, 400)
      : `(no in-progress sub-task identified — agent may be drifting)`;

  // Input-dependent middle part of the prompt.
  const state = [
    `## Goal (the immutable anchor)`,
    inputs.goal.slice(0, 800),
    ``,
    `## Working directory`,
    inputs.workingDir,
    ``,
    `## Current sub-task`,
    subtaskRow,
    ``,
    `## Plan status`
  ];
  if (inputs.planStatus.length === 0) {
    state.push(`(no plan items declared)`);
  } else {
    // Cap at the first 12 plan items.
    for (const item of inputs.planStatus.slice(0, 12)) {
      state.push(`  [${item.status}] ${item.content.slice(0, 100)}`);
    }
  }
  state.push(
    ``,
    `## Recent activity summary`,
    inputs.recentActivitySummary.slice(0, 2e3),
    ``,
    `## Triggers`,
    `  - Trigger reason: ${inputs.triggerReason}`,
    `  - Stuck-event count this run: ${inputs.recentStuckCount}`,
    `  - Turn: ${inputs.turn}`,
    ``
  );

  // Fixed tail: verdict definitions and the required output format.
  const instructions = [
    `## Your verdict — pick ONE`,
    ``,
    `Choose the verdict that best matches the situation:`,
    ``,
    `  - **continue**: the current frame is right. The agent should`,
    `    keep going. Use this when goal-subtask alignment is good`,
    `    and progress is reasonable for the difficulty.`,
    ``,
    `  - **simplify**: the current frame is correct in direction but`,
    `    too ambitious. Suggest a SMALLER version of the same`,
    `    sub-task that still satisfies the goal. Common case:`,
    `    agent is over-engineering a config when defaults would`,
    `    work, or writing comprehensive tests when 1-2 smoke tests`,
    `    would unblock.`,
    ``,
    `  - **pivot**: the current frame is WRONG. The sub-task the`,
    `    agent is on does not contribute to the goal, or there is`,
    `    a fundamentally better approach. Common case: agent is`,
    `    fixing a custom config that should be deleted; agent is`,
    `    writing tests against an API that needs changing, not`,
    `    accommodating.`,
    ``,
    `  - **declare-blocked**: there is no productive frame from this`,
    `    state. The current sub-task genuinely cannot be completed`,
    `    with the current information; the agent should call`,
    `    task_complete with the specific blocker named (NOT a`,
    `    polite "looks done"). Reserve for genuine dead-ends.`,
    ``,
    `Do NOT default to continue out of politeness. If the recent`,
    `activity shows the agent spinning on a sub-problem (especially`,
    `if stuck-event count > 1), seriously consider whether simplify`,
    `or pivot would help.`,
    ``,
    `## Output format`,
    ``,
    `Reason briefly (1-3 sentences) about goal alignment, then emit`,
    `a SINGLE JSON code block with this exact shape:`,
    ``,
    "```json",
    `{`,
    `  "verdict": "continue" | "simplify" | "pivot" | "declare-blocked",`,
    `  "rationale": "<1-2 sentences>",`,
    `  "recommended_frame": {  /* OMIT for continue and declare-blocked */`,
    `    "new_subtask": "<concrete description of the better sub-task>",`,
    `    "why_better": "<why this frame is more tractable / aligned>",`,
    `    "success_criterion": "<what 'done' looks like for the new frame>"`,
    `  },`,
    `  "blocker_summary": "<for declare-blocked only — name the specific blocker>"`,
    `}`,
    "```",
    ``,
    `Be SPECIFIC. Vague verdicts are useless.`
  ];

  return [...preamble, ...state, ...instructions].join("\n");
}
|
|
515268
|
+
/**
 * Parse the problem-frame validator's raw LLM response into a verdict.
 *
 * Extraction strategy: every fenced code block (``` or ```json) is tried in
 * order, then the span from the first "{" to the last "}" of the whole
 * response; the first candidate that yields a valid verdict wins, so a
 * stray non-JSON code fence does not hide the real JSON block.
 *
 * Verdict spelling is normalised (case, and spaces/underscores treated as
 * hyphens) and a few synonyms are accepted. `simplify` / `pivot` require a
 * `recommended_frame` with a non-blank `new_subtask`; `declare-blocked`
 * requires a non-blank `blocker_summary`. Whitespace-only text counts as
 * missing, so a verdict can never carry an empty instruction.
 *
 * Never throws. On any failure the safe default
 * `{ verdict: "continue", parseFallback: true, ... }` is returned so a
 * parser problem does not disrupt the agent.
 *
 * @param {unknown} rawResponse - Text returned by the model.
 * @returns {object} `{ verdict, rationale, recommended_frame, blocker_summary,
 *   raw }`, or the fallback shape described above.
 */
function parseFrameVerdict(rawResponse) {
  const fallback = (msg) => ({
    verdict: "continue",
    // Safe default: don't disrupt agent on parser error
    rationale: `(frame-validator parse failed: ${msg})`,
    raw: rawResponse,
    parseFallback: true
  });
  if (typeof rawResponse !== "string" || rawResponse.trim().length === 0) {
    return fallback("empty response");
  }

  // Collect JSON candidates: all fenced blocks first, then the outermost brace span.
  const candidates = [];
  for (const fence of rawResponse.matchAll(/```(?:json)?\s*\n([\s\S]*?)\n```/g)) {
    const fenced = fence[1].trim();
    if (fenced.length > 0 && !candidates.includes(fenced)) {
      candidates.push(fenced);
    }
  }
  const firstBrace = rawResponse.indexOf("{");
  const lastBrace = rawResponse.lastIndexOf("}");
  if (firstBrace !== -1 && lastBrace > firstBrace) {
    const braceSpan = rawResponse.slice(firstBrace, lastBrace + 1);
    if (!candidates.includes(braceSpan)) {
      candidates.push(braceSpan);
    }
  }
  if (candidates.length === 0) {
    return fallback("no JSON block found");
  }

  // Blank / non-string values normalise to "".
  const cappedText = (value, max) =>
    typeof value === "string" && value.trim().length > 0 ? value.slice(0, max) : "";

  // Canonical verdict for each accepted spelling (after normalisation).
  const VERDICT_ALIASES = {
    continue: "continue",
    ok: "continue",
    stay: "continue",
    simplify: "simplify",
    shrink: "simplify",
    minimize: "simplify",
    pivot: "pivot",
    change: "pivot",
    redirect: "pivot",
    "declare-blocked": "declare-blocked",
    blocked: "declare-blocked",
    escalate: "declare-blocked"
  };

  // Turn one parsed JSON value into { value } on success or { error } on failure.
  const interpret = (parsed) => {
    if (!parsed || typeof parsed !== "object") {
      return { error: "not an object" };
    }
    const rawVerdict = String(parsed.verdict ?? "").toLowerCase().trim();
    const key = rawVerdict.replace(/[\s_]+/g, "-");
    const verdict = Object.hasOwn(VERDICT_ALIASES, key) ? VERDICT_ALIASES[key] : null;
    if (verdict === null) {
      return { error: `unrecognized verdict: ${rawVerdict}` };
    }
    const rationale = cappedText(parsed.rationale, 600) || "(no rationale)";

    let recommended_frame;
    if (verdict === "simplify" || verdict === "pivot") {
      const rf = parsed.recommended_frame;
      const newSub = rf && typeof rf === "object" ? cappedText(rf.new_subtask, 400) : "";
      if (!newSub) {
        return { error: `${verdict} verdict missing required recommended_frame` };
      }
      recommended_frame = {
        new_subtask: newSub,
        why_better: cappedText(rf.why_better, 400),
        success_criterion: cappedText(rf.success_criterion, 400)
      };
    }

    let blocker_summary;
    if (verdict === "declare-blocked") {
      blocker_summary = cappedText(parsed.blocker_summary, 600) || void 0;
      if (!blocker_summary) {
        return { error: `declare-blocked verdict missing required blocker_summary` };
      }
    }

    return {
      value: {
        verdict,
        rationale,
        recommended_frame,
        blocker_summary,
        raw: rawResponse
      }
    };
  };

  let firstError = null;
  for (const candidate of candidates) {
    let outcome;
    try {
      outcome = interpret(JSON.parse(candidate));
    } catch (err) {
      outcome = { error: `JSON parse: ${err instanceof Error ? err.message : String(err)}` };
    }
    if (outcome.value) {
      return outcome.value;
    }
    // Report the reason for the first candidate that failed.
    if (firstError === null) {
      firstError = outcome.error;
    }
  }
  return fallback(firstError);
}
|
|
515342
|
+
/**
 * Render a problem-frame verdict as the text message injected into the
 * implementer's conversation.
 *
 * Returns "" (inject nothing) when the verdict is a parser fallback, is
 * `continue`, or lacks the one field that makes it actionable (a non-blank
 * `recommended_frame.new_subtask` for simplify/pivot, a non-blank
 * `blocker_summary` for declare-blocked). Optional frame details
 * (`why_better`, `success_criterion`) are printed only when non-blank so the
 * message never contains an empty "Why this is better:" / "Done when:" row.
 *
 * @param {object} v - Verdict as produced by the frame-verdict parser.
 * @returns {string} Multi-line message, or "" when nothing should be injected.
 */
function renderVerdictAsMessage(v) {
  if (v.parseFallback) {
    return "";
  }
  if (v.verdict === "continue") {
    return "";
  }
  const hasText = (value) => typeof value === "string" && value.trim().length > 0;
  const lines = [];
  lines.push(`[PROBLEM-FRAME VALIDATION — REG-51 — ${v.verdict.toUpperCase()}]`);
  lines.push(``);
  lines.push(`A meta-strategy validator reviewed your goal, current sub-task, and recent`);
  lines.push(`activity. Verdict:`);
  lines.push(``);
  lines.push(`  ${v.rationale}`);
  lines.push(``);
  if (v.verdict === "simplify" || v.verdict === "pivot") {
    const rf = v.recommended_frame;
    // Without a concrete replacement sub-task there is nothing to redirect to.
    if (!rf || !hasText(rf.new_subtask)) {
      return "";
    }
    const verb = v.verdict === "simplify" ? "SHRINK YOUR CURRENT FRAME" : "PIVOT TO A DIFFERENT FRAME";
    lines.push(`${verb}:`);
    lines.push(``);
    lines.push(`  New sub-task: ${rf.new_subtask}`);
    if (hasText(rf.why_better)) {
      lines.push(`  Why this is better: ${rf.why_better}`);
    }
    if (hasText(rf.success_criterion)) {
      lines.push(`  Done when: ${rf.success_criterion}`);
    }
    lines.push(``);
    lines.push(`Update your todo list to reflect this new framing on your next response.`);
    lines.push(`Then take ONE concrete action toward the new sub-task. Do NOT continue`);
    lines.push(`the previous approach — it has been judged ${v.verdict === "pivot" ? "wrong" : "too ambitious"} for`);
    lines.push(`your current state.`);
  } else if (v.verdict === "declare-blocked") {
    // A blocked verdict is only useful when it names the blocker.
    if (!hasText(v.blocker_summary)) {
      return "";
    }
    lines.push(`DECLARE BLOCKED:`);
    lines.push(``);
    lines.push(`  ${v.blocker_summary}`);
    lines.push(``);
    lines.push(`Call task_complete with a summary that names this blocker EXPLICITLY. Do`);
    lines.push(`not pretend the work is done; do not keep iterating. The validator has`);
    lines.push(`determined no productive frame exists from this state. Surface the`);
    lines.push(`blocker and let the user / next agent intervene.`);
  }
  return lines.join("\n");
}
|
|
515380
|
+
/**
 * Run one problem-frame validation round trip: render the prompt, call the
 * model, parse the reply into a verdict, and render the message to inject.
 *
 * Best-effort by design: a rejected/throwing `opts.callable` or a reply that
 * is not a string is treated as an empty response, which the parser turns
 * into the safe `continue` fallback verdict and an empty injection. Without
 * the string check, `Buffer.byteLength` would throw on
 * `undefined`/`null`/object replies and defeat the best-effort contract.
 *
 * @param {{ inputs: object, callable: (prompt: string) => Promise<string> }} opts
 * @returns {Promise<{ verdict: object, injection: string, promptBytes: number,
 *   responseBytes: number, durationMs: number }>}
 */
async function runFrameValidator(opts) {
  const startMs = Date.now();
  const prompt = renderFrameValidatorPrompt(opts.inputs);
  const promptBytes = Buffer.byteLength(prompt, "utf-8");
  let raw = "";
  try {
    const response = await opts.callable(prompt);
    // Only string replies are usable downstream; anything else counts as empty.
    raw = typeof response === "string" ? response : "";
  } catch {
    // Deliberately swallowed: the validator is advisory and must never break
    // the caller. The empty response surfaces as a parseFallback verdict.
    raw = "";
  }
  const responseBytes = Buffer.byteLength(raw, "utf-8");
  const verdict = parseFrameVerdict(raw);
  const injection = renderVerdictAsMessage(verdict);
  return {
    verdict,
    injection,
    promptBytes,
    responseBytes,
    durationMs: Date.now() - startMs
  };
}
|
|
515402
|
+
// Bundler-generated initializer for the problem-frame-validator module. The
// wrapped module body contains only the "use strict" directive, i.e. the
// module has no top-level work to run; its functions are plain hoisted
// declarations in this file.
// NOTE(review): `__esm` is defined outside this view — presumably the
// bundler's run-once lazy-init helper; confirm before relying on that.
var init_problem_frame_validator = __esm({
  "packages/orchestrator/dist/problem-frame-validator.js"() {
    "use strict";
  }
});
|
|
515407
|
+
|
|
514913
515408
|
// packages/orchestrator/dist/pressure-gate.js
|
|
514914
515409
|
function detectPressure(message2) {
|
|
514915
515410
|
const hasProfanity = PRESSURE_SIGNALS.test(message2);
|
|
@@ -520475,7 +520970,7 @@ function executeHook(hook, env2 = {}) {
|
|
|
520475
520970
|
maxBuffer: 1024 * 1024
|
|
520476
520971
|
// 1MB
|
|
520477
520972
|
});
|
|
520478
|
-
const directive =
|
|
520973
|
+
const directive = parseDirective2(output);
|
|
520479
520974
|
return {
|
|
520480
520975
|
success: true,
|
|
520481
520976
|
output: output.trim(),
|
|
@@ -520493,7 +520988,7 @@ function executeHook(hook, env2 = {}) {
|
|
|
520493
520988
|
};
|
|
520494
520989
|
}
|
|
520495
520990
|
}
|
|
520496
|
-
function
|
|
520991
|
+
function parseDirective2(output) {
|
|
520497
520992
|
const lines = output.split("\n");
|
|
520498
520993
|
for (const line of lines) {
|
|
520499
520994
|
const trimmed = line.trim();
|
|
@@ -521149,6 +521644,8 @@ var init_agenticRunner = __esm({
|
|
|
521149
521644
|
init_world_state_regenerator();
|
|
521150
521645
|
init_backward_pass_runner();
|
|
521151
521646
|
init_world_state_plan_reconciler();
|
|
521647
|
+
init_stuck_meta_analyzer();
|
|
521648
|
+
init_problem_frame_validator();
|
|
521152
521649
|
init_pressure_gate();
|
|
521153
521650
|
init_dist5();
|
|
521154
521651
|
init_dist7();
|
|
@@ -521349,6 +521846,21 @@ var init_agenticRunner = __esm({
|
|
|
521349
521846
|
// turn until they clear. Track which we've surfaced this run so the
|
|
521350
521847
|
// signal doesn't fire >1× per turn per stem.
|
|
521351
521848
|
_stickyEscalationsSurfacedThisTurn = /* @__PURE__ */ new Set();
|
|
521849
|
+
// REG-49b: dedup flag for the Loop Intervention escalation path. It is set when
// the stuck-meta-analyzer (SSMA) is invoked from that path so the analyzer runs
// at most once per loop episode, and it is cleared again once the repetition
// score is back at or under the recovery threshold (0.2).
_smaFiredThisLoop = false;
// REG-50: cooldown for the same-file write-thrash detector. After the detector
// fires it stays quiet for 8 turns so the agent has room to break out. Kept
// separate from REG-44 because the failure shape differs: writes far outnumber
// reads, and the agent is stuck because nothing is verified between writes.
_writeThrashCooldownUntilTurn = -1;
// REG-51: Problem-Frame Validator (PFV) bookkeeping. The validator runs as a
// periodic checkpoint (every OA_PFV_INTERVAL turns) and on a chronic-stuck
// trigger (SSMA fire count reaching a threshold). `_lastPfvTurn` is the turn of
// the most recent PFV run; `_ssmaFiredCount` is the cumulative number of SSMA
// invocations used for chronic detection.
_lastPfvTurn = -1;
_ssmaFiredCount = 0;
|
|
521352
521864
|
// REG-46: world-state regeneration. Replaces stream-based context
|
|
521353
521865
|
// re-derivation (agent re-listing dirs, re-reading specs) with periodic
|
|
521354
521866
|
// injected snapshots of workdir + plan reconciliation + recent failures.
|
|
@@ -524025,8 +524537,318 @@ ${_staleSamples.join("\n")}` : ``,
|
|
|
524025
524537
|
content: `REG-44 STUCK detector fired at turn ${turn} — triggers=[${_trigLabels.join(",")}], reads=${_readCount}, mutations=${_mutationCount}, stale=${_staleCount}, window=${_windowCalls.length}`,
|
|
524026
524538
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
524027
524539
|
});
|
|
524540
|
+
// REG-49: optional stuck-meta-analyzer (SSMA) escalation for the REG-44 STUCK
// detector. Opt-in through OA_STUCK_META_ANALYZER ("on" / "1" / "true",
// case-insensitive). The analyzer is started but not awaited; its outcome is
// handled in the promise callbacks at the bottom of this block.
try {
  const analyzerFlag = (process.env["OA_STUCK_META_ANALYZER"] || "off").toLowerCase();
  if (["on", "1", "true"].includes(analyzerFlag)) {
    // Backend round-trip handed to the analyzer. Resolves to the reply text, or
    // to "" when the call throws or the reply has no string content.
    const askBackend = async (prompt) => {
      try {
        const tokenBudget = parseInt(process.env["OA_STUCK_META_MAX_TOKENS"] || "2048", 10) || 2048;
        const timeoutBudget = parseInt(process.env["OA_STUCK_META_TIMEOUT_MS"] || "120000", 10) || 120000;
        const reply = await this.backend.chatCompletion({
          messages: [
            { role: "system", content: "You are a META-ANALYSIS sub-agent. Audit the implementer's stuck state and emit a structured JSON directive." },
            { role: "user", content: prompt }
          ],
          tools: [],
          temperature: 0,
          maxTokens: tokenBudget,
          timeoutMs: timeoutBudget
        });
        const text = reply?.choices?.[0]?.message?.content;
        if (typeof text === "string") {
          return text;
        }
        return "";
      } catch {
        return "";
      }
    };
    // Compact view of one tool call: only the first output line, capped at 120 chars.
    const toAnalyzerCall = (call) => ({
      name: call.name,
      argsKey: call.argsKey,
      success: call.success,
      outputPreview: (call.outputPreview || "").split(/\r?\n/)[0]?.slice(0, 120) ?? ""
    });
    const recentCalls = _windowCalls.slice(-25).map(toAnalyzerCall);
    // Up to 12 session todos; an unreadable todo list degrades to an empty plan.
    let planStatus;
    try {
      const todos = this.readSessionTodos() || [];
      planStatus = todos.slice(0, 12).map((todo) => ({
        content: todo.content || "",
        reconciled: todo.status || "pending",
        rationale: "(reconcile context unavailable here; structural status only)"
      }));
    } catch {
      planStatus = [];
    }
    // Five failure reflections with the highest attempt counts.
    const failureEntries = Array.from(this._failureReflections.entries());
    const rankedFailures = failureEntries
      .map(([stem, entry]) => ({
        stem,
        attempts: entry.attempts,
        preview: (entry.wentWrong || "").slice(0, 200)
      }))
      .sort((left, right) => right.attempts - left.attempts)
      .slice(0, 5);
    const toolNames = Array.from(this.tools.keys());
    // Counted at invocation time, before the analyzer's outcome is known.
    this._ssmaFiredCount++;
    const analysis = runStuckAnalyzer({
      inputs: {
        goal: this._taskState.originalGoal || this._taskState.goal || "",
        workingDir: this._workingDirectory || process.cwd(),
        triggerReason: `reg44-${_trigLabels[0] || "unknown"}`,
        recentToolCalls: recentCalls,
        planStatus,
        recentFailures: rankedFailures,
        // world-state regen owns this; analyzer infers from calls
        workspaceSummary: void 0,
        availableTools: toolNames,
        turn
      },
      callable: askBackend
    });
    analysis.then((outcome) => {
      if (!outcome.injection || outcome.directive.parseFallback) {
        this.emit({
          type: "status",
          content: `REG-49 stuck-meta-analyzer parse failed at turn ${turn} — falling back to REG-44 abstract halt only`,
          timestamp: new Date().toISOString()
        });
        return;
      }
      // Usable directive: inject it as a system message, then report it.
      messages2.push({ role: "system", content: outcome.injection });
      this.emit({
        type: "status",
        content: `REG-49 stuck-meta-analyzer fired at turn ${turn} — diagnosis="${outcome.directive.diagnosis.slice(0, 80)}", next=${outcome.directive.next_action.tool}, ${outcome.durationMs}ms`,
        timestamp: new Date().toISOString()
      });
    }).catch((failure) => {
      this.emit({
        type: "status",
        content: `REG-49 stuck-meta-analyzer threw: ${failure instanceof Error ? failure.message : String(failure)} (non-fatal)`,
        timestamp: new Date().toISOString()
      });
    });
  }
} catch {
  // Best-effort: a synchronous failure while preparing the analyzer is ignored.
}
|
|
524626
|
+
}
|
|
524627
|
+
}
|
|
524628
|
+
}
|
|
524629
|
+
// REG-50: same-file write-thrash detector.
//
// Looks at the last 15 tool calls (requires at least 12) and counts write-class
// calls per target path. When one path was written at least
// OA_WRITE_THRASH_THRESHOLD times (default 4) and no successful shell command
// with clean-looking output appears in the window, a halt message is injected
// and the detector is put on an 8-turn cooldown. When OA_STUCK_META_ANALYZER is
// enabled, the stuck-meta-analyzer (SSMA) is additionally started (not awaited).
if (turn > this._writeThrashCooldownUntilTurn && turn >= 12) {
  const _wtWindow = toolCallLog.slice(-15);
  if (_wtWindow.length >= 12) {
    const _wtWriteClass = /* @__PURE__ */ new Set([
      "file_write",
      "file_edit",
      "file_patch",
      "batch_edit",
      "notebook_edit"
    ]);
    const _wtFileCounts = /* @__PURE__ */ new Map();
    for (const c9 of _wtWindow) {
      if (!_wtWriteClass.has(c9.name))
        continue;
      const _ak = c9.argsKey || "";
      // Key by the `path=` argument when present, otherwise by the whole args key.
      const _m = /(?:^|,)path=([^,]+)/.exec(_ak);
      const _pk = _m ? _m[1].slice(0, 200) : _ak.slice(0, 200);
      _wtFileCounts.set(_pk, (_wtFileCounts.get(_pk) ?? 0) + 1);
    }
    const _wtThreshold = parseInt(process.env["OA_WRITE_THRASH_THRESHOLD"] || "4", 10) || 4;
    let _wtWorstPath = "";
    let _wtWorstCount = 0;
    for (const [_p, _n] of _wtFileCounts.entries()) {
      if (_n > _wtWorstCount) {
        _wtWorstCount = _n;
        _wtWorstPath = _p;
      }
    }
    const _wtHadSuccessfulVerify = _wtWindow.some((c9) => {
      if (c9.name !== "shell" || c9.success !== true)
        return false;
      // Strip benign zero-count summaries ("0 failed", "no errors", "0 warnings")
      // before scanning for failure words. Without this, a passing test run that
      // prints such a summary was never counted as verification and the halt
      // fired even though a clean verify command had run.
      const _out = String(c9.outputPreview || "").replace(/\b(?:0|no)\s+(?:errors?|fail(?:ed|ures?)?|warnings?)\b/gi, "");
      return !/error|failed?|exit code [1-9]/i.test(_out);
    });
    if (_wtWorstCount >= _wtThreshold && !_wtHadSuccessfulVerify) {
      this._writeThrashCooldownUntilTurn = turn + 8;
      messages2.push({
        role: "system",
        content: [
          `[WRITE-THRASH HALT — REG-50]`,
          ``,
          `In the last ${_wtWindow.length} tool calls you have written the same file ${_wtWorstCount} times:`,
          ` ${_wtWorstPath}`,
          ``,
          `No successful test/build/typecheck command ran between writes — you are iterating the file blind, hoping the next variation works. This is a write-thrash anti-pattern. Repeated edits without verification confirm nothing.`,
          ``,
          `Pick ONE of these for your next response:`,
          ``,
          ` (a) RUN-AND-READ: Run the EXACT command that fails (test/typecheck/build) and READ THE FULL ERROR MESSAGE LITERALLY. Do not summarize it from memory; paste it back into context. The current write hasn't been validated against any error.`,
          ``,
          ` (b) DELETE-AND-RESTART: If the file has been rewritten ${_wtWorstCount} times, the current approach is wrong. Either delete the file and try a fundamentally different design, OR revert to a known-good earlier version (use git, working_notes, or memory_search to find one).`,
          ``,
          ` (c) WEB-SEARCH: If the error is framework- or version-specific (config files often are), web_search the EXACT error string + relevant tool name. External knowledge beats blind iteration.`,
          ``,
          ` (d) DECLARE BLOCKED: If the file's correct shape genuinely isn't knowable from the spec + this codebase, call task_complete with a summary that names this specific file as the blocker. Don't burn more turns on it.`,
          ``,
          `Do NOT in your next response: write to ${_wtWorstPath} again without first running and reading the failing command's output.`
        ].join("\n")
      });
      this.emit({
        type: "status",
        content: `REG-50 WRITE-THRASH halt fired at turn ${turn} — file=${_wtWorstPath}, count=${_wtWorstCount}/${_wtThreshold}, no successful verify in window`,
        timestamp: (/* @__PURE__ */ new Date()).toISOString()
      });
      try {
        const _smaRaw50 = (process.env["OA_STUCK_META_ANALYZER"] || "off").toLowerCase();
        const _smaOn50 = _smaRaw50 === "on" || _smaRaw50 === "1" || _smaRaw50 === "true";
        if (_smaOn50) {
          // Backend round-trip for the analyzer: reply text, or "" on any failure.
          const _smaCallable50 = async (prompt) => {
            try {
              const _r = await this.backend.chatCompletion({
                messages: [
                  { role: "system", content: "You are a META-ANALYSIS sub-agent. Audit the implementer's stuck state and emit a structured JSON directive." },
                  { role: "user", content: prompt }
                ],
                tools: [],
                temperature: 0,
                maxTokens: parseInt(process.env["OA_STUCK_META_MAX_TOKENS"] || "2048", 10) || 2048,
                timeoutMs: parseInt(process.env["OA_STUCK_META_TIMEOUT_MS"] || "120000", 10) || 12e4
              });
              const _c = _r?.choices?.[0]?.message?.content;
              return typeof _c === "string" ? _c : "";
            } catch {
              return "";
            }
          };
          const _smaCalls50 = _wtWindow.map((c9) => ({
            name: c9.name,
            argsKey: c9.argsKey,
            success: c9.success,
            outputPreview: (c9.outputPreview || "").split(/\r?\n/)[0]?.slice(0, 120) ?? ""
          }));
          const _smaPlan50 = (() => {
            try {
              const _todos = this.readSessionTodos() || [];
              return _todos.slice(0, 12).map((t2) => ({
                content: t2.content || "",
                reconciled: t2.status || "pending",
                rationale: "(structural status only)"
              }));
            } catch {
              return [];
            }
          })();
          const _smaTools50 = Array.from(this.tools.keys());
          // Counted at invocation time, before the analyzer's outcome is known.
          this._ssmaFiredCount++;
          runStuckAnalyzer({
            inputs: {
              goal: this._taskState.originalGoal || this._taskState.goal || "",
              workingDir: this._workingDirectory || process.cwd(),
              triggerReason: `reg50-write-thrash-${_wtWorstCount}x`,
              recentToolCalls: _smaCalls50,
              planStatus: _smaPlan50,
              recentFailures: [],
              workspaceSummary: `WRITE-THRASH: ${_wtWorstPath} written ${_wtWorstCount} times in last ${_wtWindow.length} calls without successful verification.`,
              availableTools: _smaTools50,
              turn
            },
            callable: _smaCallable50
          }).then((_smaResult) => {
            if (_smaResult.injection && !_smaResult.directive.parseFallback) {
              messages2.push({ role: "system", content: _smaResult.injection });
              this.emit({
                type: "status",
                content: `REG-50 → SSMA fired at turn ${turn} — diagnosis="${_smaResult.directive.diagnosis.slice(0, 80)}", next=${_smaResult.directive.next_action.tool}, ${_smaResult.durationMs}ms`,
                timestamp: (/* @__PURE__ */ new Date()).toISOString()
              });
            } else {
              // Report an unusable analyzer reply instead of dropping it silently.
              this.emit({
                type: "status",
                content: `REG-50 → SSMA parse failed at turn ${turn} — falling back to REG-50 halt message only`,
                timestamp: (/* @__PURE__ */ new Date()).toISOString()
              });
            }
          }).catch((_e) => {
            // Non-fatal: surface the rejection as a status event.
            this.emit({
              type: "status",
              content: `REG-50 → SSMA threw: ${_e instanceof Error ? _e.message : String(_e)} (non-fatal)`,
              timestamp: (/* @__PURE__ */ new Date()).toISOString()
            });
          });
        }
      } catch (_e) {
        // Non-fatal: a synchronous failure while preparing the analyzer is
        // reported, and the halt message above still stands on its own.
        this.emit({
          type: "status",
          content: `REG-50 → SSMA setup failed: ${_e instanceof Error ? _e.message : String(_e)} (non-fatal)`,
          timestamp: (/* @__PURE__ */ new Date()).toISOString()
        });
      }
    }
  }
}
|
|
524765
|
+
// REG-51: Problem-Frame Validator (PFV) checkpoint.
//
// Opt-in through OA_PFV ("on" / "1" / "true", case-insensitive). Runs at most
// once per turn, either periodically (every OA_PFV_INTERVAL turns, default 30)
// or on the chronic-stuck trigger (SSMA fire count at or above
// OA_PFV_CHRONIC_THRESHOLD, default 2). The validator is started but not
// awaited; a non-"continue" verdict is injected as a system message.
try {
  const _pfvRaw = (process.env["OA_PFV"] || "off").toLowerCase();
  const _pfvOn = _pfvRaw === "on" || _pfvRaw === "1" || _pfvRaw === "true";
  if (_pfvOn && this._lastPfvTurn !== turn) {
    const _pfvInterval = parseInt(process.env["OA_PFV_INTERVAL"] || "30", 10) || 30;
    const _pfvChronicThreshold = parseInt(process.env["OA_PFV_CHRONIC_THRESHOLD"] || "2", 10) || 2;
    const _pfvPeriodic = turn > 0 && _pfvInterval > 0 && turn % _pfvInterval === 0;
    // `_ssmaFiredCount` is cumulative, so a plain `>= threshold` test stays true
    // forever once the threshold is reached and would start a validator call on
    // every subsequent pass. Require at least one NEW SSMA firing since the last
    // PFV run. `_pfvSsmaCountAtLastRun` is created lazily on first use.
    const _pfvSsmaCountAtLastRun = this._pfvSsmaCountAtLastRun ?? 0;
    const _pfvChronic = this._ssmaFiredCount >= _pfvChronicThreshold && this._ssmaFiredCount > _pfvSsmaCountAtLastRun;
    if (_pfvPeriodic || _pfvChronic) {
      this._lastPfvTurn = turn;
      this._pfvSsmaCountAtLastRun = this._ssmaFiredCount;
      const _pfvTriggerReason = _pfvChronic ? "chronic-stuck" : "periodic";
      // Backend round-trip for the validator: reply text, or "" on any failure.
      const _pfvCallable = async (prompt) => {
        try {
          const _r = await this.backend.chatCompletion({
            messages: [
              { role: "system", content: "You are a META-STRATEGY VALIDATOR. Audit the implementer's frame-vs-goal alignment and emit a structured JSON verdict." },
              { role: "user", content: prompt }
            ],
            tools: [],
            temperature: 0,
            maxTokens: parseInt(process.env["OA_PFV_MAX_TOKENS"] || "1500", 10) || 1500,
            timeoutMs: parseInt(process.env["OA_PFV_TIMEOUT_MS"] || "120000", 10) || 12e4
          });
          const _c = _r?.choices?.[0]?.message?.content;
          return typeof _c === "string" ? _c : "";
        } catch {
          return "";
        }
      };
      // An unreadable todo list degrades to an empty plan.
      const _pfvTodos = (() => {
        try {
          return this.readSessionTodos() || [];
        } catch {
          return [];
        }
      })();
      const _pfvCurrentSubtask = _pfvTodos.find((t2) => t2.status === "in_progress")?.content || "";
      const _pfvPlanStatus = _pfvTodos.slice(0, 12).map((t2) => ({
        content: t2.content || "",
        status: t2.status || "pending"
      }));
      // One line per recent tool call: name, truncated args key, ok/ERR flag.
      const _pfvRecentCalls = toolCallLog.slice(-30);
      const _pfvSummaryParts = [];
      _pfvSummaryParts.push(`Last ${_pfvRecentCalls.length} tool calls (most recent at bottom):`);
      for (const _c of _pfvRecentCalls) {
        const _ak = (_c.argsKey || "").slice(0, 80);
        const _flag = _c.success === false ? "ERR" : "ok";
        _pfvSummaryParts.push(` ${_c.name}(${_ak}) ${_flag}`);
      }
      const _pfvActivity = _pfvSummaryParts.join("\n");
      runFrameValidator({
        inputs: {
          goal: this._taskState.originalGoal || this._taskState.goal || "",
          workingDir: this._workingDirectory || process.cwd(),
          currentSubtask: _pfvCurrentSubtask,
          recentActivitySummary: _pfvActivity,
          planStatus: _pfvPlanStatus,
          recentStuckCount: this._ssmaFiredCount,
          turn,
          triggerReason: _pfvTriggerReason
        },
        callable: _pfvCallable
      }).then((_pfvResult) => {
        if (_pfvResult.injection && !_pfvResult.verdict.parseFallback && _pfvResult.verdict.verdict !== "continue") {
          messages2.push({ role: "system", content: _pfvResult.injection });
          this.emit({
            type: "status",
            content: `REG-51 PFV fired at turn ${turn} — verdict=${_pfvResult.verdict.verdict}, trigger=${_pfvTriggerReason}, ssmaCount=${this._ssmaFiredCount}, ${_pfvResult.durationMs}ms`,
            timestamp: (/* @__PURE__ */ new Date()).toISOString()
          });
        } else {
          this.emit({
            type: "status",
            content: `REG-51 PFV verdict=continue (or parse-fallback) at turn ${turn} — trigger=${_pfvTriggerReason}, ${_pfvResult.durationMs}ms`,
            timestamp: (/* @__PURE__ */ new Date()).toISOString()
          });
        }
      }).catch((_e) => {
        this.emit({
          type: "status",
          content: `REG-51 PFV threw: ${_e instanceof Error ? _e.message : String(_e)} (non-fatal)`,
          timestamp: (/* @__PURE__ */ new Date()).toISOString()
        });
      });
    }
  }
} catch (_e) {
  // Non-fatal: report a synchronous setup failure instead of dropping it.
  this.emit({
    type: "status",
    content: `REG-51 PFV setup failed: ${_e instanceof Error ? _e.message : String(_e)} (non-fatal)`,
    timestamp: (/* @__PURE__ */ new Date()).toISOString()
  });
}
|
|
524031
524853
|
try {
|
|
524032
524854
|
const STICKY_PER_TURN_CAP = 2;
|
|
@@ -526063,7 +526885,90 @@ Call task_complete(summary="...") NOW with whatever you have.`
|
|
|
526063
526885
|
content: `Loop intervention ${loopInterventionCount}/${maxInterventions}: ${Math.round(currentRepScore * 100)}% repetitive (${topRepeated})`,
|
|
526064
526886
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
526065
526887
|
});
|
|
526888
|
+
// REG-49b: stuck-meta-analyzer (SSMA) escalation from the Loop Intervention
// path. Requires OA_STUCK_META_ANALYZER to be "on" / "1" / "true"
// (case-insensitive), at least two loop interventions, and that the analyzer has
// not already been invoked for the current loop episode. The analyzer is
// started but not awaited.
try {
  const analyzerSetting = (process.env["OA_STUCK_META_ANALYZER"] || "off").toLowerCase();
  const analyzerEnabled = ["on", "1", "true"].includes(analyzerSetting);
  if (analyzerEnabled && loopInterventionCount >= 2 && !this._smaFiredThisLoop) {
    // Mark the episode first so repeated interventions do not start it again.
    this._smaFiredThisLoop = true;
    // Backend round-trip handed to the analyzer. Resolves to the reply text, or
    // to "" when the call throws or the reply has no string content.
    const queryBackend = async (prompt) => {
      try {
        const maxTokens = parseInt(process.env["OA_STUCK_META_MAX_TOKENS"] || "2048", 10) || 2048;
        const timeoutMs = parseInt(process.env["OA_STUCK_META_TIMEOUT_MS"] || "120000", 10) || 120000;
        const systemTurn = { role: "system", content: "You are a META-ANALYSIS sub-agent. Audit the implementer's stuck state and emit a structured JSON directive." };
        const userTurn = { role: "user", content: prompt };
        const response = await this.backend.chatCompletion({
          messages: [systemTurn, userTurn],
          tools: [],
          temperature: 0,
          maxTokens,
          timeoutMs
        });
        const body = response?.choices?.[0]?.message?.content;
        return typeof body === "string" ? body : "";
      } catch {
        return "";
      }
    };
    // Last 25 tool calls, each reduced to its first output line (max 120 chars).
    const callDigest = toolCallLog.slice(-25).map((entry) => {
      return {
        name: entry.name,
        argsKey: entry.argsKey,
        success: entry.success,
        outputPreview: (entry.outputPreview || "").split(/\r?\n/)[0]?.slice(0, 120) ?? ""
      };
    });
    // Up to 12 session todos; an unreadable todo list degrades to an empty plan.
    let planDigest;
    try {
      const sessionTodos = this.readSessionTodos() || [];
      planDigest = sessionTodos.slice(0, 12).map((item) => {
        return {
          content: item.content || "",
          reconciled: item.status || "pending",
          rationale: "(structural status only)"
        };
      });
    } catch {
      planDigest = [];
    }
    // Five failure reflections with the highest attempt counts.
    const reflections = Array.from(this._failureReflections.entries());
    const summarized = reflections.map(([stem, entry]) => {
      return { stem, attempts: entry.attempts, preview: (entry.wentWrong || "").slice(0, 200) };
    });
    const topFailures = summarized.sort((first, second) => second.attempts - first.attempts).slice(0, 5);
    const knownTools = Array.from(this.tools.keys());
    // Counted at invocation time, before the analyzer's outcome is known.
    this._ssmaFiredCount++;
    const analyzerRun = runStuckAnalyzer({
      inputs: {
        goal: this._taskState.originalGoal || this._taskState.goal || "",
        workingDir: this._workingDirectory || process.cwd(),
        triggerReason: `loop-intervention-${loopInterventionCount}/${maxInterventions}`,
        recentToolCalls: callDigest,
        planStatus: planDigest,
        recentFailures: topFailures,
        workspaceSummary: void 0,
        availableTools: knownTools,
        turn
      },
      callable: queryBackend
    });
    analyzerRun.then((report) => {
      if (!report.injection || report.directive.parseFallback) {
        this.emit({
          type: "status",
          content: `REG-49 stuck-meta-analyzer parse failed (loop-intervention path) at turn ${turn} — falling back to existing intervention`,
          timestamp: new Date().toISOString()
        });
        return;
      }
      // Usable directive: inject it as a system message, then report it.
      messages2.push({ role: "system", content: report.injection });
      this.emit({
        type: "status",
        content: `REG-49 stuck-meta-analyzer fired (loop-intervention path) at turn ${turn} — diagnosis="${report.directive.diagnosis.slice(0, 80)}", next=${report.directive.next_action.tool}, ${report.durationMs}ms`,
        timestamp: new Date().toISOString()
      });
    }).catch((problem) => {
      this.emit({
        type: "status",
        content: `REG-49 stuck-meta-analyzer (loop-intervention path) threw: ${problem instanceof Error ? problem.message : String(problem)} (non-fatal)`,
        timestamp: new Date().toISOString()
      });
    });
  }
} catch {
  // Best-effort: a synchronous failure while preparing the analyzer is ignored.
}
|
|
526066
526970
|
} else if (currentRepScore <= 0.2) {
|
|
526971
|
+
this._smaFiredThisLoop = false;
|
|
526067
526972
|
}
|
|
526068
526973
|
} else {
|
|
526069
526974
|
const content = msg.content || "";
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-agents-ai",
|
|
3
|
-
"version": "0.187.
|
|
3
|
+
"version": "0.187.500",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "open-agents-ai",
|
|
9
|
-
"version": "0.187.
|
|
9
|
+
"version": "0.187.500",
|
|
10
10
|
"hasInstallScript": true,
|
|
11
11
|
"license": "CC-BY-NC-4.0",
|
|
12
12
|
"dependencies": {
|
package/package.json
CHANGED