open-agents-ai 0.187.478 → 0.187.480
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +171 -22
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
- package/prompts/agentic/system-large.md +7 -7
- package/prompts/agentic/system-medium.md +8 -13
package/dist/index.js
CHANGED
|
@@ -512194,17 +512194,18 @@ function buildStagnationDiagnostic(signals) {
|
|
|
512194
512194
|
``,
|
|
512195
512195
|
`2. STATE A HYPOTHESIS in writing — what specifically is wrong? "I think X is failing because Y." Be concrete. Do NOT propose a fix yet.`,
|
|
512196
512196
|
``,
|
|
512197
|
-
`3. VERIFY ONE ASSUMPTION — pick the ONE thing you most BELIEVE to be true and test it with the smallest possible command:`,
|
|
512198
|
-
` •
|
|
512199
|
-
` •
|
|
512200
|
-
` •
|
|
512201
|
-
` •
|
|
512197
|
+
`3. VERIFY ONE ASSUMPTION — pick the ONE thing you most BELIEVE to be true and test it with the smallest possible command native to whatever ecosystem you're in. Examples of the *shape* (not the exact commands):`,
|
|
512198
|
+
` • Is this artifact present on disk? (one read of the path)`,
|
|
512199
|
+
` • Does this import / reference resolve? (read 5 lines around it)`,
|
|
512200
|
+
` • Is this environment value set? (one query)`,
|
|
512201
|
+
` • Is this binary on PATH? (one which/where)`,
|
|
512202
|
+
` • Don't know what an error means? web_search("<exact error string>")`,
|
|
512202
512203
|
``,
|
|
512203
|
-
`4. CHECK SILENT FAILURES —
|
|
512204
|
+
`4. CHECK SILENT FAILURES — package managers and build systems frequently report "success" while silently dropping artifacts you needed. Don't trust summary output ("added N", "build complete") without verifying the SPECIFIC artifact exists.`,
|
|
512204
512205
|
``,
|
|
512205
512206
|
`DO NOT in your next response:`,
|
|
512206
512207
|
` • Try another version, flag, or variant of any command in the list above`,
|
|
512207
|
-
` • Wipe
|
|
512208
|
+
` • Wipe caches / re-install / re-build — that hides the original error`,
|
|
512208
512209
|
` • Call task_complete — being stuck on a debug problem is NEVER grounds for task_complete`,
|
|
512209
512210
|
``,
|
|
512210
512211
|
`task_complete is ONLY for actual completion or unrecoverable hardware/permission errors. You are stuck on a fixable problem; diagnose it.`
|
|
@@ -512228,6 +512229,87 @@ var init_critic = __esm({
|
|
|
512228
512229
|
}
|
|
512229
512230
|
});
|
|
512230
512231
|
|
|
512232
|
+
// packages/orchestrator/dist/reflection.js
|
|
512233
|
+
/**
 * Classify raw error text into one of the known failure categories.
 *
 * CATEGORY_PATTERNS is scanned in declaration order and the category of the
 * first entry whose regex matches is returned, so earlier entries win when
 * several patterns could match the same text.
 *
 * @param {string} errorText - Error output captured from a failed tool call.
 * @returns {string} The matching category name, or "unknown" when the text is
 *   empty/missing or no pattern matches.
 */
function categorizeError(errorText) {
  if (errorText) {
    const hit = CATEGORY_PATTERNS.find((entry) => entry.re.test(errorText));
    if (hit) {
      return hit.category;
    }
  }
  return "unknown";
}
|
|
512242
|
+
/**
 * Build a short "stem" that identifies a tool call by its name plus its
 * alphabetically-first argument.
 *
 * Argument keys are ordered with `localeCompare`; only the first key/value
 * pair contributes, and the rendered value is capped at 60 characters.
 *
 * @param {string} toolName - Name of the tool being called.
 * @param {object} [args] - Tool arguments; missing or empty yields `toolName`.
 * @returns {string} `toolName` alone, or `toolName:key=value` (value truncated).
 */
function buildStem(toolName, args) {
  if (!args || Object.keys(args).length === 0) {
    return toolName;
  }
  const [[firstKey, firstValue]] = Object.entries(args).sort(([a2], [b]) => a2.localeCompare(b));
  let rendered;
  if (typeof firstValue === "string") {
    rendered = firstValue;
  } else {
    try {
      // JSON.stringify yields undefined (not a string) for undefined,
      // functions and symbols, and throws on circular structures / BigInt.
      rendered = JSON.stringify(firstValue);
    } catch {
      rendered = void 0;
    }
    if (rendered === void 0) {
      // Fall back to plain coercion so the stem is always a string and a
      // malformed argument can never crash stem construction.
      rendered = String(firstValue);
    }
  }
  return `${toolName}:${firstKey}=${rendered.slice(0, 60)}`;
}
|
|
512250
|
+
/**
 * Extract the first informative line from a block of error output.
 *
 * Lines are trimmed; blank lines and lines consisting solely of "Error:" or
 * "error:" are skipped. The result is capped at 200 characters.
 *
 * @param {string} errorText - Raw error output (may span multiple lines).
 * @returns {string} The first informative line, "" for empty/missing input,
 *   or the first 200 characters of the raw text when every line is skipped.
 */
function firstSignalLine(errorText) {
  if (!errorText) {
    return "";
  }
  const isNoise = (text) => text === "" || text === "Error:" || text === "error:";
  const signal = errorText
    .split(/\r?\n/)
    .map((raw) => raw.trim())
    .find((text) => !isNoise(text));
  // No informative line at all: fall back to the untrimmed input.
  return (signal === void 0 ? errorText : signal).slice(0, 200);
}
|
|
512264
|
+
/**
 * Assemble a structured reflection record for a failed tool call.
 *
 * @param {object} input
 * @param {string} input.toolName - Name of the tool that failed.
 * @param {object} [input.args] - Arguments of the failed call.
 * @param {string} input.errorText - Error output of the failed call.
 * @param {number} input.turn - Turn on which the failure happened.
 * @param {number} [input.priorAttempts] - Earlier failures recorded for the
 *   same stem; treated as 0 when absent.
 * @returns {{stem: string, attempted: string, wentWrong: string,
 *   hypothesis: string, turn: number, attempts: number}} The reflection:
 *   stem from `buildStem`, a `tool(args)` preview (args JSON capped at 120
 *   characters), the first informative error line, the hypothesis text looked
 *   up in HYPOTHESES by error category, and the updated attempt count.
 */
function synthesizeReflection(input) {
  const { toolName, args, errorText, turn, priorAttempts } = input;
  const category = categorizeError(errorText);
  const stem = buildStem(toolName, args);
  const argPreview = JSON.stringify(args ?? {}).slice(0, 120);
  const attempted = `${toolName}(${argPreview})`;
  const wentWrong = firstSignalLine(errorText);
  const hypothesis = HYPOTHESES[category];
  const attempts = (priorAttempts ?? 0) + 1;
  return { stem, attempted, wentWrong, hypothesis, turn, attempts };
}
|
|
512277
|
+
/**
 * Render a reflection record as the four-line bracketed message text.
 *
 * @param {{attempted: string, turn: number, attempts: number,
 *   wentWrong: string, hypothesis: string}} r2 - Reflection record.
 * @returns {string} Newline-joined message: what was attempted (with turn and
 *   attempt count), the last error, the hypothesis, and the instruction to
 *   verify before retrying.
 */
function renderReflectionMessage(r2) {
  const { attempted, turn, attempts, wentWrong, hypothesis } = r2;
  // "1 attempt" vs "N attempts".
  const plural = attempts === 1 ? "" : "s";
  const header = `[REFLECTION — your last attempt of \`${attempted}\` failed (turn ${turn}, ${attempts} attempt${plural} so far).`;
  const lastError = `Last error: "${wentWrong}"`;
  const hypothesisLine = `Hypothesis: ${hypothesis}`;
  const instruction = `VERIFY this hypothesis with a single small command BEFORE retrying the same tool. If you retry without verifying, you will likely fail the same way.]`;
  return `${header}\n${lastError}\n${hypothesisLine}\n${instruction}`;
}
|
|
512285
|
+
// Module-level tables for the reflection helpers. They are declared with `var`
// and filled in by init_reflection() so the function declarations above can
// reference the names regardless of where this initializer sits in the bundle.
// NOTE(review): `__esm` is defined elsewhere in the bundle — presumably the
// bundler's lazy module-initializer wrapper; confirm before restructuring.
var CATEGORY_PATTERNS, HYPOTHESES;
var init_reflection = __esm({
  "packages/orchestrator/dist/reflection.js"() {
    "use strict";
    // Ordered list: categorizeError returns the FIRST matching entry, so the
    // more specific patterns (e.g. "cannot find module") must stay ahead of
    // the broader ones (e.g. "cannot find" under not_found).
    CATEGORY_PATTERNS = [
      { category: "permission_denied", re: /\b(permission denied|eacces|access denied|operation not permitted|forbidden)\b/i },
      // `type ?error` (optional space) also matches the `TypeError` class name
      // printed by JS/Python runtimes, not only the spaced "type error".
      { category: "type_or_reference_error", re: /\b(type ?error|cannot find module|cannot find name|is not (a function|defined|assignable)|undefined reference|unresolved (import|reference)|missing required)\b/i },
      { category: "connection_refused", re: /\b(connection refused|econnrefused|connection reset|econnreset|host unreachable|getaddrinfo|enotfound)\b/i },
      { category: "timeout", re: /\b(timeout|timed out|etimedout|deadline exceeded)\b/i },
      // `syntax ?error` likewise covers the `SyntaxError` class name.
      { category: "syntax_error", re: /\b(syntax ?error|parse error|unexpected token|unexpected end of (input|json)|malformed)\b/i },
      { category: "not_found", re: /\b(not found|enoent|no such file|cannot find|does not exist|404)\b/i },
      // Use [1-9]\d* so multi-digit non-zero codes (e.g. "return code 127") match —
      // the prior [^0] only matched a single character and failed on multi-digit.
      { category: "nonzero_exit", re: /\b(exit code [1-9]\d*|exit status [1-9]\d*|command failed|exit code: ?[1-9]\d*|return code [1-9]\d*)\b/i }
    ];
    // Hypothesis text per category; "unknown" is the fallback used when no
    // pattern matches.
    HYPOTHESES = {
      permission_denied: "permissions issue — check ownership and mode of the target; you may need to operate on a writeable location",
      not_found: "the named resource doesn't exist at the expected location — verify the path/name with a single-line list before retrying",
      connection_refused: "remote service is unreachable — verify it's running and reachable before retrying with the same address",
      timeout: "operation took too long — reduce scope (smaller batch, fewer items) or verify the service is healthy",
      syntax_error: "malformed input — re-read the surrounding context; the input you produced doesn't match what the consumer expects",
      type_or_reference_error: "a name, type, or import doesn't resolve — verify the reference matches what's defined; do not guess at the symbol",
      nonzero_exit: "the command exited with a failure code — read the FULL error output and verify args + prerequisites before retrying",
      unknown: "re-read the full error message and identify the most likely cause; verify your assumption with a single small command before retrying"
    };
  }
});
|
|
512312
|
+
|
|
512231
512313
|
// packages/orchestrator/dist/pressure-gate.js
|
|
512232
512314
|
function detectPressure(message2) {
|
|
512233
512315
|
const hasProfanity = PRESSURE_SIGNALS.test(message2);
|
|
@@ -518337,10 +518419,8 @@ function detectTaskMode(task) {
|
|
|
518337
518419
|
return true;
|
|
518338
518420
|
if (/(\/[\w.-]+){2,}/.test(task.slice(0, 2e3)))
|
|
518339
518421
|
return true;
|
|
518340
|
-
if (/\b(implement|build|create|refactor|
|
|
518341
|
-
|
|
518342
|
-
return true;
|
|
518343
|
-
}
|
|
518422
|
+
if (/\b(implement|build|create|refactor|rewrite|fix|migrate|deploy|generate|setup|set up|develop|design|integrate|configure|install|debug|port|extend|add)\b/.test(head)) {
|
|
518423
|
+
return true;
|
|
518344
518424
|
}
|
|
518345
518425
|
return false;
|
|
518346
518426
|
}
|
|
@@ -518461,6 +518541,7 @@ var init_agenticRunner = __esm({
|
|
|
518461
518541
|
init_personality();
|
|
518462
518542
|
init_promptLoader();
|
|
518463
518543
|
init_critic();
|
|
518544
|
+
init_reflection();
|
|
518464
518545
|
init_pressure_gate();
|
|
518465
518546
|
init_dist5();
|
|
518466
518547
|
init_dist7();
|
|
@@ -518587,6 +518668,14 @@ var init_agenticRunner = __esm({
|
|
|
518587
518668
|
_errorPatterns = /* @__PURE__ */ new Map();
|
|
518588
518669
|
_errorGuidanceInjected = /* @__PURE__ */ new Set();
|
|
518589
518670
|
// prevent duplicate injection per turn
|
|
518671
|
+
// REG-26 (Patch C): Reflexion-style structured failure memory. Indexed by
|
|
518672
|
+
// fingerprint stem (tool + first arg, truncated). When the agent retries a
|
|
518673
|
+
// tool with a stem matching a stored reflection, surface "what was tried,
|
|
518674
|
+
// what went wrong, hypothesis to verify" as a system message before the
|
|
518675
|
+
// dispatch — generic across all stacks. See packages/orchestrator/src/reflection.ts.
|
|
518676
|
+
_failureReflections = /* @__PURE__ */ new Map();
|
|
518677
|
+
_reflectionsInjectedThisTurn = /* @__PURE__ */ new Set();
|
|
518678
|
+
// prevent duplicate inject per turn
|
|
518590
518679
|
// ── WO-AM-01/04/10: Associative memory stores ──
|
|
518591
518680
|
// Episode store: every tool call → persistent episode with importance + decay
|
|
518592
518681
|
// Temporal KG: entities + relations with temporal validity (valid_from/valid_until)
|
|
@@ -520730,6 +520819,7 @@ TASK: ${task}` : task;
|
|
|
520730
520819
|
break;
|
|
520731
520820
|
}
|
|
520732
520821
|
injectionsThisTurn = 0;
|
|
520822
|
+
this._reflectionsInjectedThisTurn.clear();
|
|
520733
520823
|
while (deferredSoftInjections.length > 0 && injectionsThisTurn < INJECTION_BUDGET_SOFT) {
|
|
520734
520824
|
const next = deferredSoftInjections.shift();
|
|
520735
520825
|
messages2.push({ role: next.role, content: next.content });
|
|
@@ -521599,6 +521689,16 @@ ${memoryLines.join("\n")}`
|
|
|
521599
521689
|
if (observerRedundantBlock) {
|
|
521600
521690
|
this._littlemanRedundantBlocks.delete(toolFingerprint);
|
|
521601
521691
|
}
|
|
521692
|
+
{
|
|
521693
|
+
const _reflStem = buildStem(tc.name, tc.arguments ?? {});
|
|
521694
|
+
if (!this._reflectionsInjectedThisTurn.has(_reflStem)) {
|
|
521695
|
+
const _reflEntry = this._failureReflections.get(_reflStem);
|
|
521696
|
+
if (_reflEntry) {
|
|
521697
|
+
this._reflectionsInjectedThisTurn.add(_reflStem);
|
|
521698
|
+
pushSoftInjection("system", renderReflectionMessage(_reflEntry));
|
|
521699
|
+
}
|
|
521700
|
+
}
|
|
521701
|
+
}
|
|
521602
521702
|
const criticDecision = evaluate({
|
|
521603
521703
|
proposedCall: { tool: tc.name, args: tc.arguments ?? {} },
|
|
521604
521704
|
fingerprint: toolFingerprint,
|
|
@@ -521625,6 +521725,11 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
521625
521725
|
}
|
|
521626
521726
|
if (criticDecision.decision === "force_progress_block") {
|
|
521627
521727
|
dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
|
|
521728
|
+
const _existingFp = recentToolResults.get(toolFingerprint);
|
|
521729
|
+
if (_existingFp !== void 0) {
|
|
521730
|
+
recentToolResults.delete(toolFingerprint);
|
|
521731
|
+
recentToolResults.set(toolFingerprint, _existingFp);
|
|
521732
|
+
}
|
|
521628
521733
|
this.emit({ type: "tool_call", toolName: tc.name, toolArgs: tc.arguments, turn, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
521629
521734
|
this.emit({
|
|
521630
521735
|
type: "tool_result",
|
|
@@ -521638,6 +521743,11 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
521638
521743
|
}
|
|
521639
521744
|
if (criticDecision.decision === "serve_cached") {
|
|
521640
521745
|
dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
|
|
521746
|
+
const _existingFp = recentToolResults.get(toolFingerprint);
|
|
521747
|
+
if (_existingFp !== void 0) {
|
|
521748
|
+
recentToolResults.delete(toolFingerprint);
|
|
521749
|
+
recentToolResults.set(toolFingerprint, _existingFp);
|
|
521750
|
+
}
|
|
521641
521751
|
this.emit({
|
|
521642
521752
|
type: "tool_call",
|
|
521643
521753
|
toolName: tc.name,
|
|
@@ -522063,6 +522173,8 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
522063
522173
|
}
|
|
522064
522174
|
if (result.success) {
|
|
522065
522175
|
this._recentFailures = this._recentFailures.filter((f2) => f2.fingerprint !== toolFingerprint);
|
|
522176
|
+
const _stem = buildStem(tc.name, tc.arguments ?? {});
|
|
522177
|
+
this._failureReflections.delete(_stem);
|
|
522066
522178
|
}
|
|
522067
522179
|
if (!result.success) {
|
|
522068
522180
|
this._recentFailures.push({
|
|
@@ -522076,6 +522188,22 @@ ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confi
|
|
|
522076
522188
|
if (this._recentFailures.length > 8) {
|
|
522077
522189
|
this._recentFailures = this._recentFailures.slice(-8);
|
|
522078
522190
|
}
|
|
522191
|
+
const _refStem = buildStem(tc.name, tc.arguments ?? {});
|
|
522192
|
+
const _prior = this._failureReflections.get(_refStem);
|
|
522193
|
+
const _refErr = (result.error ?? result.output ?? "").toString();
|
|
522194
|
+
const _entry = synthesizeReflection({
|
|
522195
|
+
toolName: tc.name,
|
|
522196
|
+
args: tc.arguments ?? {},
|
|
522197
|
+
errorText: _refErr,
|
|
522198
|
+
turn,
|
|
522199
|
+
priorAttempts: _prior?.attempts ?? 0
|
|
522200
|
+
});
|
|
522201
|
+
this._failureReflections.set(_refStem, _entry);
|
|
522202
|
+
if (this._failureReflections.size > 32) {
|
|
522203
|
+
const oldestKey = this._failureReflections.keys().next().value;
|
|
522204
|
+
if (oldestKey !== void 0)
|
|
522205
|
+
this._failureReflections.delete(oldestKey);
|
|
522206
|
+
}
|
|
522079
522207
|
}
|
|
522080
522208
|
if (!result.success && tc.name === "shell" && /\[PERMISSION_ERROR\]/.test(result.error ?? "")) {
|
|
522081
522209
|
this.emit({
|
|
@@ -522341,9 +522469,35 @@ ${sr.result.output}`;
|
|
|
522341
522469
|
for (const batch2 of batches) {
|
|
522342
522470
|
if (this.aborted)
|
|
522343
522471
|
break;
|
|
522472
|
+
const batchFingerprintFirstId = /* @__PURE__ */ new Map();
|
|
522473
|
+
const batchInFlight = /* @__PURE__ */ new Map();
|
|
522474
|
+
// Fingerprint a call within this batch: tool name plus every argument, with
// keys ordered by localeCompare. The fingerprint decides whether a later call
// is served the result of an earlier identical call instead of running, so it
// must distinguish calls exactly. Values are therefore NOT truncated: a
// prefix-only key would make two different calls that share a long prefix
// collide, and the second would silently never execute.
const buildBatchFp = (call) => {
  const args = call.args ?? {};
  // JSON.stringify returns undefined for undefined/function/symbol values;
  // fall back to String() so a sparse argument cannot throw here.
  const renderValue = (value) => typeof value === "string" ? value : JSON.stringify(value) ?? String(value);
  const argsKey = Object.entries(args).sort(([a2], [b]) => a2.localeCompare(b)).map(([k, v]) => `${k}=${renderValue(v)}`).join(",");
  return `${call.name}:${argsKey}`;
};
|
|
522479
|
+
for (const call of batch2.calls) {
|
|
522480
|
+
const fp = buildBatchFp(call);
|
|
522481
|
+
if (!batchFingerprintFirstId.has(fp)) {
|
|
522482
|
+
batchFingerprintFirstId.set(fp, call.id);
|
|
522483
|
+
}
|
|
522484
|
+
}
|
|
522344
522485
|
const results = await executeBatch(batch2, async (call) => {
|
|
522345
522486
|
const originalTc = rawToolCalls.find((tc) => tc.id === call.id);
|
|
522346
|
-
|
|
522487
|
+
const fp = buildBatchFp(call);
|
|
522488
|
+
const firstId = batchFingerprintFirstId.get(fp);
|
|
522489
|
+
if (firstId !== void 0 && call.id !== void 0 && firstId !== call.id) {
|
|
522490
|
+
const inflight = batchInFlight.get(fp);
|
|
522491
|
+
if (inflight) {
|
|
522492
|
+
const cloned = await inflight;
|
|
522493
|
+
if (!cloned)
|
|
522494
|
+
return null;
|
|
522495
|
+
return { tc: { ...cloned.tc, id: call.id }, output: cloned.output };
|
|
522496
|
+
}
|
|
522497
|
+
}
|
|
522498
|
+
const promise = executeSingle(originalTc);
|
|
522499
|
+
batchInFlight.set(fp, promise);
|
|
522500
|
+
return promise;
|
|
522347
522501
|
}, 5);
|
|
522348
522502
|
for (const r2 of results) {
|
|
522349
522503
|
if (r2) {
|
|
@@ -525000,7 +525154,11 @@ ${transcript}`
|
|
|
525000
525154
|
/\buse\s+(\w+)/g,
|
|
525001
525155
|
/\bcall\s+(\w+)/g,
|
|
525002
525156
|
/`([a-z_]+)`/g,
|
|
525003
|
-
/\btool[:\s]+(\w+)/g
|
|
525157
|
+
/\btool[:\s]+(\w+)/g,
|
|
525158
|
+
// Function-call syntax: `name(args)` is the strongest call-site signal
|
|
525159
|
+
// a name can have. A bare identifier mention isn't enough — that catches
|
|
525160
|
+
// filesystem nouns like `node_modules` or `package_lock`.
|
|
525161
|
+
/\b([a-z][a-z0-9_]*[a-z0-9])\s*\(/g
|
|
525004
525162
|
];
|
|
525005
525163
|
const contextualNames = /* @__PURE__ */ new Set();
|
|
525006
525164
|
for (const pat of TOOL_CONTEXT_PATTERNS) {
|
|
@@ -525011,14 +525169,6 @@ ${transcript}`
|
|
|
525011
525169
|
contextualNames.add(name10);
|
|
525012
525170
|
}
|
|
525013
525171
|
}
|
|
525014
|
-
const nameCounts = /* @__PURE__ */ new Map();
|
|
525015
|
-
for (const name10 of referenced) {
|
|
525016
|
-
const escaped = name10.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
525017
|
-
const occurrences = (systemPrompt.match(new RegExp(`\\b${escaped}\\b`, "g")) ?? []).length;
|
|
525018
|
-
nameCounts.set(name10, occurrences);
|
|
525019
|
-
if (occurrences >= 2)
|
|
525020
|
-
contextualNames.add(name10);
|
|
525021
|
-
}
|
|
525022
525172
|
const IGNORE_LIST = /* @__PURE__ */ new Set([
|
|
525023
525173
|
"tool_use",
|
|
525024
525174
|
"tool_call",
|
|
@@ -525063,7 +525213,6 @@ ${transcript}`
|
|
|
525063
525213
|
// reserved status for partial-done todos
|
|
525064
525214
|
"not_started",
|
|
525065
525215
|
// alternative status phrasing
|
|
525066
|
-
// Shell/bash idioms that look like snake_case
|
|
525067
525216
|
"ctrl_c",
|
|
525068
525217
|
"ctrl_d"
|
|
525069
525218
|
]);
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-agents-ai",
|
|
3
|
-
"version": "0.187.
|
|
3
|
+
"version": "0.187.480",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "open-agents-ai",
|
|
9
|
-
"version": "0.187.
|
|
9
|
+
"version": "0.187.480",
|
|
10
10
|
"hasInstallScript": true,
|
|
11
11
|
"license": "CC-BY-NC-4.0",
|
|
12
12
|
"dependencies": {
|
package/package.json
CHANGED
|
package/prompts/agentic/system-large.md
CHANGED
|
@@ -169,17 +169,17 @@ If you have tried 2+ approaches to the same blocker and both failed, **STOP atte
|
|
|
169
169
|
|
|
170
170
|
**The diagnostic loop (one cycle per turn, NOT batched):**
|
|
171
171
|
|
|
172
|
-
1. **READ THE FULL ERROR** — re-read the most recent failure output ENTIRELY.
|
|
173
|
-
2. **VERIFY ONE ASSUMPTION** — pick ONE thing you BELIEVE to be true and test it with the smallest possible command
|
|
174
|
-
3. **STATE A HYPOTHESIS in writing** before your next action. Then design ONE experiment that
|
|
172
|
+
1. **READ THE FULL ERROR** — re-read the most recent failure output ENTIRELY. If it's in a log packet, query `op="errors"` then `op="lines"` for context.
|
|
173
|
+
2. **VERIFY ONE ASSUMPTION** — pick ONE thing you BELIEVE to be true and test it with the smallest possible command native to your ecosystem. Examples of the shape: "is this artifact present?", "does this import resolve?", "is this env var set?". One read, one fact verified.
|
|
174
|
+
3. **STATE A HYPOTHESIS in writing** before your next action. Then design ONE experiment that CONFIRMS or REFUTES it — verify, do NOT fix yet.
|
|
175
175
|
4. **WEB SEARCH the exact error message** if you don't know what it means. A 30-second lookup beats 10 retry attempts.
|
|
176
|
-
5. **CHECK THE OBVIOUS** —
|
|
176
|
+
5. **CHECK THE OBVIOUS** — package managers and build systems frequently report "success" while silently dropping artifacts. Don't trust summary output ("added N", "build complete") without verifying the SPECIFIC artifact you needed actually exists.
|
|
177
177
|
6. Only AFTER root cause is verified, attempt ONE fix targeting that cause. If the fix fails, return to step 1 with the new error.
|
|
178
178
|
|
|
179
179
|
**What diagnostic mode is NOT:**
|
|
180
|
-
- Trying another version
|
|
181
|
-
- Adding
|
|
182
|
-
- Wiping
|
|
180
|
+
- Trying another version of the same dependency after one failed — variant-fatigue, not diagnosis.
|
|
181
|
+
- Adding force/override flags that suppress warnings — masks root causes.
|
|
182
|
+
- Wiping caches/dependencies and reinstalling — hides the original error.
|
|
183
183
|
- Calling task_complete to escape — task_complete is NEVER the answer to a stuck debugging session.
|
|
184
184
|
- Use grep_search and find_files for efficient exploration (don't dump entire directories)
|
|
185
185
|
- Use file_edit for small changes instead of rewriting entire files
|
|
package/prompts/agentic/system-medium.md
CHANGED
|
@@ -102,27 +102,22 @@ If you have tried 2+ approaches to the same blocker and both failed, **STOP atte
|
|
|
102
102
|
|
|
103
103
|
**The diagnostic loop (one cycle per turn, NOT batched):**
|
|
104
104
|
|
|
105
|
-
1. **READ THE FULL ERROR** — re-read the most recent failure output ENTIRELY. Don't skim the first 200 chars. If the output is in a log packet,
|
|
105
|
+
1. **READ THE FULL ERROR** — re-read the most recent failure output ENTIRELY. Don't skim the first 200 chars. If the output is in a log packet, query it with `op="errors"` then `op="lines"` for surrounding context.
|
|
106
106
|
|
|
107
|
-
2. **VERIFY ONE ASSUMPTION** — pick ONE thing you BELIEVE to be true and test it with the smallest possible command:
|
|
108
|
-
- "I think tailwindcss is installed" → `ls node_modules/tailwindcss/package.json` (one line)
|
|
109
|
-
- "I think the import path is right" → `cat src/lib/x.ts | head -5`
|
|
110
|
-
- "I think the env var is set" → `printenv VAR_NAME`
|
|
107
|
+
2. **VERIFY ONE ASSUMPTION** — pick ONE thing you BELIEVE to be true and test it with the smallest possible command native to whatever ecosystem you're in. Examples of the *shape* (not the exact commands): "is this artifact present on disk?", "does this import resolve?", "is this environment variable set?", "does this binary exist on PATH?". One read, one fact verified.
|
|
111
108
|
|
|
112
|
-
3. **STATE A HYPOTHESIS in writing** before your next action
|
|
113
|
-
- "Hypothesis: tailwindcss didn't install because @tailwindcss/postcss has a peer-dep conflict with autoprefixer."
|
|
114
|
-
- Then design ONE experiment that would CONFIRM or REFUTE it (not fix it — verify it first).
|
|
109
|
+
3. **STATE A HYPOTHESIS in writing** before your next action — "I think X is failing because Y." Be concrete. Then design ONE experiment that would CONFIRM or REFUTE it (verify it first; do NOT fix yet).
|
|
115
110
|
|
|
116
|
-
4. **WEB SEARCH the exact error message** if you don't know what it means.
|
|
111
|
+
4. **WEB SEARCH the exact error message** if you don't know what it means. Quote the exact error string. A 30-second lookup beats 10 retry attempts.
|
|
117
112
|
|
|
118
|
-
5. **CHECK THE OBVIOUS** —
|
|
113
|
+
5. **CHECK THE OBVIOUS** — package managers and build systems frequently report "success" while silently dropping artifacts. Don't trust a summary like "added N packages" or "build complete" without verifying the SPECIFIC artifact you needed actually exists. Check each expected output explicitly.
|
|
119
114
|
|
|
120
115
|
6. Only AFTER root cause is verified, attempt ONE fix targeting that cause. If the fix fails, return to step 1 with the new error.
|
|
121
116
|
|
|
122
117
|
**What diagnostic mode is NOT:**
|
|
123
|
-
- Trying
|
|
124
|
-
- Adding
|
|
125
|
-
- Wiping
|
|
118
|
+
- Trying a different version of the same dependency after one failed — that's variant-fatigue, not diagnosis.
|
|
119
|
+
- Adding force/override flags that suppress warnings — those mask root causes, they don't reveal them.
|
|
120
|
+
- Wiping caches/dependencies and reinstalling — that hides the original error.
|
|
126
121
|
- Calling task_complete to escape — task_complete is NEVER the answer to a stuck debugging session.
|
|
127
122
|
- Do NOT output long explanations. Focus on tool calls.
|
|
128
123
|
- If file_read/list_directory returns ENOENT, use list_directory on the project root — do NOT guess parent paths
|