@runtypelabs/sdk 1.7.1 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/endpoints.js CHANGED
@@ -1170,9 +1170,21 @@ class AgentsEndpoint {
1170
1170
  throw new Error(error.error || `HTTP ${response.status}`);
1171
1171
  }
1172
1172
  let completeEvent = null;
1173
+ // Accumulate turn delta text so finalOutput is non-empty even when the
1174
+ // agent_complete event carries no output (e.g. model ended after tool calls)
1175
+ let accumulatedOutput = '';
1173
1176
  await processAgentStream(response.body, {
1174
1177
  ...callbacks,
1178
+ onTurnDelta: (event) => {
1179
+ if (event.contentType === 'text') {
1180
+ accumulatedOutput += event.delta;
1181
+ }
1182
+ callbacks.onTurnDelta?.(event);
1183
+ },
1175
1184
  onAgentComplete: (event) => {
1185
+ if (!event.finalOutput && accumulatedOutput) {
1186
+ event.finalOutput = accumulatedOutput;
1187
+ }
1176
1188
  completeEvent = event;
1177
1189
  callbacks.onAgentComplete?.(event);
1178
1190
  },
@@ -1198,7 +1210,7 @@ class AgentsEndpoint {
1198
1210
  * })
1199
1211
  * ```
1200
1212
  */
1201
- async executeWithLocalTools(id, data, localTools, callbacks) {
1213
+ async executeWithLocalTools(id, data, localTools, callbacks, options) {
1202
1214
  // Build runtime tool definitions from local tool schemas and inject into request
1203
1215
  const runtimeTools = Object.entries(localTools).map(([name, def]) => ({
1204
1216
  name,
@@ -1219,16 +1231,36 @@ class AgentsEndpoint {
1219
1231
  throw new Error(error.error || `HTTP ${response.status}`);
1220
1232
  }
1221
1233
  let currentBody = response.body;
1234
+ // Accumulate text output across all streams (initial + resume cycles)
1235
+ // so finalOutput is non-empty even when the last resume stream has no text
1236
+ let accumulatedOutput = '';
1237
+ let pauseCount = 0;
1238
+ let discoveryPauseCount = 0;
1239
+ let consecutiveDiscoveryPauseCount = 0;
1240
+ const toolNameCounts = {};
1241
+ let recentActionKeys = [];
1222
1242
  while (true) {
1223
1243
  let pausedEvent = null;
1224
1244
  let completeEvent = null;
1225
1245
  await processAgentStream(currentBody, {
1226
1246
  ...callbacks,
1247
+ onTurnDelta: (event) => {
1248
+ if (event.contentType === 'text') {
1249
+ accumulatedOutput += event.delta;
1250
+ }
1251
+ callbacks?.onTurnDelta?.(event);
1252
+ },
1227
1253
  onAgentPaused: (event) => {
1228
1254
  pausedEvent = event;
1229
1255
  callbacks?.onAgentPaused?.(event);
1230
1256
  },
1231
1257
  onAgentComplete: (event) => {
1258
+ // Supplement finalOutput with accumulated turn deltas when the
1259
+ // agent_complete event itself carries no output (common when the
1260
+ // model's last action was a tool call rather than text output)
1261
+ if (!event.finalOutput && accumulatedOutput) {
1262
+ event.finalOutput = accumulatedOutput;
1263
+ }
1232
1264
  completeEvent = event;
1233
1265
  callbacks?.onAgentComplete?.(event);
1234
1266
  },
@@ -1273,6 +1305,41 @@ class AgentsEndpoint {
1273
1305
  // Return the error as a tool result so the agent can recover
1274
1306
  toolResult = `Error: ${err instanceof Error ? err.message : String(err)}`;
1275
1307
  }
1308
+ pauseCount += 1;
1309
+ const toolNameCount = (toolNameCounts[toolName] || 0) + 1;
1310
+ toolNameCounts[toolName] = toolNameCount;
1311
+ const discoveryTool = this.isDiscoveryLocalTool(toolName);
1312
+ if (discoveryTool) {
1313
+ discoveryPauseCount += 1;
1314
+ consecutiveDiscoveryPauseCount += 1;
1315
+ }
1316
+ else {
1317
+ consecutiveDiscoveryPauseCount = 0;
1318
+ }
1319
+ const actionKey = this.buildLocalToolActionKey(toolName, parsedParams);
1320
+ recentActionKeys = [...recentActionKeys, actionKey].slice(-12);
1321
+ const actionKeyCount = recentActionKeys.filter((candidateActionKey) => candidateActionKey === actionKey).length;
1322
+ const forcedCompleteEvent = options?.onLocalToolResult?.({
1323
+ executionId,
1324
+ pauseCount,
1325
+ discoveryPauseCount,
1326
+ consecutiveDiscoveryPauseCount,
1327
+ toolName,
1328
+ toolNameCount,
1329
+ parameters: parsedParams,
1330
+ toolResult,
1331
+ accumulatedOutput,
1332
+ actionKey,
1333
+ actionKeyCount,
1334
+ recentActionKeys,
1335
+ });
1336
+ if (forcedCompleteEvent) {
1337
+ if (!forcedCompleteEvent.finalOutput && accumulatedOutput) {
1338
+ forcedCompleteEvent.finalOutput = accumulatedOutput;
1339
+ }
1340
+ callbacks?.onAgentComplete?.(forcedCompleteEvent);
1341
+ return forcedCompleteEvent;
1342
+ }
1276
1343
  // Resume via agent resume endpoint
1277
1344
  const resumeResponse = await this.client.requestStream(`/agents/${id}/resume`, {
1278
1345
  method: 'POST',
@@ -1295,6 +1362,1017 @@ class AgentsEndpoint {
1295
1362
  }
1296
1363
  }
1297
1364
  // ─── Long-Task Agent Execution ───────────────────────────────────────
1365
/**
 * Creates a fresh tool-trace record.
 *
 * Every call returns a new object: the list fields (`entries`, `readPaths`,
 * `actionKeys`, `candidatePaths`) start as new empty arrays and every boolean
 * flag starts as `false`. Optional fields that other methods in this class set
 * later (for example `bestCandidatePath`, `bestCandidateReason`,
 * `forcedTurnEndReason`) are intentionally absent here.
 *
 * @returns {object} A trace object with empty lists and all flags cleared.
 */
+ createEmptyToolTrace() {
1366
+ return {
1367
+ entries: [],
1368
+ discoveryPerformed: false,
1369
+ attemptedWrite: false,
1370
+ wroteFiles: false,
1371
+ executionFileWritten: false,
1372
+ readFiles: false,
1373
+ readPaths: [],
1374
+ actionKeys: [],
1375
+ candidatePaths: [],
1376
+ planWritten: false,
1377
+ bestCandidateReadFailed: false,
1378
+ bestCandidateWritten: false,
1379
+ bestCandidateVerified: false,
1380
+ verificationAttempted: false,
1381
+ verificationPassed: false,
1382
+ localToolLoopGuardTriggered: false,
1383
+ };
1384
+ }
1385
/**
 * Reports whether a local tool counts as a "discovery" tool for the pause
 * counters kept by `executeWithLocalTools`.
 *
 * NOTE(review): this list includes `read_file`, whereas `isDiscoveryToolName`
 * in this same class does not. The difference looks deliberate (reads count
 * toward the loop budget but are not blocked during execution) — confirm.
 *
 * @param {string} toolName - Name of the local tool that was just executed.
 * @returns {boolean} `true` for tree_directory, search_repo, glob_files,
 *   list_directory and read_file; `false` otherwise.
 */
+ isDiscoveryLocalTool(toolName) {
1386
+ return ['tree_directory', 'search_repo', 'glob_files', 'list_directory', 'read_file'].includes(toolName);
1387
+ }
1388
/**
 * Builds a short string key identifying one local tool invocation, used to
 * detect the same action being repeated.
 *
 * The descriptor is the first non-empty value among: the normalized `path`,
 * the trimmed `query` (or `q`), the trimmed `pattern`, and the trimmed
 * `command`. Only string-typed parameters are considered. The descriptor is
 * cut to 160 characters and appended to the tool name as `tool:descriptor`.
 * When none of those parameters yields text, a compact summary of the whole
 * parameters value is used instead; when that is empty too, the bare tool
 * name is returned.
 *
 * NOTE(review): `parameters` is dereferenced without a guard, so a null or
 * undefined value would throw — verify callers always pass an object.
 *
 * @param {string} toolName - Name of the tool that was invoked.
 * @param {object} parameters - Parsed tool arguments.
 * @returns {string} The action key.
 */
+ buildLocalToolActionKey(toolName, parameters) {
1389
+ const pathValue = typeof parameters.path === 'string' ? this.normalizeCandidatePath(parameters.path) : '';
1390
+ const queryValue = typeof parameters.query === 'string'
1391
+ ? parameters.query.trim()
1392
+ : typeof parameters.q === 'string'
1393
+ ? parameters.q.trim()
1394
+ : '';
1395
+ const patternValue = typeof parameters.pattern === 'string' ? parameters.pattern.trim() : '';
1396
+ const commandValue = typeof parameters.command === 'string' ? parameters.command.trim() : '';
// Precedence matters: a call carrying both a path and a query is keyed by path only.
1397
+ const descriptor = pathValue || queryValue || patternValue || commandValue;
1398
+ if (descriptor) {
1399
+ return `${toolName}:${descriptor.slice(0, 160)}`;
1400
+ }
1401
+ const fallback = this.summarizeUnknownForTrace(parameters, 160);
1402
+ return fallback ? `${toolName}:${fallback}` : toolName;
1403
+ }
1404
/**
 * Produces a copy of `state` as it would look after folding in what the
 * current session trace has observed, without mutating either argument.
 *
 * - `candidatePaths` / `recentReadPaths`: de-duplicated union of the state
 *   list (first) and the trace list (`trace.candidatePaths` /
 *   `trace.readPaths`), keeping only the last 20 entries.
 * - `bestCandidatePath` / `bestCandidateReason`: overridden from the trace
 *   only when the trace has a truthy `bestCandidatePath`.
 * - `planWritten`: truthy if either the state or the trace says so.
 *
 * @param {object} state - Persisted task state; list fields may be missing.
 * @param {object} trace - Current session tool trace.
 * @returns {object} A new state-shaped object (shallow copy of `state`).
 */
+ buildProspectiveStateForSessionTrace(state, trace) {
1405
+ const candidatePaths = Array.from(new Set([...(state.candidatePaths || []), ...trace.candidatePaths])).slice(-20);
1406
+ const recentReadPaths = Array.from(new Set([...(state.recentReadPaths || []), ...trace.readPaths])).slice(-20);
1407
+ return {
1408
+ ...state,
1409
+ ...(trace.bestCandidatePath
1410
+ ? {
1411
+ bestCandidatePath: trace.bestCandidatePath,
1412
+ bestCandidateReason: trace.bestCandidateReason,
1413
+ }
1414
+ : {}),
1415
+ candidatePaths,
1416
+ recentReadPaths,
1417
+ planWritten: state.planWritten || trace.planWritten,
1418
+ };
1419
+ }
1420
/**
 * Builds a synthetic `agent_complete` event used when the local-tool loop
 * guard ends a turn on the client side.
 *
 * `finalOutput` is assembled from up to three parts, joined by blank lines,
 * with empty parts dropped: the trimmed text accumulated so far, a sentence
 * naming the workflow phase (defaulting to "research") and the guard reason,
 * and the last five recent action keys joined by " | ".
 *
 * The remaining fields are fixed placeholder values set by this method
 * (`seq: 0`, `success: true`, `iterations: 1`, `stopReason: 'end_turn'`,
 * `totalCost: 0`, `duration: 0`); `completedAt` is the current time in ISO
 * format.
 *
 * @param {object} state - Task state; reads `workflowPhase` and `agentId`.
 * @param {object} snapshot - Guard snapshot; reads `accumulatedOutput`
 *   (must be a string), `recentActionKeys` (array) and `executionId`.
 * @param {string} reason - Human-readable reason the guard fired.
 * @returns {object} The synthetic completion event.
 */
+ buildForcedLocalToolTurnCompleteEvent(state, snapshot, reason) {
1421
+ const finalOutput = [
1422
+ snapshot.accumulatedOutput.trim(),
1423
+ `Local tool loop guard ended this ${state.workflowPhase || 'research'} turn: ${reason}`,
1424
+ snapshot.recentActionKeys.length > 0
1425
+ ? `Recent local tool actions: ${snapshot.recentActionKeys.slice(-5).join(' | ')}`
1426
+ : '',
1427
+ ]
1428
+ .filter(Boolean)
1429
+ .join('\n\n');
1430
+ return {
1431
+ type: 'agent_complete',
1432
+ executionId: snapshot.executionId,
1433
+ seq: 0,
1434
+ agentId: state.agentId,
1435
+ success: true,
1436
+ iterations: 1,
1437
+ stopReason: 'end_turn',
1438
+ completedAt: new Date().toISOString(),
1439
+ totalCost: 0,
1440
+ finalOutput,
1441
+ duration: 0,
1442
+ };
1443
+ }
1444
/**
 * Creates the callback that decides, after each local tool result, whether
 * the current turn should be force-ended because the agent is looping on
 * discovery tools.
 *
 * The returned function receives a snapshot of per-execution counters and
 * returns either `undefined` (keep going) or a synthetic completion event.
 * `state` and `trace` are captured by reference, so each invocation sees
 * their current contents.
 *
 * Rules, by `state.workflowPhase`:
 * - research: fires when (a) research evidence is already sufficient and at
 *   least 12 discovery pauses occurred, else (b) the same action key occurred
 *   4+ times in the recent window, else (c) 18+ consecutive discovery pauses
 *   or 24+ discovery pauses in total.
 * - planning: fires after 18+ consecutive discovery pauses while the trace
 *   shows no plan written.
 * - execution: fires after 18+ consecutive discovery pauses while the trace
 *   shows no execution file written.
 * - any other phase value (including unset): never fires.
 *
 * When it fires it marks the trace (`localToolLoopGuardTriggered`,
 * `forcedTurnEndReason`) and appends a trace entry before returning the event.
 *
 * @param {object} state - Task state for the running session.
 * @param {object} trace - Session tool trace; mutated when the guard fires.
 * @returns {function(object): (object|undefined)} Guard callback.
 */
+ createLocalToolLoopGuard(state, trace) {
1445
+ return (snapshot) => {
1446
+ const repeatedAction = snapshot.actionKeyCount >= 4;
1447
+ const heavyDiscoveryLoop = snapshot.discoveryPauseCount >= 24;
1448
+ const prospectiveState = this.buildProspectiveStateForSessionTrace(state, trace);
1449
+ const sufficientResearch = state.workflowPhase === 'research' && this.hasSufficientResearchEvidence(prospectiveState);
1450
+ let reason;
1451
+ if (state.workflowPhase === 'research') {
1452
+ if (sufficientResearch && snapshot.discoveryPauseCount >= 12) {
1453
+ reason =
1454
+ 'research evidence is already sufficient, but this execution kept issuing discovery tools instead of ending the turn';
1455
+ }
1456
+ else if (repeatedAction) {
1457
+ reason = `the same discovery action repeated ${snapshot.actionKeyCount} times in one session`;
1458
+ }
1459
+ else if (snapshot.consecutiveDiscoveryPauseCount >= 18 || heavyDiscoveryLoop) {
1460
+ reason =
1461
+ 'this session exceeded the discovery-tool budget without ending the turn';
1462
+ }
1463
+ }
1464
+ else if (state.workflowPhase === 'planning' &&
1465
+ !trace.planWritten &&
1466
+ snapshot.consecutiveDiscoveryPauseCount >= 18) {
1467
+ reason = 'planning is looping on discovery instead of writing the plan and ending the turn';
1468
+ }
1469
+ else if (state.workflowPhase === 'execution' &&
1470
+ !trace.executionFileWritten &&
1471
+ snapshot.consecutiveDiscoveryPauseCount >= 18) {
1472
+ reason = 'execution is looping on discovery instead of editing repo files and ending the turn';
1473
+ }
1474
+ if (!reason) {
1475
+ return undefined;
1476
+ }
1477
+ trace.localToolLoopGuardTriggered = true;
1478
+ trace.forcedTurnEndReason = reason;
1479
+ this.pushToolTraceEntry(trace, `local-tool loop guard forced end_turn -> ${reason}`);
1480
+ return this.buildForcedLocalToolTurnCompleteEvent(state, snapshot, reason);
1481
+ };
1482
+ }
1483
/**
 * Appends a line to `trace.entries`, keeping the list short.
 *
 * The entry is trimmed first. Nothing is added when the trimmed text is empty
 * or identical to the most recent entry (only immediate repeats are
 * suppressed). After appending, the list is cut down to its 12 newest entries.
 *
 * @param {object} trace - Session tool trace; `entries` is mutated/replaced.
 * @param {string} entry - Text to record.
 * @returns {void}
 */
+ pushToolTraceEntry(trace, entry) {
1484
+ const trimmed = entry.trim();
1485
+ if (!trimmed)
1486
+ return;
1487
+ if (trace.entries[trace.entries.length - 1] === trimmed)
1488
+ return;
1489
+ trace.entries.push(trimmed);
1490
+ if (trace.entries.length > 12) {
1491
+ trace.entries = trace.entries.slice(-12);
1492
+ }
1493
+ }
1494
/**
 * Heuristically decides whether the task is one where existing behavior of
 * related files needs extra care (front-end / UI style work).
 *
 * Returns `true` when the best candidate path ends in .html, .tsx, .jsx,
 * .css, .scss or .sass (case-insensitive), or when the lower-cased original
 * message contains any of the listed keywords.
 *
 * NOTE(review): keywords are matched as plain substrings, so short ones such
 * as 'ui' and 'ux' also match inside unrelated words (e.g. "build",
 * "require"). Confirm whether word-boundary matching was intended.
 *
 * @param {object} state - Task state; reads `bestCandidatePath` and
 *   `originalMessage` (both optional).
 * @returns {boolean} Whether the task is treated as preservation-sensitive.
 */
+ isPreservationSensitiveTask(state) {
1495
+ const bestCandidatePath = state.bestCandidatePath || '';
1496
+ if (/\.(html|tsx|jsx|css|scss|sass)$/i.test(bestCandidatePath)) {
1497
+ return true;
1498
+ }
1499
+ const prompt = (state.originalMessage || '').toLowerCase();
1500
+ return [
1501
+ 'ux',
1502
+ 'ui',
1503
+ 'design',
1504
+ 'frontend',
1505
+ 'front-end',
1506
+ 'theme',
1507
+ 'editor',
1508
+ 'layout',
1509
+ 'style',
1510
+ 'accessibility',
1511
+ 'visual',
1512
+ ].some((keyword) => prompt.includes(keyword));
1513
+ }
1514
/**
 * Selects candidate paths that are probably related to the best candidate
 * because they live near it in the tree.
 *
 * A candidate is kept when, after normalization, it is non-empty, differs
 * from the best candidate, is not under the `.runtype` artifact directory,
 * and starts with either the best candidate's directory or its "related
 * root" (the first two path segments, or the first segment when the path has
 * only one).
 *
 * The result preserves input order and is not de-duplicated.
 *
 * NOTE(review): for a single-segment best candidate (a file at the repo
 * root) the related root becomes "<filename>/", which presumably never
 * matches anything — verify that this is the intended outcome.
 *
 * @param {string|undefined} bestCandidatePath - Current best candidate.
 * @param {string[]|undefined} candidatePaths - All known candidate paths.
 * @returns {string[]} Normalized supporting paths; `[]` when either input is
 *   missing or empty.
 */
+ getLikelySupportingCandidatePaths(bestCandidatePath, candidatePaths) {
1515
+ if (!bestCandidatePath || !candidatePaths || candidatePaths.length === 0)
1516
+ return [];
1517
+ const normalizedBestCandidatePath = this.normalizeCandidatePath(bestCandidatePath);
1518
+ const bestCandidateSegments = normalizedBestCandidatePath.split('/').filter(Boolean);
1519
+ const relatedRoot = bestCandidateSegments.length >= 2
1520
+ ? `${bestCandidateSegments[0]}/${bestCandidateSegments[1]}/`
1521
+ : bestCandidateSegments.length === 1
1522
+ ? `${bestCandidateSegments[0]}/`
1523
+ : '';
1524
+ const bestCandidateDir = normalizedBestCandidatePath.includes('/')
1525
+ ? `${normalizedBestCandidatePath.slice(0, normalizedBestCandidatePath.lastIndexOf('/'))}/`
1526
+ : '';
1527
+ return candidatePaths
1528
+ .map((candidatePath) => this.normalizeCandidatePath(candidatePath))
1529
+ .filter((candidatePath) => candidatePath &&
1530
+ candidatePath !== normalizedBestCandidatePath &&
1531
+ !this.isMarathonArtifactPath(candidatePath) &&
1532
+ ((bestCandidateDir && candidatePath.startsWith(bestCandidateDir)) ||
1533
+ (relatedRoot && candidatePath.startsWith(relatedRoot))));
1534
+ }
1535
/**
 * Decides whether the research phase has gathered enough evidence to move on.
 *
 * Requirements, in order:
 * 1. A best candidate path exists.
 * 2. That path (normalized) appears in `state.recentReadPaths`.
 * 3. If the task is not preservation-sensitive, that is enough.
 * 4. Otherwise, if no supporting candidate paths are known, that is enough.
 * 5. Otherwise at least one supporting candidate must also have been read.
 *
 * @param {object} state - Task state; reads `bestCandidatePath`,
 *   `recentReadPaths` and `candidatePaths`.
 * @returns {boolean} `true` when research can be considered complete.
 */
+ hasSufficientResearchEvidence(state) {
1536
+ if (!state.bestCandidatePath)
1537
+ return false;
1538
+ const normalizedBestCandidatePath = this.normalizeCandidatePath(state.bestCandidatePath);
1539
+ const normalizedRecentReadPaths = (state.recentReadPaths || []).map((readPath) => this.normalizeCandidatePath(readPath));
1540
+ const readBestCandidate = normalizedRecentReadPaths.includes(normalizedBestCandidatePath);
1541
+ if (!readBestCandidate) {
1542
+ return false;
1543
+ }
1544
+ if (!this.isPreservationSensitiveTask(state)) {
1545
+ return true;
1546
+ }
1547
+ const supportingCandidatePaths = this.getLikelySupportingCandidatePaths(state.bestCandidatePath, state.candidatePaths);
1548
+ if (supportingCandidatePaths.length === 0) {
1549
+ return true;
1550
+ }
1551
+ return normalizedRecentReadPaths.some((readPath) => readPath !== normalizedBestCandidatePath && supportingCandidatePaths.includes(readPath));
1552
+ }
1553
/**
 * Combines the tool-trace summary and the model's text into one output
 * string: trace summary first, then model output, separated by a blank line.
 * Each part is trimmed and parts that end up empty are omitted.
 *
 * Both arguments must be strings (`.trim()` is called on each).
 *
 * @param {string} modelOutput - Text produced by the model.
 * @param {string} toolTraceSummary - Summary of tool activity.
 * @returns {string} The combined output, possibly empty.
 */
+ buildEffectiveSessionOutput(modelOutput, toolTraceSummary) {
1554
+ return [toolTraceSummary.trim(), modelOutput.trim()].filter(Boolean).join('\n\n');
1555
+ }
1556
/**
 * Decides whether a completion claim found in the output should be honored.
 *
 * Returns `false` unless `this.detectTaskCompletion(output)` is truthy (that
 * helper is defined outside the visible portion of the file). When it is
 * truthy, completion is accepted immediately if the workflow phase is not
 * 'execution' or no best candidate path is set. Otherwise all of the
 * following must hold:
 * - the plan has been written (`state.planWritten`),
 * - the best candidate is verified according to the state or the trace,
 * - verification is either not required, or has passed according to the
 *   state (`lastVerificationPassed`) or the trace (`verificationPassed`).
 *
 * @param {string} output - Session output to inspect.
 * @param {object} state - Task state.
 * @param {object} sessionTrace - Current session tool trace.
 * @returns {boolean} Whether the task may be marked complete.
 */
+ canAcceptTaskCompletion(output, state, sessionTrace) {
1557
+ if (!this.detectTaskCompletion(output)) {
1558
+ return false;
1559
+ }
1560
+ if (state.workflowPhase !== 'execution') {
1561
+ return true;
1562
+ }
1563
+ if (!state.bestCandidatePath) {
1564
+ return true;
1565
+ }
1566
+ const verificationSatisfied = !state.verificationRequired ||
1567
+ Boolean(state.lastVerificationPassed || sessionTrace.verificationPassed);
1568
+ return (Boolean(state.planWritten) &&
1569
+ Boolean(state.bestCandidateVerified || sessionTrace.bestCandidateVerified) &&
1570
+ verificationSatisfied);
1571
+ }
1572
/**
 * Converts an arbitrary value into a single-line, length-limited string for
 * trace output.
 *
 * Strings are used as-is, `undefined` becomes the empty string, and anything
 * else is serialized with `JSON.stringify`. Runs of whitespace are collapsed
 * to one space, the result is trimmed, then cut to `maxLength` characters.
 *
 * NOTE(review): `JSON.stringify` returns `undefined` for values such as
 * functions and symbols, and throws on circular structures or BigInt; in the
 * first case the following `.replace` call would throw. Confirm that callers
 * only pass JSON-serializable data.
 *
 * @param {*} value - Value to summarize.
 * @param {number} [maxLength=180] - Maximum length of the returned string.
 * @returns {string} The compact summary.
 */
+ summarizeUnknownForTrace(value, maxLength = 180) {
1573
+ const text = typeof value === 'string'
1574
+ ? value
1575
+ : value === undefined
1576
+ ? ''
1577
+ : JSON.stringify(value);
1578
+ return text.replace(/\s+/g, ' ').trim().slice(0, maxLength);
1579
+ }
1580
/**
 * Summarizes a multi-line value as its first few non-blank lines on a single
 * line.
 *
 * Strings are used as-is, `undefined` becomes the empty string, and anything
 * else is serialized with `JSON.stringify`. The text is split on newlines,
 * each line is trimmed, blank lines are dropped, the first `maxLines` lines
 * are joined with " | ", and the result is cut to 240 characters.
 *
 * @param {*} value - Value to summarize.
 * @param {number} [maxLines=4] - Maximum number of lines to keep.
 * @returns {string} The summary, or '' when there is no text.
 */
+ summarizeTextBlockForTrace(value, maxLines = 4) {
1581
+ const text = typeof value === 'string'
1582
+ ? value
1583
+ : value === undefined
1584
+ ? ''
1585
+ : JSON.stringify(value);
// Also covers the case where JSON.stringify itself yields undefined.
1586
+ if (!text)
1587
+ return '';
1588
+ return text
1589
+ .split('\n')
1590
+ .map((line) => line.trim())
1591
+ .filter(Boolean)
1592
+ .slice(0, maxLines)
1593
+ .join(' | ')
1594
+ .slice(0, 240);
1595
+ }
1596
/**
 * Parses the string result of a verification tool into a small structured
 * object.
 *
 * The result must be a JSON string whose parsed value has a boolean `success`
 * property. `command`, `output` and `error` are copied over only when they
 * are strings. Any other input — a non-string, invalid JSON, JSON `null`, or
 * a missing/non-boolean `success` — yields `undefined`; parse and property
 * access errors are caught and treated the same way.
 *
 * @param {*} result - Raw tool result.
 * @returns {{success: boolean, command?: string, output?: string,
 *   error?: string}|undefined} Parsed verification outcome, if recognizable.
 */
+ parseVerificationResult(result) {
1597
+ if (typeof result !== 'string')
1598
+ return undefined;
1599
+ try {
1600
+ const parsed = JSON.parse(result);
1601
+ if (typeof parsed.success !== 'boolean')
1602
+ return undefined;
1603
+ return {
1604
+ success: parsed.success,
1605
+ ...(typeof parsed.command === 'string' ? { command: parsed.command } : {}),
1606
+ ...(typeof parsed.output === 'string' ? { output: parsed.output } : {}),
1607
+ ...(typeof parsed.error === 'string' ? { error: parsed.error } : {}),
1608
+ };
1609
+ }
1610
+ catch {
1611
+ return undefined;
1612
+ }
1613
+ }
1614
/**
 * Normalizes a repo-relative path string so paths can be compared by
 * equality: trims whitespace, converts backslashes to forward slashes,
 * removes one leading "./" or "/", then collapses repeated slashes.
 *
 * Case is preserved and ".." segments are not resolved.
 *
 * NOTE(review): the leading-prefix removal runs before repeated slashes are
 * collapsed, so an input like "//src/a.js" normalizes to "/src/a.js" (leading
 * slash kept) and "././a.js" keeps one "./". Confirm whether such inputs can
 * occur; if so the two replacements are probably in the wrong order.
 *
 * @param {string} candidatePath - Path to normalize; must be a string.
 * @returns {string} The normalized path.
 */
+ normalizeCandidatePath(candidatePath) {
1615
+ return candidatePath.trim().replace(/\\/g, '/').replace(/^\.?\//, '').replace(/\/+/g, '/');
1616
+ }
1617
/**
 * Normalizes a list of paths, drops empty results and paths under the
 * `.runtype` artifact directory, and removes duplicates while keeping
 * first-seen order.
 *
 * @param {string[]|undefined|null} paths - Paths to clean; a missing value is
 *   treated as an empty list.
 * @returns {string[]} Unique normalized paths.
 */
+ dedupeNormalizedCandidatePaths(paths) {
1618
+ return Array.from(new Set((paths || [])
1619
+ .map((candidatePath) => this.normalizeCandidatePath(candidatePath))
1620
+ .filter((candidatePath) => {
1621
+ if (!candidatePath)
1622
+ return false;
1623
+ return !this.isMarathonArtifactPath(candidatePath);
1624
+ })));
1625
+ }
1626
/**
 * Reports whether a path refers to the `.runtype` directory or anything
 * inside it. The comparison is done on the normalized, lower-cased path, so
 * it is case-insensitive.
 *
 * @param {string} candidatePath - Path to test.
 * @returns {boolean} `true` for ".runtype" itself or any ".runtype/..." path.
 */
+ isMarathonArtifactPath(candidatePath) {
1627
+ const normalized = this.normalizeCandidatePath(candidatePath).toLowerCase();
1628
+ return normalized === '.runtype' || normalized.startsWith('.runtype/');
1629
+ }
1630
/**
 * Reports whether a tool is a broad repository-discovery tool.
 *
 * Unlike `isDiscoveryLocalTool` in this class, `read_file` is not included
 * here.
 *
 * @param {string} toolName - Tool name to test.
 * @returns {boolean} `true` for search_repo, glob_files, tree_directory and
 *   list_directory.
 */
+ isDiscoveryToolName(toolName) {
1631
+ return (toolName === 'search_repo' ||
1632
+ toolName === 'glob_files' ||
1633
+ toolName === 'tree_directory' ||
1634
+ toolName === 'list_directory');
1635
+ }
1636
/**
 * Turns a task name into a slug: lower-cases it, replaces each run of
 * characters other than a-z, 0-9, "_" and "-" with a single "-", strips
 * leading and trailing hyphens, and keeps at most 80 characters.
 *
 * The result can be an empty string (for example when the name contains no
 * allowed characters). Because truncation happens after the hyphen trim, a
 * result cut at 80 characters may still end in "-".
 *
 * @param {string} taskName - Task name; must be a string.
 * @returns {string} The slug.
 */
+ sanitizeTaskSlug(taskName) {
1637
+ return taskName
1638
+ .toLowerCase()
1639
+ .replace(/[^a-z0-9_-]+/g, '-')
1640
+ .replace(/^-+|-+$/g, '')
1641
+ .slice(0, 80);
1642
+ }
1643
/**
 * Returns the default location of the plan markdown file for a task:
 * `.runtype/marathons/<slug>/plan.md`, where the slug is derived from the
 * task name (or from the literal "task" when the name is falsy).
 *
 * NOTE(review): a non-empty name that sanitizes to an empty slug (e.g. only
 * punctuation) produces ".runtype/marathons//plan.md" — confirm whether a
 * fallback slug is wanted in that case.
 *
 * @param {string|undefined} taskName - Task name.
 * @returns {string} Plan file path.
 */
+ getDefaultPlanPath(taskName) {
1644
+ const slug = this.sanitizeTaskSlug(taskName || 'task');
1645
+ return `.runtype/marathons/${slug}/plan.md`;
1646
+ }
1647
/**
 * Returns the directory portion of a path after normalization: everything
 * before the last "/", without a trailing slash. Returns '' when the
 * normalized path contains no "/".
 *
 * @param {string} candidatePath - Path to inspect.
 * @returns {string} Directory part, or ''.
 */
+ dirnameOfCandidatePath(candidatePath) {
1648
+ const normalized = this.normalizeCandidatePath(candidatePath);
1649
+ const index = normalized.lastIndexOf('/');
1650
+ return index >= 0 ? normalized.slice(0, index) : '';
1651
+ }
1652
/**
 * Joins a reference found in a file onto that file's directory and
 * normalizes the result.
 *
 * Backslashes in `nextPath` are converted to "/" and it is trimmed; an empty
 * result returns ''. A leading "/" or "./" on `nextPath` is removed and the
 * remainder is appended to `baseDir` — so a leading "/" is treated as
 * relative to `baseDir`, not to the repository root. ".." segments are not
 * resolved.
 *
 * @param {string} baseDir - Directory to resolve against; may be ''.
 * @param {string} nextPath - Path or reference to append.
 * @returns {string} Normalized joined path, or '' for an empty `nextPath`.
 */
+ joinCandidatePath(baseDir, nextPath) {
1653
+ const normalizedNext = nextPath.replace(/\\/g, '/').trim();
1654
+ if (!normalizedNext)
1655
+ return '';
1656
+ if (normalizedNext.startsWith('/')) {
1657
+ return this.normalizeCandidatePath(`${baseDir}/${normalizedNext.slice(1)}`);
1658
+ }
1659
+ if (normalizedNext.startsWith('./')) {
1660
+ return this.normalizeCandidatePath(`${baseDir}/${normalizedNext.slice(2)}`);
1661
+ }
1662
+ return this.normalizeCandidatePath(baseDir ? `${baseDir}/${normalizedNext}` : normalizedNext);
1663
+ }
1664
/**
 * Assigns a heuristic relevance score to a candidate path; higher is better.
 *
 * Scoring is additive on the normalized, lower-cased path:
 * +80 ends with "theme.html", +30 contains "agent", +30 contains "editor",
 * +25 contains "theme", +20 ends with ".html", +10 contains "/src/",
 * +10 contains "/app/", -10 contains "index.html". The score can be negative.
 *
 * NOTE(review): in the first check, `endsWith('/theme.html')` is already
 * implied by `endsWith('theme.html')`, so the first operand is redundant. The
 * weights are hard-coded and appear tuned for a specific kind of task —
 * confirm before reusing this for general ranking.
 *
 * @param {string} candidatePath - Path to score.
 * @returns {number} The heuristic score.
 */
+ scoreCandidatePath(candidatePath) {
1665
+ const normalized = this.normalizeCandidatePath(candidatePath).toLowerCase();
1666
+ let score = 0;
1667
+ if (normalized.endsWith('/theme.html') || normalized.endsWith('theme.html'))
1668
+ score += 80;
1669
+ if (normalized.includes('agent'))
1670
+ score += 30;
1671
+ if (normalized.includes('editor'))
1672
+ score += 30;
1673
+ if (normalized.includes('theme'))
1674
+ score += 25;
1675
+ if (normalized.endsWith('.html'))
1676
+ score += 20;
1677
+ if (normalized.includes('/src/'))
1678
+ score += 10;
1679
+ if (normalized.includes('/app/'))
1680
+ score += 10;
1681
+ if (normalized.includes('index.html'))
1682
+ score -= 10;
1683
+ return score;
1684
+ }
1685
/**
 * Records a candidate path on the trace and promotes it to best candidate
 * when it scores at least as well as the current one.
 *
 * Paths that normalize to fewer than 3 characters, or that live under the
 * `.runtype` artifact directory, are ignored. New paths are appended to
 * `trace.candidatePaths`, which is capped at the 12 most recent entries.
 * The best-candidate comparison runs even when the path was already listed.
 *
 * NOTE(review): the comparison uses `>=`, so a later candidate with an equal
 * score replaces the current best. Confirm that "latest wins on ties" is
 * intended.
 *
 * @param {object} trace - Session tool trace; mutated.
 * @param {string} candidatePath - Path to record.
 * @param {string} reason - Why the path is a candidate; stored truncated to
 *   200 characters when the path becomes the best candidate.
 * @returns {void}
 */
+ addCandidateToTrace(trace, candidatePath, reason) {
1686
+ const normalized = this.normalizeCandidatePath(candidatePath);
1687
+ if (!normalized || normalized.length < 3)
1688
+ return;
1689
+ if (this.isMarathonArtifactPath(normalized))
1690
+ return;
1691
+ if (!trace.candidatePaths.includes(normalized)) {
1692
+ trace.candidatePaths.push(normalized);
1693
+ if (trace.candidatePaths.length > 12) {
1694
+ trace.candidatePaths = trace.candidatePaths.slice(-12);
1695
+ }
1696
+ }
1697
+ const currentScore = trace.bestCandidatePath ? this.scoreCandidatePath(trace.bestCandidatePath) : -1;
1698
+ const nextScore = this.scoreCandidatePath(normalized);
1699
+ if (!trace.bestCandidatePath || nextScore >= currentScore) {
1700
+ trace.bestCandidatePath = normalized;
1701
+ trace.bestCandidateReason = reason.slice(0, 200);
1702
+ }
1703
+ }
1704
/**
 * Scans text for file references and returns them as candidate paths.
 *
 * Two passes are made over the text:
 * 1. `href="..."` / `src="..."` attribute values ending in .html, .tsx, .ts,
 *    .jsx, .js, .md or .json; resolved against the source file's directory
 *    when that directory is non-empty.
 * 2. Bare path-like tokens with the same extensions; a token without "/" is
 *    resolved against the source file's directory when a source path was
 *    given, otherwise the token is just normalized.
 *
 * Candidates are de-duplicated by path (the first reason recorded wins) and
 * paths under `.runtype` are skipped. When the source file itself is under
 * `.runtype`, nothing is extracted.
 *
 * @param {string} text - Text to scan.
 * @param {string} [sourcePath] - Path of the file the text came from.
 * @returns {Array<{path: string, reason: string}>} Candidates in discovery
 *   order.
 */
+ extractCandidatePathsFromText(text, sourcePath) {
1705
+ const candidates = [];
1706
+ if (sourcePath && this.isMarathonArtifactPath(sourcePath)) {
1707
+ return candidates;
1708
+ }
1709
+ const add = (candidatePath, reason) => {
1710
+ const normalized = this.normalizeCandidatePath(candidatePath);
1711
+ if (!normalized)
1712
+ return;
1713
+ if (this.isMarathonArtifactPath(normalized))
1714
+ return;
1715
+ if (!candidates.some((candidate) => candidate.path === normalized)) {
1716
+ candidates.push({ path: normalized, reason });
1717
+ }
1718
+ };
1719
+ const baseDir = sourcePath ? this.dirnameOfCandidatePath(sourcePath) : '';
1720
+ for (const match of text.matchAll(/(?:href|src)=["']([^"']+\.(?:html|tsx|ts|jsx|js|md|json))["']/gi)) {
1721
+ const target = match[1] || '';
1722
+ const resolved = baseDir ? this.joinCandidatePath(baseDir, target) : target;
1723
+ add(resolved, `linked from ${sourcePath || 'discovery result'} via ${target}`);
1724
+ }
1725
+ for (const match of text.matchAll(/\b([\w./-]+\.(?:html|tsx|ts|jsx|js|md|json))\b/g)) {
1726
+ const target = match[1] || '';
1727
+ const resolved = sourcePath && !target.includes('/') ? this.joinCandidatePath(baseDir, target) : this.normalizeCandidatePath(target);
1728
+ add(resolved, `mentioned in ${sourcePath || 'discovery result'}`);
1729
+ }
1730
+ return candidates;
1731
+ }
1732
/**
 * Extracts candidate paths from line-oriented search output.
 *
 * Two line shapes are recognized:
 * - `[content] <path>:<line>: <text>` — the text is scanned for file
 *   references relative to `<path>`; new references are added.
 * - `[path] <path>` — the path itself is added when it ends in .html, .tsx,
 *   .ts, .jsx, .js, .md or .json (case-insensitive).
 * All other lines are ignored.
 *
 * NOTE(review): `[path]` hits are pushed without the duplicate check or the
 * `.runtype` artifact filter that `[content]` hits go through, so the result
 * may contain repeated or artifact paths from that branch — confirm whether
 * downstream code relies on uniqueness.
 *
 * @param {string} result - Raw search output.
 * @returns {Array<{path: string, reason: string}>} Candidates in the order
 *   encountered.
 */
+ parseSearchRepoResultForCandidates(result) {
1733
+ const candidates = [];
1734
+ for (const line of result.split('\n')) {
1735
+ const contentMatch = line.match(/^\[content\]\s+([^:]+):\d+:\s+(.*)$/);
1736
+ if (contentMatch) {
1737
+ const sourcePath = this.normalizeCandidatePath(contentMatch[1] || '');
1738
+ const content = contentMatch[2] || '';
1739
+ for (const candidate of this.extractCandidatePathsFromText(content, sourcePath)) {
1740
+ if (!candidates.some((existing) => existing.path === candidate.path)) {
1741
+ candidates.push(candidate);
1742
+ }
1743
+ }
1744
+ continue;
1745
+ }
1746
+ const pathMatch = line.match(/^\[path\]\s+(.+)$/);
1747
+ if (pathMatch) {
1748
+ const sourcePath = this.normalizeCandidatePath(pathMatch[1] || '');
1749
+ if (/\.(html|tsx|ts|jsx|js|md|json)$/i.test(sourcePath)) {
1750
+ candidates.push({ path: sourcePath, reason: 'matched repository path search result' });
1751
+ }
1752
+ }
1753
+ }
1754
+ return candidates;
1755
+ }
1756
/**
 * Picks the highest-scoring candidate path found in a bootstrap context
 * string, which is parsed with the same rules as search output.
 *
 * @param {string|undefined} bootstrapContext - Pre-collected search output.
 * @returns {{path: string, reason: string}|undefined} The top candidate by
 *   `scoreCandidatePath`, or `undefined` when the context is empty or yields
 *   no candidates.
 */
+ extractBestCandidateFromBootstrapContext(bootstrapContext) {
1757
+ if (!bootstrapContext)
1758
+ return undefined;
1759
+ const candidates = this.parseSearchRepoResultForCandidates(bootstrapContext);
1760
+ if (candidates.length === 0)
1761
+ return undefined;
// Sorting in place is safe: `candidates` is a fresh array owned by this call.
1762
+ return candidates.sort((a, b) => this.scoreCandidatePath(b.path) - this.scoreCandidatePath(a.path))[0];
1763
+ }
1764
/**
 * Cleans up a previously saved state before a task is resumed, returning a
 * new object and leaving the input untouched.
 *
 * - `planPath`: the saved path, normalized, or the default plan path for
 *   `taskName` when none was saved.
 * - `candidatePaths` / `recentReadPaths`: normalized, de-duplicated, with
 *   `.runtype` artifact paths removed.
 * - `bestCandidatePath`: the saved value when it is a non-artifact path;
 *   otherwise the highest-scoring entry among the cleaned candidate and read
 *   paths (possibly `undefined`). `bestCandidateReason` is kept only when a
 *   best candidate exists.
 * - `workflowPhase`: forced to 'execution' when a plan was already written
 *   and the saved phase is unset, 'research' or 'planning'; otherwise left
 *   as saved.
 * - `recentActionKeys`: de-duplicated, last 20 kept.
 * - Boolean flags are coerced with `Boolean(...)`; `verificationRequired` is
 *   copied only when it was defined.
 *
 * NOTE(review): de-duplicating `recentActionKeys` discards repeat counts, and
 * a fallback best candidate keeps the reason saved for the previous one —
 * confirm that neither matters to consumers of the resumed state.
 *
 * @param {object|undefined} resumeState - Saved state, if any.
 * @param {string} taskName - Task name used for the default plan path.
 * @returns {object|undefined} Sanitized state, or `undefined` when no state
 *   was supplied.
 */
+ sanitizeResumeState(resumeState, taskName) {
1765
+ if (!resumeState)
1766
+ return undefined;
1767
+ const planPath = typeof resumeState.planPath === 'string' && resumeState.planPath.trim()
1768
+ ? this.normalizeCandidatePath(resumeState.planPath)
1769
+ : this.getDefaultPlanPath(taskName);
1770
+ const candidatePaths = this.dedupeNormalizedCandidatePaths(resumeState.candidatePaths);
1771
+ const recentReadPaths = this.dedupeNormalizedCandidatePaths(resumeState.recentReadPaths);
1772
+ const normalizedBestCandidatePath = typeof resumeState.bestCandidatePath === 'string' && resumeState.bestCandidatePath.trim()
1773
+ ? this.normalizeCandidatePath(resumeState.bestCandidatePath)
1774
+ : undefined;
1775
+ const bestCandidatePath = normalizedBestCandidatePath && !this.isMarathonArtifactPath(normalizedBestCandidatePath)
1776
+ ? normalizedBestCandidatePath
1777
+ : [...candidatePaths, ...recentReadPaths].sort((left, right) => this.scoreCandidatePath(right) - this.scoreCandidatePath(left))[0];
1778
+ const workflowPhase = resumeState.planWritten &&
1779
+ (!resumeState.workflowPhase ||
1780
+ resumeState.workflowPhase === 'research' ||
1781
+ resumeState.workflowPhase === 'planning')
1782
+ ? 'execution'
1783
+ : resumeState.workflowPhase;
1784
+ return {
1785
+ ...resumeState,
1786
+ workflowPhase,
1787
+ planPath,
1788
+ planWritten: Boolean(resumeState.planWritten),
1789
+ bestCandidatePath,
1790
+ bestCandidateReason: bestCandidatePath ? resumeState.bestCandidateReason : undefined,
1791
+ candidatePaths,
1792
+ recentReadPaths,
1793
+ recentActionKeys: Array.from(new Set(resumeState.recentActionKeys || [])).slice(-20),
1794
+ bestCandidateNeedsVerification: Boolean(resumeState.bestCandidateNeedsVerification),
1795
+ bestCandidateVerified: Boolean(resumeState.bestCandidateVerified),
1796
+ ...(resumeState.verificationRequired !== undefined
1797
+ ? { verificationRequired: resumeState.verificationRequired }
1798
+ : {}),
1799
+ lastVerificationPassed: Boolean(resumeState.lastVerificationPassed),
1800
+ };
1801
+ }
1802
/**
 * Builds the instruction text for the current workflow phase.
 *
 * The phase defaults to 'research' and the plan path defaults to the default
 * plan path for `state.taskName`. 'planning' and 'execution' each have their
 * own instruction block; any other phase value (including 'research') gets
 * the research block. The execution block names the primary target file only
 * when `state.bestCandidatePath` is set.
 *
 * @param {object} state - Task state; reads `workflowPhase`, `planPath`,
 *   `taskName` and `bestCandidatePath`.
 * @returns {string} Newline-joined instruction lines.
 */
+ buildPhaseInstructions(state) {
1803
+ const phase = state.workflowPhase || 'research';
1804
+ const planPath = state.planPath || this.getDefaultPlanPath(state.taskName);
1805
+ if (phase === 'planning') {
1806
+ return [
1807
+ '--- Workflow Phase: Planning ---',
1808
+ 'Research is complete. Your current job is to write the implementation plan before any product-file edits.',
1809
+ `Write the plan markdown to exactly: ${planPath}`,
1810
+ 'Do NOT edit the target product file yet.',
1811
+ 'The plan should summarize UX findings, explain why the current best candidate is the right file, and list concrete execution steps.',
1812
+ 'The plan must include a "Preserve existing functionality" section that lists current behaviors, linked files, integrations, and constraints that must keep working.',
1813
+ 'The plan must include a "Verification steps" section listing the concrete checks you will run before TASK_COMPLETE.',
1814
+ 'If the plan already exists, update that same plan file instead of creating a different one.',
1815
+ ].join('\n');
1816
+ }
1817
+ if (phase === 'execution') {
1818
+ return [
1819
+ '--- Workflow Phase: Execution ---',
1820
+ `The plan should already exist at: ${planPath}`,
1821
+ ...(state.bestCandidatePath ? [`Primary target file: ${state.bestCandidatePath}`] : []),
1822
+ 'Execute the plan by editing the target files.',
1823
+ 'Before ending each turn, update the markdown plan with progress against the steps you completed.',
1824
+ 'Modify the existing implementation incrementally. Do not replace the whole file unless the user explicitly asked for a rewrite.',
1825
+ 'Preserve existing functionality, handlers, imports, routes, configuration, and data flow unless the plan explicitly calls for changing them.',
1826
+ 'Before TASK_COMPLETE, run a verification command that matches the repo, such as lint, tests, build, or typecheck.',
1827
+ 'Avoid broad repo discovery unless the current candidate is clearly wrong.',
1828
+ ].join('\n');
1829
+ }
1830
+ return [
1831
+ '--- Workflow Phase: Research ---',
1832
+ 'Your current job is to inspect the repo, identify the correct existing target file, and gather enough evidence for a plan.',
1833
+ 'Identify related supporting files and current behaviors that must be preserved before planning.',
1834
+ 'Do NOT edit the target product file yet.',
1835
+ `When research is complete, the system will advance you to planning and require a plan at: ${planPath}`,
1836
+ ].join('\n');
1837
+ }
1838
/**
 * Advances `state.workflowPhase` based on the evidence gathered so far,
 * mutating `state` in place.
 *
 * Defaults are filled in first (`workflowPhase` = 'research', `planPath` =
 * default plan path) and any previous `phaseTransitionSummary` is cleared.
 * Transitions are then applied repeatedly until none fires, so a single call
 * can move research -> planning -> execution:
 * - research -> planning when research evidence is sufficient;
 * - planning -> execution when the plan was written according to the trace
 *   or the state (this also sets `state.planWritten = true`).
 * Finally, a state whose `status` is 'complete' gets the phase 'complete',
 * and the summaries of all transitions taken are stored, separated by blank
 * lines, in `state.phaseTransitionSummary`.
 *
 * @param {object} state - Task state; mutated.
 * @param {object} sessionTrace - Current session tool trace; reads
 *   `planWritten`.
 * @returns {void}
 */
+ updateWorkflowPhase(state, sessionTrace) {
1839
+ if (!state.workflowPhase)
1840
+ state.workflowPhase = 'research';
1841
+ if (!state.planPath)
1842
+ state.planPath = this.getDefaultPlanPath(state.taskName);
1843
+ state.phaseTransitionSummary = undefined;
1844
+ const transitionSummaries = [];
1845
+ let phaseUpdated = true;
1846
+ while (phaseUpdated) {
1847
+ phaseUpdated = false;
1848
+ if (state.workflowPhase === 'research' && this.hasSufficientResearchEvidence(state)) {
1849
+ state.workflowPhase = 'planning';
1850
+ transitionSummaries.push([
1851
+ 'Automatic phase transition: research -> planning.',
1852
+ `Best candidate confirmed: ${state.bestCandidatePath}`,
1853
+ `Next step: write the plan markdown to ${state.planPath} before editing the product file.`,
1854
+ ].join('\n'));
1855
+ phaseUpdated = true;
// Re-enter the loop so the planning -> execution check runs on the next pass.
1856
+ continue;
1857
+ }
1858
+ if (state.workflowPhase === 'planning' && (sessionTrace.planWritten || state.planWritten)) {
1859
+ state.planWritten = true;
1860
+ state.workflowPhase = 'execution';
1861
+ transitionSummaries.push([
1862
+ 'Automatic phase transition: planning -> execution.',
1863
+ `Plan path: ${state.planPath}`,
1864
+ ...(state.bestCandidatePath ? [`Execute against: ${state.bestCandidatePath}`] : []),
1865
+ 'Next step: edit the target file(s) and update the plan with progress each turn.',
1866
+ ].join('\n'));
1867
+ phaseUpdated = true;
1868
+ }
1869
+ }
1870
+ if (state.status === 'complete') {
1871
+ state.workflowPhase = 'complete';
1872
+ }
1873
+ if (transitionSummaries.length > 0) {
1874
+ state.phaseTransitionSummary = transitionSummaries.join('\n\n');
1875
+ }
1876
+ }
1877
+ wrapLocalToolsForTrace(localTools, trace, state) {
1878
+ if (!localTools)
1879
+ return undefined;
1880
+ const wrapped = {};
1881
+ for (const [toolName, toolDef] of Object.entries(localTools)) {
1882
+ wrapped[toolName] = {
1883
+ ...toolDef,
1884
+ execute: async (args) => {
1885
+ const actionKey = `${toolName}:${String(args.path || args.query || args.pattern || '.').slice(0, 120)}`;
1886
+ trace.actionKeys.push(actionKey);
1887
+ if (trace.actionKeys.length > 10) {
1888
+ trace.actionKeys = trace.actionKeys.slice(-10);
1889
+ }
1890
+ const normalizedPathArg = typeof args.path === 'string' && args.path.trim()
1891
+ ? this.normalizeCandidatePath(String(args.path))
1892
+ : undefined;
1893
+ const normalizedPlanPath = state.planPath
1894
+ ? this.normalizeCandidatePath(state.planPath)
1895
+ : undefined;
1896
+ const normalizedBestCandidatePath = state.bestCandidatePath
1897
+ ? this.normalizeCandidatePath(state.bestCandidatePath)
1898
+ : undefined;
1899
+ const allowedWriteTargets = new Set([
1900
+ normalizedPlanPath,
1901
+ normalizedBestCandidatePath,
1902
+ ...(state.recentReadPaths || []).map((readPath) => this.normalizeCandidatePath(readPath)),
1903
+ ...trace.readPaths.map((readPath) => this.normalizeCandidatePath(readPath)),
1904
+ ].filter((value) => Boolean(value)));
1905
+ const pathArg = typeof args.path === 'string' && args.path.trim() ? ` path=${String(args.path)}` : '';
1906
+ const queryArg = typeof args.query === 'string' && args.query.trim() ? ` query="${String(args.query)}"` : '';
1907
+ const patternArg = typeof args.pattern === 'string' && args.pattern.trim()
1908
+ ? ` pattern="${String(args.pattern)}"`
1909
+ : '';
1910
+ const isWriteLikeTool = toolName === 'write_file' || toolName === 'restore_file_checkpoint';
1911
+ const isVerificationTool = toolName === 'run_check';
1912
+ if (state.workflowPhase === 'execution' &&
1913
+ normalizedBestCandidatePath &&
1914
+ this.isDiscoveryToolName(toolName) &&
1915
+ !trace.bestCandidateReadFailed) {
1916
+ const blockedMessage = [
1917
+ `Blocked by marathon execution guard: ${toolName} is disabled during execution.`,
1918
+ `Read or edit "${normalizedBestCandidatePath}" instead.`,
1919
+ 'Broad discovery is only re-enabled if a read of the current target file fails.',
1920
+ ].join(' ');
1921
+ this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> ${blockedMessage}`);
1922
+ return blockedMessage;
1923
+ }
1924
+ if (isWriteLikeTool) {
1925
+ trace.attemptedWrite = true;
1926
+ if (state.workflowPhase === 'planning' &&
1927
+ normalizedPathArg &&
1928
+ normalizedPlanPath &&
1929
+ normalizedPathArg !== normalizedPlanPath) {
1930
+ const blockedMessage = [
1931
+ `Blocked by marathon planning guard: ${toolName} must target the exact plan path during planning.`,
1932
+ `Write the plan to "${normalizedPlanPath}" before editing any product files.`,
1933
+ ].join(' ');
1934
+ this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> ${blockedMessage}`);
1935
+ return blockedMessage;
1936
+ }
1937
+ if (state.workflowPhase === 'execution' &&
1938
+ normalizedPathArg &&
1939
+ normalizedPlanPath &&
1940
+ normalizedBestCandidatePath &&
1941
+ normalizedPathArg === normalizedPlanPath &&
1942
+ !trace.executionFileWritten) {
1943
+ const blockedMessage = [
1944
+ `Blocked by marathon execution guard: ${toolName} cannot update the plan file before any real repo-file edit in this execution turn.`,
1945
+ `Edit "${normalizedBestCandidatePath}" or another previously discovered repo file first.`,
1946
+ `After that, you may update "${normalizedPlanPath}" with progress.`,
1947
+ ].join(' ');
1948
+ this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> ${blockedMessage}`);
1949
+ return blockedMessage;
1950
+ }
1951
+ if (state.workflowPhase === 'execution' &&
1952
+ normalizedPathArg &&
1953
+ normalizedPathArg !== normalizedPlanPath &&
1954
+ !allowedWriteTargets.has(normalizedPathArg)) {
1955
+ const blockedMessage = [
1956
+ `Blocked by marathon execution guard: ${toolName} is limited to the confirmed target, the plan file, or files already discovered/read for this task.`,
1957
+ `Do not create scratch files like "${normalizedPathArg}".`,
1958
+ normalizedBestCandidatePath
1959
+ ? `Edit "${normalizedBestCandidatePath}" or another previously discovered repo file instead.`
1960
+ : 'Read the current target file before writing.',
1961
+ ].join(' ');
1962
+ this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> ${blockedMessage}`);
1963
+ return blockedMessage;
1964
+ }
1965
+ }
1966
+ if (this.isDiscoveryToolName(toolName)) {
1967
+ trace.discoveryPerformed = true;
1968
+ }
1969
+ if (toolName === 'read_file') {
1970
+ trace.readFiles = true;
1971
+ if (normalizedPathArg) {
1972
+ const normalizedReadPath = normalizedPathArg;
1973
+ trace.readPaths.push(normalizedReadPath);
1974
+ if (trace.readPaths.length > 12) {
1975
+ trace.readPaths = trace.readPaths.slice(-12);
1976
+ }
1977
+ this.addCandidateToTrace(trace, normalizedReadPath, 'explicitly read by agent');
1978
+ }
1979
+ }
1980
+ let result;
1981
+ try {
1982
+ result = await toolDef.execute(args);
1983
+ }
1984
+ catch (error) {
1985
+ if (toolName === 'read_file' &&
1986
+ normalizedPathArg &&
1987
+ normalizedBestCandidatePath &&
1988
+ normalizedPathArg === normalizedBestCandidatePath) {
1989
+ trace.bestCandidateReadFailed = true;
1990
+ }
1991
+ this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> error: ${error instanceof Error ? error.message : String(error)}`);
1992
+ throw error;
1993
+ }
1994
+ if (isWriteLikeTool && normalizedPathArg) {
1995
+ trace.wroteFiles = true;
1996
+ if (normalizedPlanPath && normalizedPathArg === normalizedPlanPath) {
1997
+ trace.planWritten = true;
1998
+ }
1999
+ else if (state.workflowPhase === 'execution') {
2000
+ trace.executionFileWritten = true;
2001
+ trace.verificationPassed = false;
2002
+ if (normalizedBestCandidatePath &&
2003
+ normalizedPathArg === normalizedBestCandidatePath) {
2004
+ trace.bestCandidateWritten = true;
2005
+ }
2006
+ }
2007
+ }
2008
+ const verificationResult = isVerificationTool
2009
+ ? this.parseVerificationResult(result)
2010
+ : undefined;
2011
+ if (verificationResult) {
2012
+ trace.verificationAttempted = true;
2013
+ trace.verificationPassed = verificationResult.success;
2014
+ }
2015
+ const summarizedResult = verificationResult
2016
+ ? [
2017
+ verificationResult.command || 'verification',
2018
+ verificationResult.success ? 'passed' : 'failed',
2019
+ verificationResult.error || verificationResult.output,
2020
+ ]
2021
+ .filter(Boolean)
2022
+ .join(' | ')
2023
+ .slice(0, 240)
2024
+ : this.summarizeTextBlockForTrace(result);
2025
+ const resultSuffix = summarizedResult ? ` -> ${summarizedResult}` : '';
2026
+ this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg}${resultSuffix}`);
2027
+ const textResult = typeof result === 'string' ? result : '';
2028
+ if (toolName === 'read_file' &&
2029
+ normalizedPathArg &&
2030
+ normalizedBestCandidatePath &&
2031
+ normalizedPathArg === normalizedBestCandidatePath &&
2032
+ (trace.bestCandidateWritten || state.bestCandidateNeedsVerification)) {
2033
+ trace.bestCandidateVerified = true;
2034
+ }
2035
+ if (toolName === 'search_repo' && textResult) {
2036
+ for (const candidate of this.parseSearchRepoResultForCandidates(textResult)) {
2037
+ this.addCandidateToTrace(trace, candidate.path, candidate.reason);
2038
+ }
2039
+ }
2040
+ else if (toolName === 'glob_files' && textResult) {
2041
+ for (const line of textResult.split('\n')) {
2042
+ const candidatePath = line.trim();
2043
+ if (/\.(html|tsx|ts|jsx|js|md|json)$/i.test(candidatePath)) {
2044
+ this.addCandidateToTrace(trace, candidatePath, 'matched glob search');
2045
+ }
2046
+ }
2047
+ }
2048
+ else if (toolName === 'list_directory' && textResult && typeof args.path === 'string') {
2049
+ const baseDir = this.normalizeCandidatePath(String(args.path));
2050
+ for (const line of textResult.split('\n')) {
2051
+ const candidateName = line.trim();
2052
+ if (/\.(html|tsx|ts|jsx|js|md|json)$/i.test(candidateName)) {
2053
+ this.addCandidateToTrace(trace, this.joinCandidatePath(baseDir, candidateName), `listed in directory ${baseDir || '.'}`);
2054
+ }
2055
+ }
2056
+ }
2057
+ else if (toolName === 'read_file' && textResult && typeof args.path === 'string') {
2058
+ const sourcePath = this.normalizeCandidatePath(String(args.path));
2059
+ for (const candidate of this.extractCandidatePathsFromText(textResult, sourcePath)) {
2060
+ this.addCandidateToTrace(trace, candidate.path, candidate.reason);
2061
+ }
2062
+ }
2063
+ return result;
2064
+ },
2065
+ };
2066
+ }
2067
+ return wrapped;
2068
+ }
2069
+ createTraceCallbacks(callbacks, trace) {
2070
+ if (!callbacks) {
2071
+ return {
2072
+ onToolStart: (event) => {
2073
+ trace.actionKeys.push(`server:${event.toolName}`);
2074
+ if (trace.actionKeys.length > 10)
2075
+ trace.actionKeys = trace.actionKeys.slice(-10);
2076
+ if (event.toolName === 'write_file')
2077
+ trace.attemptedWrite = true;
2078
+ this.pushToolTraceEntry(trace, `server-tool ${event.toolName} started`);
2079
+ },
2080
+ onToolComplete: (event) => {
2081
+ const resultSummary = this.summarizeTextBlockForTrace(event.result);
2082
+ this.pushToolTraceEntry(trace, `server-tool ${event.toolName} ${event.success ? 'completed' : 'failed'}${resultSummary ? ` -> ${resultSummary}` : ''}`);
2083
+ },
2084
+ };
2085
+ }
2086
+ return {
2087
+ ...callbacks,
2088
+ onToolStart: (event) => {
2089
+ trace.actionKeys.push(`server:${event.toolName}`);
2090
+ if (trace.actionKeys.length > 10)
2091
+ trace.actionKeys = trace.actionKeys.slice(-10);
2092
+ if (event.toolName === 'write_file')
2093
+ trace.attemptedWrite = true;
2094
+ this.pushToolTraceEntry(trace, `server-tool ${event.toolName} started`);
2095
+ callbacks.onToolStart?.(event);
2096
+ },
2097
+ onToolComplete: (event) => {
2098
+ const resultSummary = this.summarizeTextBlockForTrace(event.result);
2099
+ this.pushToolTraceEntry(trace, `server-tool ${event.toolName} ${event.success ? 'completed' : 'failed'}${resultSummary ? ` -> ${resultSummary}` : ''}`);
2100
+ callbacks.onToolComplete?.(event);
2101
+ },
2102
+ };
2103
+ }
2104
+ buildToolTraceSummary(trace) {
2105
+ if (trace.entries.length === 0)
2106
+ return '';
2107
+ const lines = trace.entries.slice(-6).map((entry) => `- ${entry}`);
2108
+ const flags = [];
2109
+ if (trace.discoveryPerformed)
2110
+ flags.push('repo discovery used');
2111
+ if (trace.readFiles)
2112
+ flags.push('candidate files read');
2113
+ if (trace.wroteFiles)
2114
+ flags.push('files written');
2115
+ if (trace.localToolLoopGuardTriggered)
2116
+ flags.push('local-tool loop guard forced end_turn');
2117
+ if (trace.bestCandidateVerified)
2118
+ flags.push('target re-read after write');
2119
+ if (trace.verificationPassed)
2120
+ flags.push('verification passed');
2121
+ else if (trace.verificationAttempted)
2122
+ flags.push('verification failed');
2123
+ return [
2124
+ 'Session working memory:',
2125
+ ...(flags.length > 0 ? [`- ${flags.join('; ')}`] : []),
2126
+ ...(trace.bestCandidatePath
2127
+ ? [`- best candidate: ${trace.bestCandidatePath}${trace.bestCandidateReason ? ` (${trace.bestCandidateReason})` : ''}`]
2128
+ : []),
2129
+ ...lines,
2130
+ ]
2131
+ .join('\n')
2132
+ .slice(0, 1200);
2133
+ }
2134
+ extractBootstrapQueries(message) {
2135
+ const queries = [];
2136
+ const noisyTerms = new Set([
2137
+ 'a',
2138
+ 'against',
2139
+ 'all',
2140
+ 'analyze',
2141
+ 'and',
2142
+ 'as',
2143
+ 'at',
2144
+ 'based',
2145
+ 'before',
2146
+ 'best',
2147
+ 'by',
2148
+ 'codebase',
2149
+ 'do',
2150
+ 'exactly',
2151
+ 'files',
2152
+ 'first',
2153
+ 'following',
2154
+ 'goal',
2155
+ 'go',
2156
+ 'how',
2157
+ 'in',
2158
+ 'is',
2159
+ 'it',
2160
+ 'its',
2161
+ 'make',
2162
+ 'markdown',
2163
+ 'most',
2164
+ 'no',
2165
+ 'of',
2166
+ 'on',
2167
+ 'order',
2168
+ 'plan',
2169
+ 'progress',
2170
+ 'repo',
2171
+ 'research',
2172
+ 'right',
2173
+ 'save',
2174
+ 'session',
2175
+ 'solve',
2176
+ 'task',
2177
+ 'that',
2178
+ 'the',
2179
+ 'then',
2180
+ 'through',
2181
+ 'to',
2182
+ 'turn',
2183
+ 'update',
2184
+ 'user',
2185
+ 'ux',
2186
+ 'web',
2187
+ 'when',
2188
+ 'with',
2189
+ 'work',
2190
+ 'your',
2191
+ ]);
2192
+ const push = (candidate) => {
2193
+ const normalized = candidate
2194
+ .replace(/^[^a-z0-9/._-]+|[^a-z0-9/._ -]+$/gi, '')
2195
+ .replace(/\s+/g, ' ')
2196
+ .trim();
2197
+ if (!normalized || normalized.length < 3 || normalized.length > 60)
2198
+ return;
2199
+ const words = normalized.toLowerCase().split(' ').filter(Boolean);
2200
+ if (words.length > 4)
2201
+ return;
2202
+ if (words.every((word) => noisyTerms.has(word)))
2203
+ return;
2204
+ if (words.length > 1 && noisyTerms.has(words[words.length - 1] || ''))
2205
+ return;
2206
+ if (!queries.some((existing) => existing.toLowerCase() === normalized.toLowerCase())) {
2207
+ queries.push(normalized);
2208
+ }
2209
+ };
2210
+ const lowerMessage = message.toLowerCase();
2211
+ const phraseHints = [
2212
+ 'agent editor',
2213
+ 'theme.html',
2214
+ '/theme.html',
2215
+ 'style it visually',
2216
+ ];
2217
+ for (const hint of phraseHints) {
2218
+ if (lowerMessage.includes(hint.toLowerCase()))
2219
+ push(hint);
2220
+ }
2221
+ for (const match of message.matchAll(/"([^"]{3,60})"/g)) {
2222
+ push(match[1] || '');
2223
+ }
2224
+ for (const match of message.matchAll(/(?:go through|review|inspect|edit|improve|update|fix|modify)\s+(?:the\s+)?([a-z0-9][a-z0-9/_-]*(?:\s+[a-z0-9][a-z0-9/_-]*){0,2})/gi)) {
2225
+ push(match[1] || '');
2226
+ }
2227
+ for (const match of message.matchAll(/([a-z0-9][a-z0-9/_-]*(?:\s+[a-z0-9][a-z0-9/_-]*){0,2})\s+(?:page|editor|screen|view|route|component)\b/gi)) {
2228
+ push(match[0] || '');
2229
+ push(match[1] || '');
2230
+ }
2231
+ for (const match of message.matchAll(/\b[\w./-]+\.(?:html|tsx|ts|jsx|js|md|json)\b/g)) {
2232
+ push(match[0] || '');
2233
+ }
2234
+ for (const match of message.matchAll(/\/[A-Za-z0-9._/-]+/g)) {
2235
+ push(match[0] || '');
2236
+ }
2237
+ for (const match of message.matchAll(/\b([a-z0-9]+(?:\s+[a-z0-9]+){1,2})\b/gi)) {
2238
+ const phrase = (match[1] || '').toLowerCase();
2239
+ const words = phrase.split(' ');
2240
+ if (words.some((word) => ['editor', 'page', 'screen', 'view', 'route', 'component'].includes(word))) {
2241
+ push(match[1] || '');
2242
+ }
2243
+ }
2244
+ return queries.slice(0, 4);
2245
+ }
2246
+ async generateBootstrapDiscoveryContext(message, localTools) {
2247
+ if (!localTools)
2248
+ return undefined;
2249
+ const searchTool = localTools.search_repo;
2250
+ const globTool = localTools.glob_files;
2251
+ if (!searchTool && !globTool)
2252
+ return undefined;
2253
+ const queries = this.extractBootstrapQueries(message);
2254
+ if (queries.length === 0)
2255
+ return undefined;
2256
+ const lines = [];
2257
+ for (const query of queries) {
2258
+ if (lines.length >= 6)
2259
+ break;
2260
+ if (searchTool) {
2261
+ try {
2262
+ const result = await searchTool.execute({ query, path: '.', maxResults: 5 });
2263
+ const summary = this.summarizeTextBlockForTrace(result, 3);
2264
+ if (summary && !summary.startsWith('No matches found')) {
2265
+ lines.push(`search_repo "${query}": ${summary}`);
2266
+ continue;
2267
+ }
2268
+ }
2269
+ catch {
2270
+ // Best effort bootstrap only
2271
+ }
2272
+ }
2273
+ if (globTool && /\./.test(query)) {
2274
+ try {
2275
+ const result = await globTool.execute({ pattern: `**/${query}`, path: '.', maxResults: 5 });
2276
+ const summary = this.summarizeTextBlockForTrace(result, 3);
2277
+ if (summary && !summary.startsWith('No files matched')) {
2278
+ lines.push(`glob_files "**/${query}": ${summary}`);
2279
+ }
2280
+ }
2281
+ catch {
2282
+ // Best effort bootstrap only
2283
+ }
2284
+ }
2285
+ }
2286
+ if (lines.length === 0)
2287
+ return undefined;
2288
+ return ['Bootstrap repo hints:', ...lines].join('\n').slice(0, 1500);
2289
+ }
2290
+ buildStuckTurnRecoveryMessage(state) {
2291
+ const recent = state.sessions.slice(-2);
2292
+ const normalizedPlanPath = typeof state.planPath === 'string' && state.planPath.trim()
2293
+ ? this.normalizeCandidatePath(state.planPath)
2294
+ : undefined;
2295
+ const recentPlanOnlyLoop = Boolean(normalizedPlanPath) &&
2296
+ recent.length === 2 &&
2297
+ recent.every((session) => {
2298
+ const specificActionKeys = (session.actionKeys || [])
2299
+ .map((actionKey) => actionKey.replace(/\\/g, '/'))
2300
+ .filter((actionKey) => !actionKey.startsWith('server:'));
2301
+ return (specificActionKeys.length > 0 &&
2302
+ specificActionKeys.every((actionKey) => actionKey.includes(normalizedPlanPath)));
2303
+ });
2304
+ if (recent.length < 2 ||
2305
+ !(recent.every((session) => session.hadTextOutput === false && session.wroteFiles === false) ||
2306
+ recentPlanOnlyLoop)) {
2307
+ return undefined;
2308
+ }
2309
+ const repeatedSameActions = recent.length === 2 &&
2310
+ recent.every((session) => (session.actionKeys?.length || 0) > 0) &&
2311
+ JSON.stringify(recent[0]?.actionKeys || []) === JSON.stringify(recent[1]?.actionKeys || []);
2312
+ if (state.workflowPhase === 'planning' && state.planPath) {
2313
+ return [
2314
+ 'Recovery instruction:',
2315
+ 'Research is already complete. Stop rediscovering and write the plan now.',
2316
+ `Your next action must be write_file to "${state.planPath}".`,
2317
+ 'The plan must summarize UX findings, include a "Preserve existing functionality" section, name the best candidate file, and list execution steps.',
2318
+ 'Do not edit the product file until the plan exists.',
2319
+ ...(repeatedSameActions
2320
+ ? ['You are repeating the same discovery actions; break the loop by writing the plan file now.']
2321
+ : []),
2322
+ ].join('\n');
2323
+ }
2324
+ if (state.workflowPhase === 'execution' && state.bestCandidatePath) {
2325
+ const normalizedBestCandidatePath = this.normalizeCandidatePath(state.bestCandidatePath);
2326
+ const recentlyReadBestCandidate = (state.recentReadPaths || [])
2327
+ .map((readPath) => this.normalizeCandidatePath(readPath))
2328
+ .includes(normalizedBestCandidatePath);
2329
+ return [
2330
+ 'Recovery instruction:',
2331
+ 'Planning should already be complete. Stop rediscovering and execute the plan.',
2332
+ recentlyReadBestCandidate
2333
+ ? `Your next action must be write_file on "${state.bestCandidatePath}".`
2334
+ : `Your next action must be read_file on "${state.bestCandidatePath}" so you can edit it next.`,
2335
+ ...(state.planPath
2336
+ ? [`Do not write "${state.planPath}" again until after you complete a real repo-file edit in this session.`]
2337
+ : []),
2338
+ 'After editing, run a verification command with run_check before TASK_COMPLETE.',
2339
+ 'Do not call broad discovery tools again unless the target file is missing or invalid.',
2340
+ ...(repeatedSameActions
2341
+ ? ['You are repeating the same discovery actions; break the loop by editing the target file now.']
2342
+ : []),
2343
+ ].join('\n');
2344
+ }
2345
+ if (state.bestCandidatePath) {
2346
+ const recentlyReadBestCandidate = (state.recentReadPaths || []).includes(state.bestCandidatePath);
2347
+ return [
2348
+ 'Recovery instruction:',
2349
+ 'Your previous sessions produced no final text and did not complete a useful edit.',
2350
+ `You already have a best candidate file: "${state.bestCandidatePath}".`,
2351
+ ...(state.bestCandidateReason ? [`Reason: ${state.bestCandidateReason}`] : []),
2352
+ recentlyReadBestCandidate
2353
+ ? `Do not keep searching. Your next action must be to edit "${state.bestCandidatePath}" with write_file, or explain why that file is not the correct target.`
2354
+ : `Do not keep searching. Your next action must be read_file on "${state.bestCandidatePath}".`,
2355
+ 'Do not call list_directory, tree_directory, glob_files, or search_repo again unless that candidate path is missing or clearly wrong.',
2356
+ ...(repeatedSameActions
2357
+ ? ['You are repeating the same discovery actions; break the loop by acting on the best candidate now.']
2358
+ : []),
2359
+ ].join('\n');
2360
+ }
2361
+ const queries = this.extractBootstrapQueries(state.originalMessage || '');
2362
+ const queryHint = queries.length > 0
2363
+ ? `Start with these exact repo searches: ${queries.map((query) => `"${query}"`).join(', ')}.`
2364
+ : 'Start with a concrete repo search using the key nouns from the original task.';
2365
+ return [
2366
+ 'Recovery instruction:',
2367
+ 'Your previous sessions produced no final text and did not edit files.',
2368
+ queryHint,
2369
+ 'Then read the most relevant existing file you find before any write_file call.',
2370
+ 'If a route, link, or page already exists, edit that existing file instead of creating a new one.',
2371
+ ...(repeatedSameActions
2372
+ ? ['You are repeating the same discovery actions; pick one candidate and act on it.']
2373
+ : []),
2374
+ ].join('\n');
2375
+ }
1298
2376
  /**
1299
2377
  * Run a long-task agent across multiple sessions with automatic state management.
1300
2378
  *
@@ -1328,12 +2406,21 @@ class AgentsEndpoint {
1328
2406
  : options.trackProgress
1329
2407
  ? `${agent.name} task`
1330
2408
  : '';
2409
+ const resolvedTaskName = taskName || `${agent.name} task`;
2410
+ const seededResumeState = this.sanitizeResumeState(options.resumeState, resolvedTaskName);
1331
2411
  // Initialize state
1332
2412
  const state = {
1333
2413
  agentId: id,
1334
2414
  agentName: agent.name,
1335
- taskName: taskName || `${agent.name} task`,
2415
+ taskName: resolvedTaskName,
1336
2416
  status: 'running',
2417
+ workflowPhase: seededResumeState?.workflowPhase || 'research',
2418
+ planPath: seededResumeState?.planPath || this.getDefaultPlanPath(resolvedTaskName),
2419
+ planWritten: seededResumeState?.planWritten || false,
2420
+ bestCandidateNeedsVerification: seededResumeState?.bestCandidateNeedsVerification || false,
2421
+ bestCandidateVerified: seededResumeState?.bestCandidateVerified || false,
2422
+ verificationRequired: seededResumeState?.verificationRequired ?? Boolean(options.localTools?.run_check),
2423
+ lastVerificationPassed: seededResumeState?.lastVerificationPassed || false,
1337
2424
  sessionCount: 0,
1338
2425
  totalCost: 0,
1339
2426
  lastOutput: '',
@@ -1341,13 +2428,39 @@ class AgentsEndpoint {
1341
2428
  sessions: [],
1342
2429
  startedAt: new Date().toISOString(),
1343
2430
  updatedAt: new Date().toISOString(),
2431
+ ...(seededResumeState?.originalMessage ? { originalMessage: seededResumeState.originalMessage } : {}),
2432
+ ...(seededResumeState?.bootstrapContext ? { bootstrapContext: seededResumeState.bootstrapContext } : {}),
2433
+ ...(seededResumeState?.bestCandidatePath
2434
+ ? {
2435
+ bestCandidatePath: seededResumeState.bestCandidatePath,
2436
+ bestCandidateReason: seededResumeState.bestCandidateReason,
2437
+ }
2438
+ : {}),
2439
+ ...(seededResumeState?.candidatePaths ? { candidatePaths: seededResumeState.candidatePaths } : {}),
2440
+ ...(seededResumeState?.recentReadPaths ? { recentReadPaths: seededResumeState.recentReadPaths } : {}),
2441
+ ...(seededResumeState?.recentActionKeys
2442
+ ? { recentActionKeys: seededResumeState.recentActionKeys }
2443
+ : {}),
1344
2444
  };
2445
+ this.updateWorkflowPhase(state, this.createEmptyToolTrace());
1345
2446
  // Track the record ID if we're syncing
1346
2447
  let recordId;
1347
2448
  // Extract local tool names for prompt injection
1348
2449
  const localToolNames = options.localTools ? Object.keys(options.localTools) : undefined;
2450
+ if (!options.previousMessages) {
2451
+ state.bootstrapContext = await this.generateBootstrapDiscoveryContext(options.message, options.localTools);
2452
+ const bootstrapCandidate = this.extractBestCandidateFromBootstrapContext(state.bootstrapContext);
2453
+ if (bootstrapCandidate) {
2454
+ state.bestCandidatePath = bootstrapCandidate.path;
2455
+ state.bestCandidateReason = bootstrapCandidate.reason;
2456
+ state.candidatePaths = [bootstrapCandidate.path];
2457
+ }
2458
+ }
1349
2459
  // Session loop
1350
2460
  for (let session = 0; session < maxSessions; session++) {
2461
+ const sessionTrace = this.createEmptyToolTrace();
2462
+ const sessionLocalTools = this.wrapLocalToolsForTrace(options.localTools, sessionTrace, state);
2463
+ const sessionCallbacks = this.createTraceCallbacks(options.streamCallbacks, sessionTrace);
1351
2464
  // Build continuation context for resumed runs (first session only)
1352
2465
  const continuationContext = session === 0 && options.previousMessages
1353
2466
  ? {
@@ -1371,7 +2484,9 @@ class AgentsEndpoint {
1371
2484
  };
1372
2485
  if (useStream && options.localTools) {
1373
2486
  // Local tools require the pause/resume streaming loop
1374
- const completeEvent = await this.executeWithLocalTools(id, sessionData, options.localTools, options.streamCallbacks);
2487
+ const completeEvent = await this.executeWithLocalTools(id, sessionData, sessionLocalTools || options.localTools, sessionCallbacks, {
2488
+ onLocalToolResult: this.createLocalToolLoopGuard(state, sessionTrace),
2489
+ });
1375
2490
  if (!completeEvent) {
1376
2491
  throw new Error('Agent stream ended without a complete event');
1377
2492
  }
@@ -1385,7 +2500,7 @@ class AgentsEndpoint {
1385
2500
  };
1386
2501
  }
1387
2502
  else if (useStream && options.streamCallbacks) {
1388
- const completeEvent = await this.executeWithCallbacks(id, sessionData, options.streamCallbacks);
2503
+ const completeEvent = await this.executeWithCallbacks(id, sessionData, sessionCallbacks || options.streamCallbacks);
1389
2504
  if (!completeEvent) {
1390
2505
  throw new Error('Agent stream ended without a complete event');
1391
2506
  }
@@ -1401,11 +2516,17 @@ class AgentsEndpoint {
1401
2516
  else {
1402
2517
  sessionResult = await this.execute(id, sessionData);
1403
2518
  }
2519
+ const toolTraceSummary = this.buildToolTraceSummary(sessionTrace);
2520
+ const effectiveSessionOutput = this.buildEffectiveSessionOutput(sessionResult.result, toolTraceSummary);
1404
2521
  // Update state
1405
2522
  const sessionCost = sessionResult.totalCost;
1406
2523
  state.sessionCount = session + 1;
1407
2524
  state.totalCost += sessionCost;
1408
- state.lastOutput = sessionResult.result;
2525
+ state.lastOutput = effectiveSessionOutput;
2526
+ state.lastError =
2527
+ sessionResult.stopReason === 'error'
2528
+ ? sessionResult.error || 'Agent session ended with an error.'
2529
+ : undefined;
1409
2530
  state.lastStopReason = sessionResult.stopReason;
1410
2531
  state.updatedAt = new Date().toISOString();
1411
2532
  state.sessions.push({
@@ -1413,22 +2534,89 @@ class AgentsEndpoint {
1413
2534
  cost: sessionCost,
1414
2535
  iterations: sessionResult.iterations,
1415
2536
  stopReason: sessionResult.stopReason,
1416
- outputPreview: sessionResult.result.slice(0, 300),
2537
+ outputPreview: effectiveSessionOutput.slice(0, 300),
2538
+ toolTraceSummary: toolTraceSummary || undefined,
2539
+ discoveryPerformed: sessionTrace.discoveryPerformed,
2540
+ attemptedWrite: sessionTrace.attemptedWrite,
2541
+ wroteFiles: sessionTrace.wroteFiles,
2542
+ hadTextOutput: Boolean(sessionResult.result.trim()),
2543
+ verificationAttempted: sessionTrace.verificationAttempted,
2544
+ verificationPassed: sessionTrace.verificationPassed,
2545
+ bestCandidatePath: sessionTrace.bestCandidatePath || undefined,
2546
+ actionKeys: sessionTrace.actionKeys.slice(-5),
1417
2547
  completedAt: new Date().toISOString(),
1418
2548
  });
2549
+ if (sessionTrace.bestCandidatePath) {
2550
+ state.bestCandidatePath = sessionTrace.bestCandidatePath;
2551
+ state.bestCandidateReason = sessionTrace.bestCandidateReason;
2552
+ }
2553
+ if (sessionTrace.candidatePaths.length > 0) {
2554
+ state.candidatePaths = Array.from(new Set([...(state.candidatePaths || []), ...sessionTrace.candidatePaths])).slice(-20);
2555
+ }
2556
+ if (sessionTrace.readPaths.length > 0) {
2557
+ state.recentReadPaths = Array.from(new Set([...(state.recentReadPaths || []), ...sessionTrace.readPaths])).slice(-20);
2558
+ }
2559
+ if (sessionTrace.actionKeys.length > 0) {
2560
+ state.recentActionKeys = [...(state.recentActionKeys || []), ...sessionTrace.actionKeys].slice(-20);
2561
+ }
2562
+ if (sessionTrace.planWritten) {
2563
+ state.planWritten = true;
2564
+ }
2565
+ if (sessionTrace.executionFileWritten) {
2566
+ state.lastVerificationPassed = false;
2567
+ }
2568
+ if (sessionTrace.bestCandidateWritten) {
2569
+ state.bestCandidateNeedsVerification = true;
2570
+ state.bestCandidateVerified = false;
2571
+ }
2572
+ if (sessionTrace.bestCandidateVerified) {
2573
+ state.bestCandidateNeedsVerification = false;
2574
+ state.bestCandidateVerified = true;
2575
+ }
2576
+ if (sessionTrace.verificationAttempted) {
2577
+ state.lastVerificationPassed = sessionTrace.verificationPassed;
2578
+ }
1419
2579
  // Track cost by model
1420
2580
  const modelKey = options.model || 'default';
1421
2581
  if (!state.costByModel)
1422
2582
  state.costByModel = {};
1423
2583
  state.costByModel[modelKey] = (state.costByModel[modelKey] || 0) + sessionCost;
1424
- // Accumulate messages for future continuation
2584
+ this.updateWorkflowPhase(state, sessionTrace);
2585
+ const phaseTransitionSummary = state.phaseTransitionSummary;
2586
+ if (phaseTransitionSummary) {
2587
+ state.lastOutput = [phaseTransitionSummary, '', state.lastOutput].join('\n').trim();
2588
+ const latestSession = state.sessions[state.sessions.length - 1];
2589
+ if (latestSession) {
2590
+ latestSession.outputPreview = [phaseTransitionSummary, '', latestSession.outputPreview]
2591
+ .join('\n')
2592
+ .slice(0, 300);
2593
+ latestSession.toolTraceSummary = [phaseTransitionSummary, '', latestSession.toolTraceSummary || '']
2594
+ .join('\n')
2595
+ .trim()
2596
+ .slice(0, 1200);
2597
+ }
2598
+ }
2599
+ // Accumulate messages for future continuation.
2600
+ // When buildSessionMessages returns the full history + a new continuation
2601
+ // message, only the NEW messages at the end are appended — otherwise the
2602
+ // history would be re-pushed on every session and grow exponentially.
1425
2603
  if (!state.messages)
1426
2604
  state.messages = [];
1427
- state.messages.push(...messages);
1428
- // Also store the assistant's response as a message
1429
- if (sessionResult.result) {
1430
- state.messages.push({ role: 'assistant', content: sessionResult.result });
2605
+ if (state.messages.length > 0 && messages.length > state.messages.length) {
2606
+ // Continuation session: history was replayed, only append the new tail
2607
+ const newMessages = messages.slice(state.messages.length);
2608
+ state.messages.push(...newMessages);
2609
+ }
2610
+ else {
2611
+ // First session (or no prior history): all messages are new
2612
+ state.messages.push(...messages);
1431
2613
  }
2614
+ // Always store an assistant message so continuation sessions have full
2615
+ // conversation history. When the agent only made tool calls and produced
2616
+ // no text, fall back to a synthetic summary so the history stays coherent.
2617
+ const assistantContent = effectiveSessionOutput ||
2618
+ `[Session ${session + 1} completed (${sessionResult.stopReason}). No text output captured.]`;
2619
+ state.messages.push({ role: 'assistant', content: assistantContent });
1432
2620
  // Keep session log trimmed to last 50 entries
1433
2621
  if (state.sessions.length > 50) {
1434
2622
  state.sessions = state.sessions.slice(-50);
@@ -1438,12 +2626,12 @@ class AgentsEndpoint {
1438
2626
  state.status = 'complete';
1439
2627
  }
1440
2628
  else if (sessionResult.stopReason === 'error') {
1441
- state.status = 'complete';
2629
+ state.status = 'error';
1442
2630
  }
1443
2631
  else if (sessionResult.stopReason === 'max_cost') {
1444
2632
  state.status = 'budget_exceeded';
1445
2633
  }
1446
- else if (this.detectTaskCompletion(sessionResult.result)) {
2634
+ else if (this.canAcceptTaskCompletion(sessionResult.result, state, sessionTrace)) {
1447
2635
  // Client-side stop-phrase detection for non-loop agents returning 'end_turn'
1448
2636
  state.status = 'complete';
1449
2637
  }
@@ -1497,8 +2685,18 @@ class AgentsEndpoint {
1497
2685
  return [
1498
2686
  `Task: ${state.taskName}`,
1499
2687
  `Status: ${state.status}`,
2688
+ `Workflow phase: ${state.workflowPhase || 'research'}`,
1500
2689
  `Sessions completed: ${state.sessionCount}`,
1501
2690
  `Total cost: $${state.totalCost.toFixed(4)}`,
2691
+ ...(state.planPath ? [`Plan path: ${state.planPath}`] : []),
2692
+ ...(state.planWritten ? ['Plan written: yes'] : []),
2693
+ ...(state.bestCandidatePath
2694
+ ? [
2695
+ `Best candidate: ${state.bestCandidatePath}`,
2696
+ ...(state.bestCandidateReason ? [`Candidate reason: ${state.bestCandidateReason}`] : []),
2697
+ ]
2698
+ : []),
2699
+ ...(state.bootstrapContext ? ['', state.bootstrapContext] : []),
1502
2700
  '',
1503
2701
  'Session history:',
1504
2702
  sessionSummaries,
@@ -1513,17 +2711,63 @@ class AgentsEndpoint {
1513
2711
  */
1514
2712
  buildSessionMessages(originalMessage, state, sessionIndex, maxSessions, localToolNames, continuationContext) {
1515
2713
  // Build local tools guidance block when tools are available
2714
+ const phase = state.workflowPhase || 'research';
1516
2715
  const toolsBlock = localToolNames?.length
1517
2716
  ? [
1518
2717
  '',
1519
2718
  '--- Local Tools ---',
1520
2719
  `You have access to local filesystem tools (${localToolNames.join(', ')}) that execute directly on the user's machine.`,
1521
- 'Use these tools to create working, runnable files — not just code in your response.',
1522
- 'Prefer creating self-contained HTML files that the user can open in a web browser.',
1523
- 'For example, write a single .html file with inline CSS and JavaScript that demonstrates the result.',
2720
+ 'Use these tools to inspect the existing repository and make real file edits — not just code in your response.',
2721
+ ...(phase === 'research'
2722
+ ? [
2723
+ 'For repository modification tasks, before any write_file call you must perform at least one discovery action (search_repo, glob_files, or tree_directory).',
2724
+ 'If discovery finds a plausible existing file, you must read at least one candidate file before writing.',
2725
+ 'Before creating a new file, search the repo for existing relevant files, routes, links, components, or pages.',
2726
+ 'Prefer editing an existing file when one already implements or links to the feature you were asked to change.',
2727
+ 'Use search_repo, glob_files, and tree_directory to discover the right files before you call write_file.',
2728
+ 'Only create a new file when no suitable existing file exists, and make that decision intentionally.',
2729
+ ]
2730
+ : phase === 'planning'
2731
+ ? [
2732
+ `Research is already complete. Focus on writing or updating the plan at: ${state.planPath || this.getDefaultPlanPath(state.taskName)}.`,
2733
+ 'Do not restart broad repo discovery unless the saved best candidate is clearly invalid.',
2734
+ 'You may read the current target file or supporting source files if you need evidence for the plan.',
2735
+ 'Ground the plan in the existing implementation. Identify which current behaviors and linked files must be preserved.',
2736
+ 'List the exact verification commands you expect to run after editing, such as lint, typecheck, tests, or build.',
2737
+ ]
2738
+ : [
2739
+ ...(state.bestCandidatePath
2740
+ ? [
2741
+ `Execution-phase guard: broad discovery tools (search_repo, glob_files, tree_directory, list_directory) are locked while executing against "${state.bestCandidatePath}".`,
2742
+ ]
2743
+ : [
2744
+ 'Execution-phase guard: broad discovery tools are locked unless a read of the current target fails.',
2745
+ ]),
2746
+ 'Reading the markdown plan for status does not change the product target. Do not treat the plan file as the file to implement.',
2747
+ 'Do not write the plan file first in execution. Make a real repo-file edit before you update the plan with progress.',
2748
+ 'Do not create scratch or test files to probe the repo or tool behavior.',
2749
+ 'write_file automatically checkpoints original repo files before overwriting them. If an edit regresses behavior, use restore_file_checkpoint on that file.',
2750
+ 'Read the target file and edit it with write_file. Update the plan file with progress after completing real edits.',
2751
+ 'Before large edits, read any already discovered supporting source/style files that power the target so you preserve existing behavior.',
2752
+ 'Prefer minimal diffs over rewrites. If you cannot verify related behavior, stop and record what is still unverified instead of rewriting blindly.',
2753
+ 'Use run_check for real verification before TASK_COMPLETE. Good examples: "pnpm lint", "pnpm exec tsc --noEmit", "pnpm test", or a focused vitest/pytest command.',
2754
+ 'Broad discovery is only allowed if a read of the current target file fails.',
2755
+ ]),
1524
2756
  'Always use write_file to save your output so the user can run it immediately.',
1525
2757
  ].join('\n')
1526
2758
  : '';
2759
+ const bootstrapBlock = state.bootstrapContext
2760
+ ? ['', '--- Initial Repository Discovery ---', state.bootstrapContext].join('\n')
2761
+ : '';
2762
+ const phaseBlock = ['', this.buildPhaseInstructions(state)].join('\n');
2763
+ const candidateBlock = state.bestCandidatePath
2764
+ ? [
2765
+ '',
2766
+ '--- Best Candidate ---',
2767
+ `Current best candidate file: ${state.bestCandidatePath}`,
2768
+ ...(state.bestCandidateReason ? [`Why: ${state.bestCandidateReason}`] : []),
2769
+ ].join('\n')
2770
+ : '';
1527
2771
  const multiSessionInstruction = `This is a multi-session task (session ${sessionIndex + 1}/${maxSessions}). When you have fully completed the task, end your response with TASK_COMPLETE on its own line.`;
1528
2772
  // Continuation resume: first session of a resumed run with prior context
1529
2773
  if (continuationContext && sessionIndex === 0) {
@@ -1539,7 +2783,7 @@ class AgentsEndpoint {
1539
2783
  },
1540
2784
  {
1541
2785
  role: 'user',
1542
- content: [userMessage, toolsBlock, '', multiSessionInstruction].join('\n'),
2786
+ content: [userMessage, phaseBlock, toolsBlock, bootstrapBlock, candidateBlock, '', multiSessionInstruction].join('\n'),
1543
2787
  },
1544
2788
  ];
1545
2789
  return messages;
@@ -1553,14 +2797,14 @@ class AgentsEndpoint {
1553
2797
  },
1554
2798
  {
1555
2799
  role: 'user',
1556
- content: [userMessage, toolsBlock, '', multiSessionInstruction].join('\n'),
2800
+ content: [userMessage, phaseBlock, toolsBlock, bootstrapBlock, candidateBlock, '', multiSessionInstruction].join('\n'),
1557
2801
  },
1558
2802
  ];
1559
2803
  return messages;
1560
2804
  }
1561
2805
  // First session (non-continuation): user message + completion signal instruction
1562
2806
  if (sessionIndex === 0) {
1563
- const content = [originalMessage, toolsBlock, '', multiSessionInstruction].join('\n');
2807
+ const content = [originalMessage, phaseBlock, toolsBlock, bootstrapBlock, candidateBlock, '', multiSessionInstruction].join('\n');
1564
2808
  return [{ role: 'user', content }];
1565
2809
  }
1566
2810
  // Continuation sessions within a run: inject progress context
@@ -1568,14 +2812,57 @@ class AgentsEndpoint {
1568
2812
  const progressSummary = recentSessions
1569
2813
  .map((s) => ` Session ${s.index}: ${s.stopReason} ($${s.cost.toFixed(4)}) — ${s.outputPreview.slice(0, 100)}`)
1570
2814
  .join('\n');
2815
+ // When we have accumulated message history, replay the full conversation
2816
+ // so the model has complete context and doesn't start fresh each session.
2817
+ if (state.messages && state.messages.length > 0) {
2818
+ const recoveryMessage = this.buildStuckTurnRecoveryMessage(state);
2819
+ const continuationContent = [
2820
+ 'Continue the task.',
2821
+ phaseBlock,
2822
+ toolsBlock,
2823
+ bootstrapBlock,
2824
+ candidateBlock,
2825
+ '',
2826
+ `--- Progress (session ${sessionIndex + 1}/${maxSessions}, $${state.totalCost.toFixed(4)} spent) ---`,
2827
+ `Previous sessions:`,
2828
+ progressSummary,
2829
+ '',
2830
+ ...(recoveryMessage ? [recoveryMessage, ''] : []),
2831
+ 'Do not redo previous work. If the task is already complete, respond with TASK_COMPLETE.',
2832
+ ].join('\n');
2833
+ // Cap history to prevent context overflow on long-running marathons.
2834
+ // Keep the most recent 40 messages; prepend a system summary for trimmed ones.
2835
+ const MAX_HISTORY_MESSAGES = 40;
2836
+ let historyMessages = state.messages;
2837
+ if (historyMessages.length > MAX_HISTORY_MESSAGES) {
2838
+ const trimmedCount = historyMessages.length - MAX_HISTORY_MESSAGES;
2839
+ historyMessages = [
2840
+ {
2841
+ role: 'system',
2842
+ content: `[${trimmedCount} earlier messages trimmed to stay within context limits. Original task: ${(state.originalMessage || originalMessage).slice(0, 500)}]`,
2843
+ },
2844
+ ...historyMessages.slice(-MAX_HISTORY_MESSAGES),
2845
+ ];
2846
+ }
2847
+ return [
2848
+ ...historyMessages,
2849
+ { role: 'user', content: continuationContent },
2850
+ ];
2851
+ }
2852
+ // Fallback when no message history is available: single-message summary
2853
+ const recoveryMessage = this.buildStuckTurnRecoveryMessage(state);
1571
2854
  const content = [
1572
2855
  originalMessage,
2856
+ phaseBlock,
1573
2857
  toolsBlock,
2858
+ bootstrapBlock,
2859
+ candidateBlock,
1574
2860
  '',
1575
2861
  `--- Progress (session ${sessionIndex + 1}/${maxSessions}, $${state.totalCost.toFixed(4)} spent) ---`,
1576
2862
  `Previous sessions:`,
1577
2863
  progressSummary,
1578
2864
  '',
2865
+ ...(recoveryMessage ? [recoveryMessage, ''] : []),
1579
2866
  `Last output (do NOT repeat this — build on it):`,
1580
2867
  state.lastOutput.slice(0, 1000),
1581
2868
  '',