@runtypelabs/sdk 1.7.0 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/endpoints.js CHANGED
@@ -1170,9 +1170,21 @@ class AgentsEndpoint {
1170
1170
  throw new Error(error.error || `HTTP ${response.status}`);
1171
1171
  }
1172
1172
  let completeEvent = null;
1173
+ // Accumulate turn delta text so finalOutput is non-empty even when the
1174
+ // agent_complete event carries no output (e.g. model ended after tool calls)
1175
+ let accumulatedOutput = '';
1173
1176
  await processAgentStream(response.body, {
1174
1177
  ...callbacks,
1178
+ onTurnDelta: (event) => {
1179
+ if (event.contentType === 'text') {
1180
+ accumulatedOutput += event.delta;
1181
+ }
1182
+ callbacks.onTurnDelta?.(event);
1183
+ },
1175
1184
  onAgentComplete: (event) => {
1185
+ if (!event.finalOutput && accumulatedOutput) {
1186
+ event.finalOutput = accumulatedOutput;
1187
+ }
1176
1188
  completeEvent = event;
1177
1189
  callbacks.onAgentComplete?.(event);
1178
1190
  },
@@ -1198,7 +1210,7 @@ class AgentsEndpoint {
1198
1210
  * })
1199
1211
  * ```
1200
1212
  */
1201
- async executeWithLocalTools(id, data, localTools, callbacks) {
1213
+ async executeWithLocalTools(id, data, localTools, callbacks, options) {
1202
1214
  // Build runtime tool definitions from local tool schemas and inject into request
1203
1215
  const runtimeTools = Object.entries(localTools).map(([name, def]) => ({
1204
1216
  name,
@@ -1210,10 +1222,7 @@ class AgentsEndpoint {
1210
1222
  ...data,
1211
1223
  tools: {
1212
1224
  ...data.tools,
1213
- runtimeTools: [
1214
- ...(data.tools?.runtimeTools || []),
1215
- ...runtimeTools,
1216
- ],
1225
+ runtimeTools: [...(data.tools?.runtimeTools || []), ...runtimeTools],
1217
1226
  },
1218
1227
  };
1219
1228
  const response = await this.executeStream(id, requestData);
@@ -1222,16 +1231,36 @@ class AgentsEndpoint {
1222
1231
  throw new Error(error.error || `HTTP ${response.status}`);
1223
1232
  }
1224
1233
  let currentBody = response.body;
1234
+ // Accumulate text output across all streams (initial + resume cycles)
1235
+ // so finalOutput is non-empty even when the last resume stream has no text
1236
+ let accumulatedOutput = '';
1237
+ let pauseCount = 0;
1238
+ let discoveryPauseCount = 0;
1239
+ let consecutiveDiscoveryPauseCount = 0;
1240
+ const toolNameCounts = {};
1241
+ let recentActionKeys = [];
1225
1242
  while (true) {
1226
1243
  let pausedEvent = null;
1227
1244
  let completeEvent = null;
1228
1245
  await processAgentStream(currentBody, {
1229
1246
  ...callbacks,
1247
+ onTurnDelta: (event) => {
1248
+ if (event.contentType === 'text') {
1249
+ accumulatedOutput += event.delta;
1250
+ }
1251
+ callbacks?.onTurnDelta?.(event);
1252
+ },
1230
1253
  onAgentPaused: (event) => {
1231
1254
  pausedEvent = event;
1232
1255
  callbacks?.onAgentPaused?.(event);
1233
1256
  },
1234
1257
  onAgentComplete: (event) => {
1258
+ // Supplement finalOutput with accumulated turn deltas when the
1259
+ // agent_complete event itself carries no output (common when the
1260
+ // model's last action was a tool call rather than text output)
1261
+ if (!event.finalOutput && accumulatedOutput) {
1262
+ event.finalOutput = accumulatedOutput;
1263
+ }
1235
1264
  completeEvent = event;
1236
1265
  callbacks?.onAgentComplete?.(event);
1237
1266
  },
@@ -1276,6 +1305,41 @@ class AgentsEndpoint {
1276
1305
  // Return the error as a tool result so the agent can recover
1277
1306
  toolResult = `Error: ${err instanceof Error ? err.message : String(err)}`;
1278
1307
  }
1308
+ pauseCount += 1;
1309
+ const toolNameCount = (toolNameCounts[toolName] || 0) + 1;
1310
+ toolNameCounts[toolName] = toolNameCount;
1311
+ const discoveryTool = this.isDiscoveryLocalTool(toolName);
1312
+ if (discoveryTool) {
1313
+ discoveryPauseCount += 1;
1314
+ consecutiveDiscoveryPauseCount += 1;
1315
+ }
1316
+ else {
1317
+ consecutiveDiscoveryPauseCount = 0;
1318
+ }
1319
+ const actionKey = this.buildLocalToolActionKey(toolName, parsedParams);
1320
+ recentActionKeys = [...recentActionKeys, actionKey].slice(-12);
1321
+ const actionKeyCount = recentActionKeys.filter((candidateActionKey) => candidateActionKey === actionKey).length;
1322
+ const forcedCompleteEvent = options?.onLocalToolResult?.({
1323
+ executionId,
1324
+ pauseCount,
1325
+ discoveryPauseCount,
1326
+ consecutiveDiscoveryPauseCount,
1327
+ toolName,
1328
+ toolNameCount,
1329
+ parameters: parsedParams,
1330
+ toolResult,
1331
+ accumulatedOutput,
1332
+ actionKey,
1333
+ actionKeyCount,
1334
+ recentActionKeys,
1335
+ });
1336
+ if (forcedCompleteEvent) {
1337
+ if (!forcedCompleteEvent.finalOutput && accumulatedOutput) {
1338
+ forcedCompleteEvent.finalOutput = accumulatedOutput;
1339
+ }
1340
+ callbacks?.onAgentComplete?.(forcedCompleteEvent);
1341
+ return forcedCompleteEvent;
1342
+ }
1279
1343
  // Resume via agent resume endpoint
1280
1344
  const resumeResponse = await this.client.requestStream(`/agents/${id}/resume`, {
1281
1345
  method: 'POST',
@@ -1298,6 +1362,1017 @@ class AgentsEndpoint {
1298
1362
  }
1299
1363
  }
1300
1364
  // ─── Long-Task Agent Execution ───────────────────────────────────────
1365
+ createEmptyToolTrace() {
1366
+ return {
1367
+ entries: [],
1368
+ discoveryPerformed: false,
1369
+ attemptedWrite: false,
1370
+ wroteFiles: false,
1371
+ executionFileWritten: false,
1372
+ readFiles: false,
1373
+ readPaths: [],
1374
+ actionKeys: [],
1375
+ candidatePaths: [],
1376
+ planWritten: false,
1377
+ bestCandidateReadFailed: false,
1378
+ bestCandidateWritten: false,
1379
+ bestCandidateVerified: false,
1380
+ verificationAttempted: false,
1381
+ verificationPassed: false,
1382
+ localToolLoopGuardTriggered: false,
1383
+ };
1384
+ }
1385
+ isDiscoveryLocalTool(toolName) {
1386
+ return ['tree_directory', 'search_repo', 'glob_files', 'list_directory', 'read_file'].includes(toolName);
1387
+ }
1388
+ buildLocalToolActionKey(toolName, parameters) {
1389
+ const pathValue = typeof parameters.path === 'string' ? this.normalizeCandidatePath(parameters.path) : '';
1390
+ const queryValue = typeof parameters.query === 'string'
1391
+ ? parameters.query.trim()
1392
+ : typeof parameters.q === 'string'
1393
+ ? parameters.q.trim()
1394
+ : '';
1395
+ const patternValue = typeof parameters.pattern === 'string' ? parameters.pattern.trim() : '';
1396
+ const commandValue = typeof parameters.command === 'string' ? parameters.command.trim() : '';
1397
+ const descriptor = pathValue || queryValue || patternValue || commandValue;
1398
+ if (descriptor) {
1399
+ return `${toolName}:${descriptor.slice(0, 160)}`;
1400
+ }
1401
+ const fallback = this.summarizeUnknownForTrace(parameters, 160);
1402
+ return fallback ? `${toolName}:${fallback}` : toolName;
1403
+ }
1404
+ buildProspectiveStateForSessionTrace(state, trace) {
1405
+ const candidatePaths = Array.from(new Set([...(state.candidatePaths || []), ...trace.candidatePaths])).slice(-20);
1406
+ const recentReadPaths = Array.from(new Set([...(state.recentReadPaths || []), ...trace.readPaths])).slice(-20);
1407
+ return {
1408
+ ...state,
1409
+ ...(trace.bestCandidatePath
1410
+ ? {
1411
+ bestCandidatePath: trace.bestCandidatePath,
1412
+ bestCandidateReason: trace.bestCandidateReason,
1413
+ }
1414
+ : {}),
1415
+ candidatePaths,
1416
+ recentReadPaths,
1417
+ planWritten: state.planWritten || trace.planWritten,
1418
+ };
1419
+ }
1420
+ buildForcedLocalToolTurnCompleteEvent(state, snapshot, reason) {
1421
+ const finalOutput = [
1422
+ snapshot.accumulatedOutput.trim(),
1423
+ `Local tool loop guard ended this ${state.workflowPhase || 'research'} turn: ${reason}`,
1424
+ snapshot.recentActionKeys.length > 0
1425
+ ? `Recent local tool actions: ${snapshot.recentActionKeys.slice(-5).join(' | ')}`
1426
+ : '',
1427
+ ]
1428
+ .filter(Boolean)
1429
+ .join('\n\n');
1430
+ return {
1431
+ type: 'agent_complete',
1432
+ executionId: snapshot.executionId,
1433
+ seq: 0,
1434
+ agentId: state.agentId,
1435
+ success: true,
1436
+ iterations: 1,
1437
+ stopReason: 'end_turn',
1438
+ completedAt: new Date().toISOString(),
1439
+ totalCost: 0,
1440
+ finalOutput,
1441
+ duration: 0,
1442
+ };
1443
+ }
1444
+ createLocalToolLoopGuard(state, trace) {
1445
+ return (snapshot) => {
1446
+ const repeatedAction = snapshot.actionKeyCount >= 4;
1447
+ const heavyDiscoveryLoop = snapshot.discoveryPauseCount >= 24;
1448
+ const prospectiveState = this.buildProspectiveStateForSessionTrace(state, trace);
1449
+ const sufficientResearch = state.workflowPhase === 'research' && this.hasSufficientResearchEvidence(prospectiveState);
1450
+ let reason;
1451
+ if (state.workflowPhase === 'research') {
1452
+ if (sufficientResearch && snapshot.discoveryPauseCount >= 12) {
1453
+ reason =
1454
+ 'research evidence is already sufficient, but this execution kept issuing discovery tools instead of ending the turn';
1455
+ }
1456
+ else if (repeatedAction) {
1457
+ reason = `the same discovery action repeated ${snapshot.actionKeyCount} times in one session`;
1458
+ }
1459
+ else if (snapshot.consecutiveDiscoveryPauseCount >= 18 || heavyDiscoveryLoop) {
1460
+ reason =
1461
+ 'this session exceeded the discovery-tool budget without ending the turn';
1462
+ }
1463
+ }
1464
+ else if (state.workflowPhase === 'planning' &&
1465
+ !trace.planWritten &&
1466
+ snapshot.consecutiveDiscoveryPauseCount >= 18) {
1467
+ reason = 'planning is looping on discovery instead of writing the plan and ending the turn';
1468
+ }
1469
+ else if (state.workflowPhase === 'execution' &&
1470
+ !trace.executionFileWritten &&
1471
+ snapshot.consecutiveDiscoveryPauseCount >= 18) {
1472
+ reason = 'execution is looping on discovery instead of editing repo files and ending the turn';
1473
+ }
1474
+ if (!reason) {
1475
+ return undefined;
1476
+ }
1477
+ trace.localToolLoopGuardTriggered = true;
1478
+ trace.forcedTurnEndReason = reason;
1479
+ this.pushToolTraceEntry(trace, `local-tool loop guard forced end_turn -> ${reason}`);
1480
+ return this.buildForcedLocalToolTurnCompleteEvent(state, snapshot, reason);
1481
+ };
1482
+ }
1483
+ pushToolTraceEntry(trace, entry) {
1484
+ const trimmed = entry.trim();
1485
+ if (!trimmed)
1486
+ return;
1487
+ if (trace.entries[trace.entries.length - 1] === trimmed)
1488
+ return;
1489
+ trace.entries.push(trimmed);
1490
+ if (trace.entries.length > 12) {
1491
+ trace.entries = trace.entries.slice(-12);
1492
+ }
1493
+ }
1494
+ isPreservationSensitiveTask(state) {
1495
+ const bestCandidatePath = state.bestCandidatePath || '';
1496
+ if (/\.(html|tsx|jsx|css|scss|sass)$/i.test(bestCandidatePath)) {
1497
+ return true;
1498
+ }
1499
+ const prompt = (state.originalMessage || '').toLowerCase();
1500
+ return [
1501
+ 'ux',
1502
+ 'ui',
1503
+ 'design',
1504
+ 'frontend',
1505
+ 'front-end',
1506
+ 'theme',
1507
+ 'editor',
1508
+ 'layout',
1509
+ 'style',
1510
+ 'accessibility',
1511
+ 'visual',
1512
+ ].some((keyword) => prompt.includes(keyword));
1513
+ }
1514
+ getLikelySupportingCandidatePaths(bestCandidatePath, candidatePaths) {
1515
+ if (!bestCandidatePath || !candidatePaths || candidatePaths.length === 0)
1516
+ return [];
1517
+ const normalizedBestCandidatePath = this.normalizeCandidatePath(bestCandidatePath);
1518
+ const bestCandidateSegments = normalizedBestCandidatePath.split('/').filter(Boolean);
1519
+ const relatedRoot = bestCandidateSegments.length >= 2
1520
+ ? `${bestCandidateSegments[0]}/${bestCandidateSegments[1]}/`
1521
+ : bestCandidateSegments.length === 1
1522
+ ? `${bestCandidateSegments[0]}/`
1523
+ : '';
1524
+ const bestCandidateDir = normalizedBestCandidatePath.includes('/')
1525
+ ? `${normalizedBestCandidatePath.slice(0, normalizedBestCandidatePath.lastIndexOf('/'))}/`
1526
+ : '';
1527
+ return candidatePaths
1528
+ .map((candidatePath) => this.normalizeCandidatePath(candidatePath))
1529
+ .filter((candidatePath) => candidatePath &&
1530
+ candidatePath !== normalizedBestCandidatePath &&
1531
+ !this.isMarathonArtifactPath(candidatePath) &&
1532
+ ((bestCandidateDir && candidatePath.startsWith(bestCandidateDir)) ||
1533
+ (relatedRoot && candidatePath.startsWith(relatedRoot))));
1534
+ }
1535
+ hasSufficientResearchEvidence(state) {
1536
+ if (!state.bestCandidatePath)
1537
+ return false;
1538
+ const normalizedBestCandidatePath = this.normalizeCandidatePath(state.bestCandidatePath);
1539
+ const normalizedRecentReadPaths = (state.recentReadPaths || []).map((readPath) => this.normalizeCandidatePath(readPath));
1540
+ const readBestCandidate = normalizedRecentReadPaths.includes(normalizedBestCandidatePath);
1541
+ if (!readBestCandidate) {
1542
+ return false;
1543
+ }
1544
+ if (!this.isPreservationSensitiveTask(state)) {
1545
+ return true;
1546
+ }
1547
+ const supportingCandidatePaths = this.getLikelySupportingCandidatePaths(state.bestCandidatePath, state.candidatePaths);
1548
+ if (supportingCandidatePaths.length === 0) {
1549
+ return true;
1550
+ }
1551
+ return normalizedRecentReadPaths.some((readPath) => readPath !== normalizedBestCandidatePath && supportingCandidatePaths.includes(readPath));
1552
+ }
1553
+ buildEffectiveSessionOutput(modelOutput, toolTraceSummary) {
1554
+ return [toolTraceSummary.trim(), modelOutput.trim()].filter(Boolean).join('\n\n');
1555
+ }
1556
+ canAcceptTaskCompletion(output, state, sessionTrace) {
1557
+ if (!this.detectTaskCompletion(output)) {
1558
+ return false;
1559
+ }
1560
+ if (state.workflowPhase !== 'execution') {
1561
+ return true;
1562
+ }
1563
+ if (!state.bestCandidatePath) {
1564
+ return true;
1565
+ }
1566
+ const verificationSatisfied = !state.verificationRequired ||
1567
+ Boolean(state.lastVerificationPassed || sessionTrace.verificationPassed);
1568
+ return (Boolean(state.planWritten) &&
1569
+ Boolean(state.bestCandidateVerified || sessionTrace.bestCandidateVerified) &&
1570
+ verificationSatisfied);
1571
+ }
1572
+ summarizeUnknownForTrace(value, maxLength = 180) {
1573
+ const text = typeof value === 'string'
1574
+ ? value
1575
+ : value === undefined
1576
+ ? ''
1577
+ : JSON.stringify(value);
1578
+ return text.replace(/\s+/g, ' ').trim().slice(0, maxLength);
1579
+ }
1580
+ summarizeTextBlockForTrace(value, maxLines = 4) {
1581
+ const text = typeof value === 'string'
1582
+ ? value
1583
+ : value === undefined
1584
+ ? ''
1585
+ : JSON.stringify(value);
1586
+ if (!text)
1587
+ return '';
1588
+ return text
1589
+ .split('\n')
1590
+ .map((line) => line.trim())
1591
+ .filter(Boolean)
1592
+ .slice(0, maxLines)
1593
+ .join(' | ')
1594
+ .slice(0, 240);
1595
+ }
1596
+ parseVerificationResult(result) {
1597
+ if (typeof result !== 'string')
1598
+ return undefined;
1599
+ try {
1600
+ const parsed = JSON.parse(result);
1601
+ if (typeof parsed.success !== 'boolean')
1602
+ return undefined;
1603
+ return {
1604
+ success: parsed.success,
1605
+ ...(typeof parsed.command === 'string' ? { command: parsed.command } : {}),
1606
+ ...(typeof parsed.output === 'string' ? { output: parsed.output } : {}),
1607
+ ...(typeof parsed.error === 'string' ? { error: parsed.error } : {}),
1608
+ };
1609
+ }
1610
+ catch {
1611
+ return undefined;
1612
+ }
1613
+ }
1614
+ normalizeCandidatePath(candidatePath) {
1615
+ return candidatePath.trim().replace(/\\/g, '/').replace(/^\.?\//, '').replace(/\/+/g, '/');
1616
+ }
1617
+ dedupeNormalizedCandidatePaths(paths) {
1618
+ return Array.from(new Set((paths || [])
1619
+ .map((candidatePath) => this.normalizeCandidatePath(candidatePath))
1620
+ .filter((candidatePath) => {
1621
+ if (!candidatePath)
1622
+ return false;
1623
+ return !this.isMarathonArtifactPath(candidatePath);
1624
+ })));
1625
+ }
1626
+ isMarathonArtifactPath(candidatePath) {
1627
+ const normalized = this.normalizeCandidatePath(candidatePath).toLowerCase();
1628
+ return normalized === '.runtype' || normalized.startsWith('.runtype/');
1629
+ }
1630
+ isDiscoveryToolName(toolName) {
1631
+ return (toolName === 'search_repo' ||
1632
+ toolName === 'glob_files' ||
1633
+ toolName === 'tree_directory' ||
1634
+ toolName === 'list_directory');
1635
+ }
1636
+ sanitizeTaskSlug(taskName) {
1637
+ return taskName
1638
+ .toLowerCase()
1639
+ .replace(/[^a-z0-9_-]+/g, '-')
1640
+ .replace(/^-+|-+$/g, '')
1641
+ .slice(0, 80);
1642
+ }
1643
+ getDefaultPlanPath(taskName) {
1644
+ const slug = this.sanitizeTaskSlug(taskName || 'task');
1645
+ return `.runtype/marathons/${slug}/plan.md`;
1646
+ }
1647
+ dirnameOfCandidatePath(candidatePath) {
1648
+ const normalized = this.normalizeCandidatePath(candidatePath);
1649
+ const index = normalized.lastIndexOf('/');
1650
+ return index >= 0 ? normalized.slice(0, index) : '';
1651
+ }
1652
+ joinCandidatePath(baseDir, nextPath) {
1653
+ const normalizedNext = nextPath.replace(/\\/g, '/').trim();
1654
+ if (!normalizedNext)
1655
+ return '';
1656
+ if (normalizedNext.startsWith('/')) {
1657
+ return this.normalizeCandidatePath(`${baseDir}/${normalizedNext.slice(1)}`);
1658
+ }
1659
+ if (normalizedNext.startsWith('./')) {
1660
+ return this.normalizeCandidatePath(`${baseDir}/${normalizedNext.slice(2)}`);
1661
+ }
1662
+ return this.normalizeCandidatePath(baseDir ? `${baseDir}/${normalizedNext}` : normalizedNext);
1663
+ }
1664
+ scoreCandidatePath(candidatePath) {
1665
+ const normalized = this.normalizeCandidatePath(candidatePath).toLowerCase();
1666
+ let score = 0;
1667
+ if (normalized.endsWith('/theme.html') || normalized.endsWith('theme.html'))
1668
+ score += 80;
1669
+ if (normalized.includes('agent'))
1670
+ score += 30;
1671
+ if (normalized.includes('editor'))
1672
+ score += 30;
1673
+ if (normalized.includes('theme'))
1674
+ score += 25;
1675
+ if (normalized.endsWith('.html'))
1676
+ score += 20;
1677
+ if (normalized.includes('/src/'))
1678
+ score += 10;
1679
+ if (normalized.includes('/app/'))
1680
+ score += 10;
1681
+ if (normalized.includes('index.html'))
1682
+ score -= 10;
1683
+ return score;
1684
+ }
1685
+ addCandidateToTrace(trace, candidatePath, reason) {
1686
+ const normalized = this.normalizeCandidatePath(candidatePath);
1687
+ if (!normalized || normalized.length < 3)
1688
+ return;
1689
+ if (this.isMarathonArtifactPath(normalized))
1690
+ return;
1691
+ if (!trace.candidatePaths.includes(normalized)) {
1692
+ trace.candidatePaths.push(normalized);
1693
+ if (trace.candidatePaths.length > 12) {
1694
+ trace.candidatePaths = trace.candidatePaths.slice(-12);
1695
+ }
1696
+ }
1697
+ const currentScore = trace.bestCandidatePath ? this.scoreCandidatePath(trace.bestCandidatePath) : -1;
1698
+ const nextScore = this.scoreCandidatePath(normalized);
1699
+ if (!trace.bestCandidatePath || nextScore >= currentScore) {
1700
+ trace.bestCandidatePath = normalized;
1701
+ trace.bestCandidateReason = reason.slice(0, 200);
1702
+ }
1703
+ }
1704
+ extractCandidatePathsFromText(text, sourcePath) {
1705
+ const candidates = [];
1706
+ if (sourcePath && this.isMarathonArtifactPath(sourcePath)) {
1707
+ return candidates;
1708
+ }
1709
+ const add = (candidatePath, reason) => {
1710
+ const normalized = this.normalizeCandidatePath(candidatePath);
1711
+ if (!normalized)
1712
+ return;
1713
+ if (this.isMarathonArtifactPath(normalized))
1714
+ return;
1715
+ if (!candidates.some((candidate) => candidate.path === normalized)) {
1716
+ candidates.push({ path: normalized, reason });
1717
+ }
1718
+ };
1719
+ const baseDir = sourcePath ? this.dirnameOfCandidatePath(sourcePath) : '';
1720
+ for (const match of text.matchAll(/(?:href|src)=["']([^"']+\.(?:html|tsx|ts|jsx|js|md|json))["']/gi)) {
1721
+ const target = match[1] || '';
1722
+ const resolved = baseDir ? this.joinCandidatePath(baseDir, target) : target;
1723
+ add(resolved, `linked from ${sourcePath || 'discovery result'} via ${target}`);
1724
+ }
1725
+ for (const match of text.matchAll(/\b([\w./-]+\.(?:html|tsx|ts|jsx|js|md|json))\b/g)) {
1726
+ const target = match[1] || '';
1727
+ const resolved = sourcePath && !target.includes('/') ? this.joinCandidatePath(baseDir, target) : this.normalizeCandidatePath(target);
1728
+ add(resolved, `mentioned in ${sourcePath || 'discovery result'}`);
1729
+ }
1730
+ return candidates;
1731
+ }
1732
+ parseSearchRepoResultForCandidates(result) {
1733
+ const candidates = [];
1734
+ for (const line of result.split('\n')) {
1735
+ const contentMatch = line.match(/^\[content\]\s+([^:]+):\d+:\s+(.*)$/);
1736
+ if (contentMatch) {
1737
+ const sourcePath = this.normalizeCandidatePath(contentMatch[1] || '');
1738
+ const content = contentMatch[2] || '';
1739
+ for (const candidate of this.extractCandidatePathsFromText(content, sourcePath)) {
1740
+ if (!candidates.some((existing) => existing.path === candidate.path)) {
1741
+ candidates.push(candidate);
1742
+ }
1743
+ }
1744
+ continue;
1745
+ }
1746
+ const pathMatch = line.match(/^\[path\]\s+(.+)$/);
1747
+ if (pathMatch) {
1748
+ const sourcePath = this.normalizeCandidatePath(pathMatch[1] || '');
1749
+ if (/\.(html|tsx|ts|jsx|js|md|json)$/i.test(sourcePath)) {
1750
+ candidates.push({ path: sourcePath, reason: 'matched repository path search result' });
1751
+ }
1752
+ }
1753
+ }
1754
+ return candidates;
1755
+ }
1756
+ extractBestCandidateFromBootstrapContext(bootstrapContext) {
1757
+ if (!bootstrapContext)
1758
+ return undefined;
1759
+ const candidates = this.parseSearchRepoResultForCandidates(bootstrapContext);
1760
+ if (candidates.length === 0)
1761
+ return undefined;
1762
+ return candidates.sort((a, b) => this.scoreCandidatePath(b.path) - this.scoreCandidatePath(a.path))[0];
1763
+ }
1764
+ sanitizeResumeState(resumeState, taskName) {
1765
+ if (!resumeState)
1766
+ return undefined;
1767
+ const planPath = typeof resumeState.planPath === 'string' && resumeState.planPath.trim()
1768
+ ? this.normalizeCandidatePath(resumeState.planPath)
1769
+ : this.getDefaultPlanPath(taskName);
1770
+ const candidatePaths = this.dedupeNormalizedCandidatePaths(resumeState.candidatePaths);
1771
+ const recentReadPaths = this.dedupeNormalizedCandidatePaths(resumeState.recentReadPaths);
1772
+ const normalizedBestCandidatePath = typeof resumeState.bestCandidatePath === 'string' && resumeState.bestCandidatePath.trim()
1773
+ ? this.normalizeCandidatePath(resumeState.bestCandidatePath)
1774
+ : undefined;
1775
+ const bestCandidatePath = normalizedBestCandidatePath && !this.isMarathonArtifactPath(normalizedBestCandidatePath)
1776
+ ? normalizedBestCandidatePath
1777
+ : [...candidatePaths, ...recentReadPaths].sort((left, right) => this.scoreCandidatePath(right) - this.scoreCandidatePath(left))[0];
1778
+ const workflowPhase = resumeState.planWritten &&
1779
+ (!resumeState.workflowPhase ||
1780
+ resumeState.workflowPhase === 'research' ||
1781
+ resumeState.workflowPhase === 'planning')
1782
+ ? 'execution'
1783
+ : resumeState.workflowPhase;
1784
+ return {
1785
+ ...resumeState,
1786
+ workflowPhase,
1787
+ planPath,
1788
+ planWritten: Boolean(resumeState.planWritten),
1789
+ bestCandidatePath,
1790
+ bestCandidateReason: bestCandidatePath ? resumeState.bestCandidateReason : undefined,
1791
+ candidatePaths,
1792
+ recentReadPaths,
1793
+ recentActionKeys: Array.from(new Set(resumeState.recentActionKeys || [])).slice(-20),
1794
+ bestCandidateNeedsVerification: Boolean(resumeState.bestCandidateNeedsVerification),
1795
+ bestCandidateVerified: Boolean(resumeState.bestCandidateVerified),
1796
+ ...(resumeState.verificationRequired !== undefined
1797
+ ? { verificationRequired: resumeState.verificationRequired }
1798
+ : {}),
1799
+ lastVerificationPassed: Boolean(resumeState.lastVerificationPassed),
1800
+ };
1801
+ }
1802
+ buildPhaseInstructions(state) {
1803
+ const phase = state.workflowPhase || 'research';
1804
+ const planPath = state.planPath || this.getDefaultPlanPath(state.taskName);
1805
+ if (phase === 'planning') {
1806
+ return [
1807
+ '--- Workflow Phase: Planning ---',
1808
+ 'Research is complete. Your current job is to write the implementation plan before any product-file edits.',
1809
+ `Write the plan markdown to exactly: ${planPath}`,
1810
+ 'Do NOT edit the target product file yet.',
1811
+ 'The plan should summarize UX findings, explain why the current best candidate is the right file, and list concrete execution steps.',
1812
+ 'The plan must include a "Preserve existing functionality" section that lists current behaviors, linked files, integrations, and constraints that must keep working.',
1813
+ 'The plan must include a "Verification steps" section listing the concrete checks you will run before TASK_COMPLETE.',
1814
+ 'If the plan already exists, update that same plan file instead of creating a different one.',
1815
+ ].join('\n');
1816
+ }
1817
+ if (phase === 'execution') {
1818
+ return [
1819
+ '--- Workflow Phase: Execution ---',
1820
+ `The plan should already exist at: ${planPath}`,
1821
+ ...(state.bestCandidatePath ? [`Primary target file: ${state.bestCandidatePath}`] : []),
1822
+ 'Execute the plan by editing the target files.',
1823
+ 'Before ending each turn, update the markdown plan with progress against the steps you completed.',
1824
+ 'Modify the existing implementation incrementally. Do not replace the whole file unless the user explicitly asked for a rewrite.',
1825
+ 'Preserve existing functionality, handlers, imports, routes, configuration, and data flow unless the plan explicitly calls for changing them.',
1826
+ 'Before TASK_COMPLETE, run a verification command that matches the repo, such as lint, tests, build, or typecheck.',
1827
+ 'Avoid broad repo discovery unless the current candidate is clearly wrong.',
1828
+ ].join('\n');
1829
+ }
1830
+ return [
1831
+ '--- Workflow Phase: Research ---',
1832
+ 'Your current job is to inspect the repo, identify the correct existing target file, and gather enough evidence for a plan.',
1833
+ 'Identify related supporting files and current behaviors that must be preserved before planning.',
1834
+ 'Do NOT edit the target product file yet.',
1835
+ `When research is complete, the system will advance you to planning and require a plan at: ${planPath}`,
1836
+ ].join('\n');
1837
+ }
1838
+ updateWorkflowPhase(state, sessionTrace) {
1839
+ if (!state.workflowPhase)
1840
+ state.workflowPhase = 'research';
1841
+ if (!state.planPath)
1842
+ state.planPath = this.getDefaultPlanPath(state.taskName);
1843
+ state.phaseTransitionSummary = undefined;
1844
+ const transitionSummaries = [];
1845
+ let phaseUpdated = true;
1846
+ while (phaseUpdated) {
1847
+ phaseUpdated = false;
1848
+ if (state.workflowPhase === 'research' && this.hasSufficientResearchEvidence(state)) {
1849
+ state.workflowPhase = 'planning';
1850
+ transitionSummaries.push([
1851
+ 'Automatic phase transition: research -> planning.',
1852
+ `Best candidate confirmed: ${state.bestCandidatePath}`,
1853
+ `Next step: write the plan markdown to ${state.planPath} before editing the product file.`,
1854
+ ].join('\n'));
1855
+ phaseUpdated = true;
1856
+ continue;
1857
+ }
1858
+ if (state.workflowPhase === 'planning' && (sessionTrace.planWritten || state.planWritten)) {
1859
+ state.planWritten = true;
1860
+ state.workflowPhase = 'execution';
1861
+ transitionSummaries.push([
1862
+ 'Automatic phase transition: planning -> execution.',
1863
+ `Plan path: ${state.planPath}`,
1864
+ ...(state.bestCandidatePath ? [`Execute against: ${state.bestCandidatePath}`] : []),
1865
+ 'Next step: edit the target file(s) and update the plan with progress each turn.',
1866
+ ].join('\n'));
1867
+ phaseUpdated = true;
1868
+ }
1869
+ }
1870
+ if (state.status === 'complete') {
1871
+ state.workflowPhase = 'complete';
1872
+ }
1873
+ if (transitionSummaries.length > 0) {
1874
+ state.phaseTransitionSummary = transitionSummaries.join('\n\n');
1875
+ }
1876
+ }
1877
+ wrapLocalToolsForTrace(localTools, trace, state) {
1878
+ if (!localTools)
1879
+ return undefined;
1880
+ const wrapped = {};
1881
+ for (const [toolName, toolDef] of Object.entries(localTools)) {
1882
+ wrapped[toolName] = {
1883
+ ...toolDef,
1884
+ execute: async (args) => {
1885
+ const actionKey = `${toolName}:${String(args.path || args.query || args.pattern || '.').slice(0, 120)}`;
1886
+ trace.actionKeys.push(actionKey);
1887
+ if (trace.actionKeys.length > 10) {
1888
+ trace.actionKeys = trace.actionKeys.slice(-10);
1889
+ }
1890
+ const normalizedPathArg = typeof args.path === 'string' && args.path.trim()
1891
+ ? this.normalizeCandidatePath(String(args.path))
1892
+ : undefined;
1893
+ const normalizedPlanPath = state.planPath
1894
+ ? this.normalizeCandidatePath(state.planPath)
1895
+ : undefined;
1896
+ const normalizedBestCandidatePath = state.bestCandidatePath
1897
+ ? this.normalizeCandidatePath(state.bestCandidatePath)
1898
+ : undefined;
1899
+ const allowedWriteTargets = new Set([
1900
+ normalizedPlanPath,
1901
+ normalizedBestCandidatePath,
1902
+ ...(state.recentReadPaths || []).map((readPath) => this.normalizeCandidatePath(readPath)),
1903
+ ...trace.readPaths.map((readPath) => this.normalizeCandidatePath(readPath)),
1904
+ ].filter((value) => Boolean(value)));
1905
+ const pathArg = typeof args.path === 'string' && args.path.trim() ? ` path=${String(args.path)}` : '';
1906
+ const queryArg = typeof args.query === 'string' && args.query.trim() ? ` query="${String(args.query)}"` : '';
1907
+ const patternArg = typeof args.pattern === 'string' && args.pattern.trim()
1908
+ ? ` pattern="${String(args.pattern)}"`
1909
+ : '';
1910
+ const isWriteLikeTool = toolName === 'write_file' || toolName === 'restore_file_checkpoint';
1911
+ const isVerificationTool = toolName === 'run_check';
1912
+ if (state.workflowPhase === 'execution' &&
1913
+ normalizedBestCandidatePath &&
1914
+ this.isDiscoveryToolName(toolName) &&
1915
+ !trace.bestCandidateReadFailed) {
1916
+ const blockedMessage = [
1917
+ `Blocked by marathon execution guard: ${toolName} is disabled during execution.`,
1918
+ `Read or edit "${normalizedBestCandidatePath}" instead.`,
1919
+ 'Broad discovery is only re-enabled if a read of the current target file fails.',
1920
+ ].join(' ');
1921
+ this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> ${blockedMessage}`);
1922
+ return blockedMessage;
1923
+ }
1924
+ if (isWriteLikeTool) {
1925
+ trace.attemptedWrite = true;
1926
+ if (state.workflowPhase === 'planning' &&
1927
+ normalizedPathArg &&
1928
+ normalizedPlanPath &&
1929
+ normalizedPathArg !== normalizedPlanPath) {
1930
+ const blockedMessage = [
1931
+ `Blocked by marathon planning guard: ${toolName} must target the exact plan path during planning.`,
1932
+ `Write the plan to "${normalizedPlanPath}" before editing any product files.`,
1933
+ ].join(' ');
1934
+ this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> ${blockedMessage}`);
1935
+ return blockedMessage;
1936
+ }
1937
+ if (state.workflowPhase === 'execution' &&
1938
+ normalizedPathArg &&
1939
+ normalizedPlanPath &&
1940
+ normalizedBestCandidatePath &&
1941
+ normalizedPathArg === normalizedPlanPath &&
1942
+ !trace.executionFileWritten) {
1943
+ const blockedMessage = [
1944
+ `Blocked by marathon execution guard: ${toolName} cannot update the plan file before any real repo-file edit in this execution turn.`,
1945
+ `Edit "${normalizedBestCandidatePath}" or another previously discovered repo file first.`,
1946
+ `After that, you may update "${normalizedPlanPath}" with progress.`,
1947
+ ].join(' ');
1948
+ this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> ${blockedMessage}`);
1949
+ return blockedMessage;
1950
+ }
1951
+ if (state.workflowPhase === 'execution' &&
1952
+ normalizedPathArg &&
1953
+ normalizedPathArg !== normalizedPlanPath &&
1954
+ !allowedWriteTargets.has(normalizedPathArg)) {
1955
+ const blockedMessage = [
1956
+ `Blocked by marathon execution guard: ${toolName} is limited to the confirmed target, the plan file, or files already discovered/read for this task.`,
1957
+ `Do not create scratch files like "${normalizedPathArg}".`,
1958
+ normalizedBestCandidatePath
1959
+ ? `Edit "${normalizedBestCandidatePath}" or another previously discovered repo file instead.`
1960
+ : 'Read the current target file before writing.',
1961
+ ].join(' ');
1962
+ this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> ${blockedMessage}`);
1963
+ return blockedMessage;
1964
+ }
1965
+ }
1966
+ if (this.isDiscoveryToolName(toolName)) {
1967
+ trace.discoveryPerformed = true;
1968
+ }
1969
+ if (toolName === 'read_file') {
1970
+ trace.readFiles = true;
1971
+ if (normalizedPathArg) {
1972
+ const normalizedReadPath = normalizedPathArg;
1973
+ trace.readPaths.push(normalizedReadPath);
1974
+ if (trace.readPaths.length > 12) {
1975
+ trace.readPaths = trace.readPaths.slice(-12);
1976
+ }
1977
+ this.addCandidateToTrace(trace, normalizedReadPath, 'explicitly read by agent');
1978
+ }
1979
+ }
1980
+ let result;
1981
+ try {
1982
+ result = await toolDef.execute(args);
1983
+ }
1984
+ catch (error) {
1985
+ if (toolName === 'read_file' &&
1986
+ normalizedPathArg &&
1987
+ normalizedBestCandidatePath &&
1988
+ normalizedPathArg === normalizedBestCandidatePath) {
1989
+ trace.bestCandidateReadFailed = true;
1990
+ }
1991
+ this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg} -> error: ${error instanceof Error ? error.message : String(error)}`);
1992
+ throw error;
1993
+ }
1994
+ if (isWriteLikeTool && normalizedPathArg) {
1995
+ trace.wroteFiles = true;
1996
+ if (normalizedPlanPath && normalizedPathArg === normalizedPlanPath) {
1997
+ trace.planWritten = true;
1998
+ }
1999
+ else if (state.workflowPhase === 'execution') {
2000
+ trace.executionFileWritten = true;
2001
+ trace.verificationPassed = false;
2002
+ if (normalizedBestCandidatePath &&
2003
+ normalizedPathArg === normalizedBestCandidatePath) {
2004
+ trace.bestCandidateWritten = true;
2005
+ }
2006
+ }
2007
+ }
2008
+ const verificationResult = isVerificationTool
2009
+ ? this.parseVerificationResult(result)
2010
+ : undefined;
2011
+ if (verificationResult) {
2012
+ trace.verificationAttempted = true;
2013
+ trace.verificationPassed = verificationResult.success;
2014
+ }
2015
+ const summarizedResult = verificationResult
2016
+ ? [
2017
+ verificationResult.command || 'verification',
2018
+ verificationResult.success ? 'passed' : 'failed',
2019
+ verificationResult.error || verificationResult.output,
2020
+ ]
2021
+ .filter(Boolean)
2022
+ .join(' | ')
2023
+ .slice(0, 240)
2024
+ : this.summarizeTextBlockForTrace(result);
2025
+ const resultSuffix = summarizedResult ? ` -> ${summarizedResult}` : '';
2026
+ this.pushToolTraceEntry(trace, `${toolName}${pathArg}${queryArg}${patternArg}${resultSuffix}`);
2027
+ const textResult = typeof result === 'string' ? result : '';
2028
+ if (toolName === 'read_file' &&
2029
+ normalizedPathArg &&
2030
+ normalizedBestCandidatePath &&
2031
+ normalizedPathArg === normalizedBestCandidatePath &&
2032
+ (trace.bestCandidateWritten || state.bestCandidateNeedsVerification)) {
2033
+ trace.bestCandidateVerified = true;
2034
+ }
2035
+ if (toolName === 'search_repo' && textResult) {
2036
+ for (const candidate of this.parseSearchRepoResultForCandidates(textResult)) {
2037
+ this.addCandidateToTrace(trace, candidate.path, candidate.reason);
2038
+ }
2039
+ }
2040
+ else if (toolName === 'glob_files' && textResult) {
2041
+ for (const line of textResult.split('\n')) {
2042
+ const candidatePath = line.trim();
2043
+ if (/\.(html|tsx|ts|jsx|js|md|json)$/i.test(candidatePath)) {
2044
+ this.addCandidateToTrace(trace, candidatePath, 'matched glob search');
2045
+ }
2046
+ }
2047
+ }
2048
+ else if (toolName === 'list_directory' && textResult && typeof args.path === 'string') {
2049
+ const baseDir = this.normalizeCandidatePath(String(args.path));
2050
+ for (const line of textResult.split('\n')) {
2051
+ const candidateName = line.trim();
2052
+ if (/\.(html|tsx|ts|jsx|js|md|json)$/i.test(candidateName)) {
2053
+ this.addCandidateToTrace(trace, this.joinCandidatePath(baseDir, candidateName), `listed in directory ${baseDir || '.'}`);
2054
+ }
2055
+ }
2056
+ }
2057
+ else if (toolName === 'read_file' && textResult && typeof args.path === 'string') {
2058
+ const sourcePath = this.normalizeCandidatePath(String(args.path));
2059
+ for (const candidate of this.extractCandidatePathsFromText(textResult, sourcePath)) {
2060
+ this.addCandidateToTrace(trace, candidate.path, candidate.reason);
2061
+ }
2062
+ }
2063
+ return result;
2064
+ },
2065
+ };
2066
+ }
2067
+ return wrapped;
2068
+ }
2069
+ createTraceCallbacks(callbacks, trace) {
2070
+ if (!callbacks) {
2071
+ return {
2072
+ onToolStart: (event) => {
2073
+ trace.actionKeys.push(`server:${event.toolName}`);
2074
+ if (trace.actionKeys.length > 10)
2075
+ trace.actionKeys = trace.actionKeys.slice(-10);
2076
+ if (event.toolName === 'write_file')
2077
+ trace.attemptedWrite = true;
2078
+ this.pushToolTraceEntry(trace, `server-tool ${event.toolName} started`);
2079
+ },
2080
+ onToolComplete: (event) => {
2081
+ const resultSummary = this.summarizeTextBlockForTrace(event.result);
2082
+ this.pushToolTraceEntry(trace, `server-tool ${event.toolName} ${event.success ? 'completed' : 'failed'}${resultSummary ? ` -> ${resultSummary}` : ''}`);
2083
+ },
2084
+ };
2085
+ }
2086
+ return {
2087
+ ...callbacks,
2088
+ onToolStart: (event) => {
2089
+ trace.actionKeys.push(`server:${event.toolName}`);
2090
+ if (trace.actionKeys.length > 10)
2091
+ trace.actionKeys = trace.actionKeys.slice(-10);
2092
+ if (event.toolName === 'write_file')
2093
+ trace.attemptedWrite = true;
2094
+ this.pushToolTraceEntry(trace, `server-tool ${event.toolName} started`);
2095
+ callbacks.onToolStart?.(event);
2096
+ },
2097
+ onToolComplete: (event) => {
2098
+ const resultSummary = this.summarizeTextBlockForTrace(event.result);
2099
+ this.pushToolTraceEntry(trace, `server-tool ${event.toolName} ${event.success ? 'completed' : 'failed'}${resultSummary ? ` -> ${resultSummary}` : ''}`);
2100
+ callbacks.onToolComplete?.(event);
2101
+ },
2102
+ };
2103
+ }
2104
+ buildToolTraceSummary(trace) {
2105
+ if (trace.entries.length === 0)
2106
+ return '';
2107
+ const lines = trace.entries.slice(-6).map((entry) => `- ${entry}`);
2108
+ const flags = [];
2109
+ if (trace.discoveryPerformed)
2110
+ flags.push('repo discovery used');
2111
+ if (trace.readFiles)
2112
+ flags.push('candidate files read');
2113
+ if (trace.wroteFiles)
2114
+ flags.push('files written');
2115
+ if (trace.localToolLoopGuardTriggered)
2116
+ flags.push('local-tool loop guard forced end_turn');
2117
+ if (trace.bestCandidateVerified)
2118
+ flags.push('target re-read after write');
2119
+ if (trace.verificationPassed)
2120
+ flags.push('verification passed');
2121
+ else if (trace.verificationAttempted)
2122
+ flags.push('verification failed');
2123
+ return [
2124
+ 'Session working memory:',
2125
+ ...(flags.length > 0 ? [`- ${flags.join('; ')}`] : []),
2126
+ ...(trace.bestCandidatePath
2127
+ ? [`- best candidate: ${trace.bestCandidatePath}${trace.bestCandidateReason ? ` (${trace.bestCandidateReason})` : ''}`]
2128
+ : []),
2129
+ ...lines,
2130
+ ]
2131
+ .join('\n')
2132
+ .slice(0, 1200);
2133
+ }
2134
+ extractBootstrapQueries(message) {
2135
+ const queries = [];
2136
+ const noisyTerms = new Set([
2137
+ 'a',
2138
+ 'against',
2139
+ 'all',
2140
+ 'analyze',
2141
+ 'and',
2142
+ 'as',
2143
+ 'at',
2144
+ 'based',
2145
+ 'before',
2146
+ 'best',
2147
+ 'by',
2148
+ 'codebase',
2149
+ 'do',
2150
+ 'exactly',
2151
+ 'files',
2152
+ 'first',
2153
+ 'following',
2154
+ 'goal',
2155
+ 'go',
2156
+ 'how',
2157
+ 'in',
2158
+ 'is',
2159
+ 'it',
2160
+ 'its',
2161
+ 'make',
2162
+ 'markdown',
2163
+ 'most',
2164
+ 'no',
2165
+ 'of',
2166
+ 'on',
2167
+ 'order',
2168
+ 'plan',
2169
+ 'progress',
2170
+ 'repo',
2171
+ 'research',
2172
+ 'right',
2173
+ 'save',
2174
+ 'session',
2175
+ 'solve',
2176
+ 'task',
2177
+ 'that',
2178
+ 'the',
2179
+ 'then',
2180
+ 'through',
2181
+ 'to',
2182
+ 'turn',
2183
+ 'update',
2184
+ 'user',
2185
+ 'ux',
2186
+ 'web',
2187
+ 'when',
2188
+ 'with',
2189
+ 'work',
2190
+ 'your',
2191
+ ]);
2192
+ const push = (candidate) => {
2193
+ const normalized = candidate
2194
+ .replace(/^[^a-z0-9/._-]+|[^a-z0-9/._ -]+$/gi, '')
2195
+ .replace(/\s+/g, ' ')
2196
+ .trim();
2197
+ if (!normalized || normalized.length < 3 || normalized.length > 60)
2198
+ return;
2199
+ const words = normalized.toLowerCase().split(' ').filter(Boolean);
2200
+ if (words.length > 4)
2201
+ return;
2202
+ if (words.every((word) => noisyTerms.has(word)))
2203
+ return;
2204
+ if (words.length > 1 && noisyTerms.has(words[words.length - 1] || ''))
2205
+ return;
2206
+ if (!queries.some((existing) => existing.toLowerCase() === normalized.toLowerCase())) {
2207
+ queries.push(normalized);
2208
+ }
2209
+ };
2210
+ const lowerMessage = message.toLowerCase();
2211
+ const phraseHints = [
2212
+ 'agent editor',
2213
+ 'theme.html',
2214
+ '/theme.html',
2215
+ 'style it visually',
2216
+ ];
2217
+ for (const hint of phraseHints) {
2218
+ if (lowerMessage.includes(hint.toLowerCase()))
2219
+ push(hint);
2220
+ }
2221
+ for (const match of message.matchAll(/"([^"]{3,60})"/g)) {
2222
+ push(match[1] || '');
2223
+ }
2224
+ for (const match of message.matchAll(/(?:go through|review|inspect|edit|improve|update|fix|modify)\s+(?:the\s+)?([a-z0-9][a-z0-9/_-]*(?:\s+[a-z0-9][a-z0-9/_-]*){0,2})/gi)) {
2225
+ push(match[1] || '');
2226
+ }
2227
+ for (const match of message.matchAll(/([a-z0-9][a-z0-9/_-]*(?:\s+[a-z0-9][a-z0-9/_-]*){0,2})\s+(?:page|editor|screen|view|route|component)\b/gi)) {
2228
+ push(match[0] || '');
2229
+ push(match[1] || '');
2230
+ }
2231
+ for (const match of message.matchAll(/\b[\w./-]+\.(?:html|tsx|ts|jsx|js|md|json)\b/g)) {
2232
+ push(match[0] || '');
2233
+ }
2234
+ for (const match of message.matchAll(/\/[A-Za-z0-9._/-]+/g)) {
2235
+ push(match[0] || '');
2236
+ }
2237
+ for (const match of message.matchAll(/\b([a-z0-9]+(?:\s+[a-z0-9]+){1,2})\b/gi)) {
2238
+ const phrase = (match[1] || '').toLowerCase();
2239
+ const words = phrase.split(' ');
2240
+ if (words.some((word) => ['editor', 'page', 'screen', 'view', 'route', 'component'].includes(word))) {
2241
+ push(match[1] || '');
2242
+ }
2243
+ }
2244
+ return queries.slice(0, 4);
2245
+ }
2246
+ async generateBootstrapDiscoveryContext(message, localTools) {
2247
+ if (!localTools)
2248
+ return undefined;
2249
+ const searchTool = localTools.search_repo;
2250
+ const globTool = localTools.glob_files;
2251
+ if (!searchTool && !globTool)
2252
+ return undefined;
2253
+ const queries = this.extractBootstrapQueries(message);
2254
+ if (queries.length === 0)
2255
+ return undefined;
2256
+ const lines = [];
2257
+ for (const query of queries) {
2258
+ if (lines.length >= 6)
2259
+ break;
2260
+ if (searchTool) {
2261
+ try {
2262
+ const result = await searchTool.execute({ query, path: '.', maxResults: 5 });
2263
+ const summary = this.summarizeTextBlockForTrace(result, 3);
2264
+ if (summary && !summary.startsWith('No matches found')) {
2265
+ lines.push(`search_repo "${query}": ${summary}`);
2266
+ continue;
2267
+ }
2268
+ }
2269
+ catch {
2270
+ // Best effort bootstrap only
2271
+ }
2272
+ }
2273
+ if (globTool && /\./.test(query)) {
2274
+ try {
2275
+ const result = await globTool.execute({ pattern: `**/${query}`, path: '.', maxResults: 5 });
2276
+ const summary = this.summarizeTextBlockForTrace(result, 3);
2277
+ if (summary && !summary.startsWith('No files matched')) {
2278
+ lines.push(`glob_files "**/${query}": ${summary}`);
2279
+ }
2280
+ }
2281
+ catch {
2282
+ // Best effort bootstrap only
2283
+ }
2284
+ }
2285
+ }
2286
+ if (lines.length === 0)
2287
+ return undefined;
2288
+ return ['Bootstrap repo hints:', ...lines].join('\n').slice(0, 1500);
2289
+ }
2290
+ buildStuckTurnRecoveryMessage(state) {
2291
+ const recent = state.sessions.slice(-2);
2292
+ const normalizedPlanPath = typeof state.planPath === 'string' && state.planPath.trim()
2293
+ ? this.normalizeCandidatePath(state.planPath)
2294
+ : undefined;
2295
+ const recentPlanOnlyLoop = Boolean(normalizedPlanPath) &&
2296
+ recent.length === 2 &&
2297
+ recent.every((session) => {
2298
+ const specificActionKeys = (session.actionKeys || [])
2299
+ .map((actionKey) => actionKey.replace(/\\/g, '/'))
2300
+ .filter((actionKey) => !actionKey.startsWith('server:'));
2301
+ return (specificActionKeys.length > 0 &&
2302
+ specificActionKeys.every((actionKey) => actionKey.includes(normalizedPlanPath)));
2303
+ });
2304
+ if (recent.length < 2 ||
2305
+ !(recent.every((session) => session.hadTextOutput === false && session.wroteFiles === false) ||
2306
+ recentPlanOnlyLoop)) {
2307
+ return undefined;
2308
+ }
2309
+ const repeatedSameActions = recent.length === 2 &&
2310
+ recent.every((session) => (session.actionKeys?.length || 0) > 0) &&
2311
+ JSON.stringify(recent[0]?.actionKeys || []) === JSON.stringify(recent[1]?.actionKeys || []);
2312
+ if (state.workflowPhase === 'planning' && state.planPath) {
2313
+ return [
2314
+ 'Recovery instruction:',
2315
+ 'Research is already complete. Stop rediscovering and write the plan now.',
2316
+ `Your next action must be write_file to "${state.planPath}".`,
2317
+ 'The plan must summarize UX findings, include a "Preserve existing functionality" section, name the best candidate file, and list execution steps.',
2318
+ 'Do not edit the product file until the plan exists.',
2319
+ ...(repeatedSameActions
2320
+ ? ['You are repeating the same discovery actions; break the loop by writing the plan file now.']
2321
+ : []),
2322
+ ].join('\n');
2323
+ }
2324
+ if (state.workflowPhase === 'execution' && state.bestCandidatePath) {
2325
+ const normalizedBestCandidatePath = this.normalizeCandidatePath(state.bestCandidatePath);
2326
+ const recentlyReadBestCandidate = (state.recentReadPaths || [])
2327
+ .map((readPath) => this.normalizeCandidatePath(readPath))
2328
+ .includes(normalizedBestCandidatePath);
2329
+ return [
2330
+ 'Recovery instruction:',
2331
+ 'Planning should already be complete. Stop rediscovering and execute the plan.',
2332
+ recentlyReadBestCandidate
2333
+ ? `Your next action must be write_file on "${state.bestCandidatePath}".`
2334
+ : `Your next action must be read_file on "${state.bestCandidatePath}" so you can edit it next.`,
2335
+ ...(state.planPath
2336
+ ? [`Do not write "${state.planPath}" again until after you complete a real repo-file edit in this session.`]
2337
+ : []),
2338
+ 'After editing, run a verification command with run_check before TASK_COMPLETE.',
2339
+ 'Do not call broad discovery tools again unless the target file is missing or invalid.',
2340
+ ...(repeatedSameActions
2341
+ ? ['You are repeating the same discovery actions; break the loop by editing the target file now.']
2342
+ : []),
2343
+ ].join('\n');
2344
+ }
2345
+ if (state.bestCandidatePath) {
2346
+ const recentlyReadBestCandidate = (state.recentReadPaths || []).includes(state.bestCandidatePath);
2347
+ return [
2348
+ 'Recovery instruction:',
2349
+ 'Your previous sessions produced no final text and did not complete a useful edit.',
2350
+ `You already have a best candidate file: "${state.bestCandidatePath}".`,
2351
+ ...(state.bestCandidateReason ? [`Reason: ${state.bestCandidateReason}`] : []),
2352
+ recentlyReadBestCandidate
2353
+ ? `Do not keep searching. Your next action must be to edit "${state.bestCandidatePath}" with write_file, or explain why that file is not the correct target.`
2354
+ : `Do not keep searching. Your next action must be read_file on "${state.bestCandidatePath}".`,
2355
+ 'Do not call list_directory, tree_directory, glob_files, or search_repo again unless that candidate path is missing or clearly wrong.',
2356
+ ...(repeatedSameActions
2357
+ ? ['You are repeating the same discovery actions; break the loop by acting on the best candidate now.']
2358
+ : []),
2359
+ ].join('\n');
2360
+ }
2361
+ const queries = this.extractBootstrapQueries(state.originalMessage || '');
2362
+ const queryHint = queries.length > 0
2363
+ ? `Start with these exact repo searches: ${queries.map((query) => `"${query}"`).join(', ')}.`
2364
+ : 'Start with a concrete repo search using the key nouns from the original task.';
2365
+ return [
2366
+ 'Recovery instruction:',
2367
+ 'Your previous sessions produced no final text and did not edit files.',
2368
+ queryHint,
2369
+ 'Then read the most relevant existing file you find before any write_file call.',
2370
+ 'If a route, link, or page already exists, edit that existing file instead of creating a new one.',
2371
+ ...(repeatedSameActions
2372
+ ? ['You are repeating the same discovery actions; pick one candidate and act on it.']
2373
+ : []),
2374
+ ].join('\n');
2375
+ }
1301
2376
  /**
1302
2377
  * Run a long-task agent across multiple sessions with automatic state management.
1303
2378
  *
@@ -1331,12 +2406,21 @@ class AgentsEndpoint {
1331
2406
  : options.trackProgress
1332
2407
  ? `${agent.name} task`
1333
2408
  : '';
2409
+ const resolvedTaskName = taskName || `${agent.name} task`;
2410
+ const seededResumeState = this.sanitizeResumeState(options.resumeState, resolvedTaskName);
1334
2411
  // Initialize state
1335
2412
  const state = {
1336
2413
  agentId: id,
1337
2414
  agentName: agent.name,
1338
- taskName: taskName || `${agent.name} task`,
2415
+ taskName: resolvedTaskName,
1339
2416
  status: 'running',
2417
+ workflowPhase: seededResumeState?.workflowPhase || 'research',
2418
+ planPath: seededResumeState?.planPath || this.getDefaultPlanPath(resolvedTaskName),
2419
+ planWritten: seededResumeState?.planWritten || false,
2420
+ bestCandidateNeedsVerification: seededResumeState?.bestCandidateNeedsVerification || false,
2421
+ bestCandidateVerified: seededResumeState?.bestCandidateVerified || false,
2422
+ verificationRequired: seededResumeState?.verificationRequired ?? Boolean(options.localTools?.run_check),
2423
+ lastVerificationPassed: seededResumeState?.lastVerificationPassed || false,
1340
2424
  sessionCount: 0,
1341
2425
  totalCost: 0,
1342
2426
  lastOutput: '',
@@ -1344,13 +2428,39 @@ class AgentsEndpoint {
1344
2428
  sessions: [],
1345
2429
  startedAt: new Date().toISOString(),
1346
2430
  updatedAt: new Date().toISOString(),
2431
+ ...(seededResumeState?.originalMessage ? { originalMessage: seededResumeState.originalMessage } : {}),
2432
+ ...(seededResumeState?.bootstrapContext ? { bootstrapContext: seededResumeState.bootstrapContext } : {}),
2433
+ ...(seededResumeState?.bestCandidatePath
2434
+ ? {
2435
+ bestCandidatePath: seededResumeState.bestCandidatePath,
2436
+ bestCandidateReason: seededResumeState.bestCandidateReason,
2437
+ }
2438
+ : {}),
2439
+ ...(seededResumeState?.candidatePaths ? { candidatePaths: seededResumeState.candidatePaths } : {}),
2440
+ ...(seededResumeState?.recentReadPaths ? { recentReadPaths: seededResumeState.recentReadPaths } : {}),
2441
+ ...(seededResumeState?.recentActionKeys
2442
+ ? { recentActionKeys: seededResumeState.recentActionKeys }
2443
+ : {}),
1347
2444
  };
2445
+ this.updateWorkflowPhase(state, this.createEmptyToolTrace());
1348
2446
  // Track the record ID if we're syncing
1349
2447
  let recordId;
1350
2448
  // Extract local tool names for prompt injection
1351
2449
  const localToolNames = options.localTools ? Object.keys(options.localTools) : undefined;
2450
+ if (!options.previousMessages) {
2451
+ state.bootstrapContext = await this.generateBootstrapDiscoveryContext(options.message, options.localTools);
2452
+ const bootstrapCandidate = this.extractBestCandidateFromBootstrapContext(state.bootstrapContext);
2453
+ if (bootstrapCandidate) {
2454
+ state.bestCandidatePath = bootstrapCandidate.path;
2455
+ state.bestCandidateReason = bootstrapCandidate.reason;
2456
+ state.candidatePaths = [bootstrapCandidate.path];
2457
+ }
2458
+ }
1352
2459
  // Session loop
1353
2460
  for (let session = 0; session < maxSessions; session++) {
2461
+ const sessionTrace = this.createEmptyToolTrace();
2462
+ const sessionLocalTools = this.wrapLocalToolsForTrace(options.localTools, sessionTrace, state);
2463
+ const sessionCallbacks = this.createTraceCallbacks(options.streamCallbacks, sessionTrace);
1354
2464
  // Build continuation context for resumed runs (first session only)
1355
2465
  const continuationContext = session === 0 && options.previousMessages
1356
2466
  ? {
@@ -1367,10 +2477,16 @@ class AgentsEndpoint {
1367
2477
  const messages = this.buildSessionMessages(options.message, state, session, maxSessions, localToolNames, continuationContext);
1368
2478
  // Execute one session
1369
2479
  let sessionResult;
1370
- const sessionData = { messages, debugMode: options.debugMode, model: options.model };
2480
+ const sessionData = {
2481
+ messages,
2482
+ debugMode: options.debugMode,
2483
+ model: options.model,
2484
+ };
1371
2485
  if (useStream && options.localTools) {
1372
2486
  // Local tools require the pause/resume streaming loop
1373
- const completeEvent = await this.executeWithLocalTools(id, sessionData, options.localTools, options.streamCallbacks);
2487
+ const completeEvent = await this.executeWithLocalTools(id, sessionData, sessionLocalTools || options.localTools, sessionCallbacks, {
2488
+ onLocalToolResult: this.createLocalToolLoopGuard(state, sessionTrace),
2489
+ });
1374
2490
  if (!completeEvent) {
1375
2491
  throw new Error('Agent stream ended without a complete event');
1376
2492
  }
@@ -1384,7 +2500,7 @@ class AgentsEndpoint {
1384
2500
  };
1385
2501
  }
1386
2502
  else if (useStream && options.streamCallbacks) {
1387
- const completeEvent = await this.executeWithCallbacks(id, sessionData, options.streamCallbacks);
2503
+ const completeEvent = await this.executeWithCallbacks(id, sessionData, sessionCallbacks || options.streamCallbacks);
1388
2504
  if (!completeEvent) {
1389
2505
  throw new Error('Agent stream ended without a complete event');
1390
2506
  }
@@ -1400,11 +2516,17 @@ class AgentsEndpoint {
1400
2516
  else {
1401
2517
  sessionResult = await this.execute(id, sessionData);
1402
2518
  }
2519
+ const toolTraceSummary = this.buildToolTraceSummary(sessionTrace);
2520
+ const effectiveSessionOutput = this.buildEffectiveSessionOutput(sessionResult.result, toolTraceSummary);
1403
2521
  // Update state
1404
2522
  const sessionCost = sessionResult.totalCost;
1405
2523
  state.sessionCount = session + 1;
1406
2524
  state.totalCost += sessionCost;
1407
- state.lastOutput = sessionResult.result;
2525
+ state.lastOutput = effectiveSessionOutput;
2526
+ state.lastError =
2527
+ sessionResult.stopReason === 'error'
2528
+ ? sessionResult.error || 'Agent session ended with an error.'
2529
+ : undefined;
1408
2530
  state.lastStopReason = sessionResult.stopReason;
1409
2531
  state.updatedAt = new Date().toISOString();
1410
2532
  state.sessions.push({
@@ -1412,22 +2534,89 @@ class AgentsEndpoint {
1412
2534
  cost: sessionCost,
1413
2535
  iterations: sessionResult.iterations,
1414
2536
  stopReason: sessionResult.stopReason,
1415
- outputPreview: sessionResult.result.slice(0, 300),
2537
+ outputPreview: effectiveSessionOutput.slice(0, 300),
2538
+ toolTraceSummary: toolTraceSummary || undefined,
2539
+ discoveryPerformed: sessionTrace.discoveryPerformed,
2540
+ attemptedWrite: sessionTrace.attemptedWrite,
2541
+ wroteFiles: sessionTrace.wroteFiles,
2542
+ hadTextOutput: Boolean(sessionResult.result.trim()),
2543
+ verificationAttempted: sessionTrace.verificationAttempted,
2544
+ verificationPassed: sessionTrace.verificationPassed,
2545
+ bestCandidatePath: sessionTrace.bestCandidatePath || undefined,
2546
+ actionKeys: sessionTrace.actionKeys.slice(-5),
1416
2547
  completedAt: new Date().toISOString(),
1417
2548
  });
2549
+ if (sessionTrace.bestCandidatePath) {
2550
+ state.bestCandidatePath = sessionTrace.bestCandidatePath;
2551
+ state.bestCandidateReason = sessionTrace.bestCandidateReason;
2552
+ }
2553
+ if (sessionTrace.candidatePaths.length > 0) {
2554
+ state.candidatePaths = Array.from(new Set([...(state.candidatePaths || []), ...sessionTrace.candidatePaths])).slice(-20);
2555
+ }
2556
+ if (sessionTrace.readPaths.length > 0) {
2557
+ state.recentReadPaths = Array.from(new Set([...(state.recentReadPaths || []), ...sessionTrace.readPaths])).slice(-20);
2558
+ }
2559
+ if (sessionTrace.actionKeys.length > 0) {
2560
+ state.recentActionKeys = [...(state.recentActionKeys || []), ...sessionTrace.actionKeys].slice(-20);
2561
+ }
2562
+ if (sessionTrace.planWritten) {
2563
+ state.planWritten = true;
2564
+ }
2565
+ if (sessionTrace.executionFileWritten) {
2566
+ state.lastVerificationPassed = false;
2567
+ }
2568
+ if (sessionTrace.bestCandidateWritten) {
2569
+ state.bestCandidateNeedsVerification = true;
2570
+ state.bestCandidateVerified = false;
2571
+ }
2572
+ if (sessionTrace.bestCandidateVerified) {
2573
+ state.bestCandidateNeedsVerification = false;
2574
+ state.bestCandidateVerified = true;
2575
+ }
2576
+ if (sessionTrace.verificationAttempted) {
2577
+ state.lastVerificationPassed = sessionTrace.verificationPassed;
2578
+ }
1418
2579
  // Track cost by model
1419
2580
  const modelKey = options.model || 'default';
1420
2581
  if (!state.costByModel)
1421
2582
  state.costByModel = {};
1422
2583
  state.costByModel[modelKey] = (state.costByModel[modelKey] || 0) + sessionCost;
1423
- // Accumulate messages for future continuation
2584
+ this.updateWorkflowPhase(state, sessionTrace);
2585
+ const phaseTransitionSummary = state.phaseTransitionSummary;
2586
+ if (phaseTransitionSummary) {
2587
+ state.lastOutput = [phaseTransitionSummary, '', state.lastOutput].join('\n').trim();
2588
+ const latestSession = state.sessions[state.sessions.length - 1];
2589
+ if (latestSession) {
2590
+ latestSession.outputPreview = [phaseTransitionSummary, '', latestSession.outputPreview]
2591
+ .join('\n')
2592
+ .slice(0, 300);
2593
+ latestSession.toolTraceSummary = [phaseTransitionSummary, '', latestSession.toolTraceSummary || '']
2594
+ .join('\n')
2595
+ .trim()
2596
+ .slice(0, 1200);
2597
+ }
2598
+ }
2599
+ // Accumulate messages for future continuation.
2600
+ // When buildSessionMessages returns the full history + a new continuation
2601
+ // message, only the NEW messages at the end are appended — otherwise the
2602
+ // history would be re-pushed on every session and grow exponentially.
1424
2603
  if (!state.messages)
1425
2604
  state.messages = [];
1426
- state.messages.push(...messages);
1427
- // Also store the assistant's response as a message
1428
- if (sessionResult.result) {
1429
- state.messages.push({ role: 'assistant', content: sessionResult.result });
2605
+ if (state.messages.length > 0 && messages.length > state.messages.length) {
2606
+ // Continuation session: history was replayed, only append the new tail
2607
+ const newMessages = messages.slice(state.messages.length);
2608
+ state.messages.push(...newMessages);
2609
+ }
2610
+ else {
2611
+ // First session (or no prior history): all messages are new
2612
+ state.messages.push(...messages);
1430
2613
  }
2614
+ // Always store an assistant message so continuation sessions have full
2615
+ // conversation history. When the agent only made tool calls and produced
2616
+ // no text, fall back to a synthetic summary so the history stays coherent.
2617
+ const assistantContent = effectiveSessionOutput ||
2618
+ `[Session ${session + 1} completed (${sessionResult.stopReason}). No text output captured.]`;
2619
+ state.messages.push({ role: 'assistant', content: assistantContent });
1431
2620
  // Keep session log trimmed to last 50 entries
1432
2621
  if (state.sessions.length > 50) {
1433
2622
  state.sessions = state.sessions.slice(-50);
@@ -1437,12 +2626,12 @@ class AgentsEndpoint {
1437
2626
  state.status = 'complete';
1438
2627
  }
1439
2628
  else if (sessionResult.stopReason === 'error') {
1440
- state.status = 'complete';
2629
+ state.status = 'error';
1441
2630
  }
1442
2631
  else if (sessionResult.stopReason === 'max_cost') {
1443
2632
  state.status = 'budget_exceeded';
1444
2633
  }
1445
- else if (this.detectTaskCompletion(sessionResult.result)) {
2634
+ else if (this.canAcceptTaskCompletion(sessionResult.result, state, sessionTrace)) {
1446
2635
  // Client-side stop-phrase detection for non-loop agents returning 'end_turn'
1447
2636
  state.status = 'complete';
1448
2637
  }
@@ -1496,8 +2685,18 @@ class AgentsEndpoint {
1496
2685
  return [
1497
2686
  `Task: ${state.taskName}`,
1498
2687
  `Status: ${state.status}`,
2688
+ `Workflow phase: ${state.workflowPhase || 'research'}`,
1499
2689
  `Sessions completed: ${state.sessionCount}`,
1500
2690
  `Total cost: $${state.totalCost.toFixed(4)}`,
2691
+ ...(state.planPath ? [`Plan path: ${state.planPath}`] : []),
2692
+ ...(state.planWritten ? ['Plan written: yes'] : []),
2693
+ ...(state.bestCandidatePath
2694
+ ? [
2695
+ `Best candidate: ${state.bestCandidatePath}`,
2696
+ ...(state.bestCandidateReason ? [`Candidate reason: ${state.bestCandidateReason}`] : []),
2697
+ ]
2698
+ : []),
2699
+ ...(state.bootstrapContext ? ['', state.bootstrapContext] : []),
1501
2700
  '',
1502
2701
  'Session history:',
1503
2702
  sessionSummaries,
@@ -1512,17 +2711,63 @@ class AgentsEndpoint {
1512
2711
  */
1513
2712
  buildSessionMessages(originalMessage, state, sessionIndex, maxSessions, localToolNames, continuationContext) {
1514
2713
  // Build local tools guidance block when tools are available
2714
+ const phase = state.workflowPhase || 'research';
1515
2715
  const toolsBlock = localToolNames?.length
1516
2716
  ? [
1517
2717
  '',
1518
2718
  '--- Local Tools ---',
1519
2719
  `You have access to local filesystem tools (${localToolNames.join(', ')}) that execute directly on the user's machine.`,
1520
- 'Use these tools to create working, runnable files — not just code in your response.',
1521
- 'Prefer creating self-contained HTML files that the user can open in a web browser.',
1522
- 'For example, write a single .html file with inline CSS and JavaScript that demonstrates the result.',
2720
+ 'Use these tools to inspect the existing repository and make real file edits — not just code in your response.',
2721
+ ...(phase === 'research'
2722
+ ? [
2723
+ 'For repository modification tasks, before any write_file call you must perform at least one discovery action (search_repo, glob_files, or tree_directory).',
2724
+ 'If discovery finds a plausible existing file, you must read at least one candidate file before writing.',
2725
+ 'Before creating a new file, search the repo for existing relevant files, routes, links, components, or pages.',
2726
+ 'Prefer editing an existing file when one already implements or links to the feature you were asked to change.',
2727
+ 'Use search_repo, glob_files, and tree_directory to discover the right files before you call write_file.',
2728
+ 'Only create a new file when no suitable existing file exists, and make that decision intentionally.',
2729
+ ]
2730
+ : phase === 'planning'
2731
+ ? [
2732
+ `Research is already complete. Focus on writing or updating the plan at: ${state.planPath || this.getDefaultPlanPath(state.taskName)}.`,
2733
+ 'Do not restart broad repo discovery unless the saved best candidate is clearly invalid.',
2734
+ 'You may read the current target file or supporting source files if you need evidence for the plan.',
2735
+ 'Ground the plan in the existing implementation. Identify which current behaviors and linked files must be preserved.',
2736
+ 'List the exact verification commands you expect to run after editing, such as lint, typecheck, tests, or build.',
2737
+ ]
2738
+ : [
2739
+ ...(state.bestCandidatePath
2740
+ ? [
2741
+ `Execution-phase guard: broad discovery tools (search_repo, glob_files, tree_directory, list_directory) are locked while executing against "${state.bestCandidatePath}".`,
2742
+ ]
2743
+ : [
2744
+ 'Execution-phase guard: broad discovery tools are locked unless a read of the current target fails.',
2745
+ ]),
2746
+ 'Reading the markdown plan for status does not change the product target. Do not treat the plan file as the file to implement.',
2747
+ 'Do not write the plan file first in execution. Make a real repo-file edit before you update the plan with progress.',
2748
+ 'Do not create scratch or test files to probe the repo or tool behavior.',
2749
+ 'write_file automatically checkpoints original repo files before overwriting them. If an edit regresses behavior, use restore_file_checkpoint on that file.',
2750
+ 'Read the target file and edit it with write_file. Update the plan file with progress after completing real edits.',
2751
+ 'Before large edits, read any already discovered supporting source/style files that power the target so you preserve existing behavior.',
2752
+ 'Prefer minimal diffs over rewrites. If you cannot verify related behavior, stop and record what is still unverified instead of rewriting blindly.',
2753
+ 'Use run_check for real verification before TASK_COMPLETE. Good examples: "pnpm lint", "pnpm exec tsc --noEmit", "pnpm test", or a focused vitest/pytest command.',
2754
+ 'Broad discovery is only allowed if a read of the current target file fails.',
2755
+ ]),
1523
2756
  'Always use write_file to save your output so the user can run it immediately.',
1524
2757
  ].join('\n')
1525
2758
  : '';
2759
+ const bootstrapBlock = state.bootstrapContext
2760
+ ? ['', '--- Initial Repository Discovery ---', state.bootstrapContext].join('\n')
2761
+ : '';
2762
+ const phaseBlock = ['', this.buildPhaseInstructions(state)].join('\n');
2763
+ const candidateBlock = state.bestCandidatePath
2764
+ ? [
2765
+ '',
2766
+ '--- Best Candidate ---',
2767
+ `Current best candidate file: ${state.bestCandidatePath}`,
2768
+ ...(state.bestCandidateReason ? [`Why: ${state.bestCandidateReason}`] : []),
2769
+ ].join('\n')
2770
+ : '';
1526
2771
  const multiSessionInstruction = `This is a multi-session task (session ${sessionIndex + 1}/${maxSessions}). When you have fully completed the task, end your response with TASK_COMPLETE on its own line.`;
1527
2772
  // Continuation resume: first session of a resumed run with prior context
1528
2773
  if (continuationContext && sessionIndex === 0) {
@@ -1538,7 +2783,7 @@ class AgentsEndpoint {
1538
2783
  },
1539
2784
  {
1540
2785
  role: 'user',
1541
- content: [userMessage, toolsBlock, '', multiSessionInstruction].join('\n'),
2786
+ content: [userMessage, phaseBlock, toolsBlock, bootstrapBlock, candidateBlock, '', multiSessionInstruction].join('\n'),
1542
2787
  },
1543
2788
  ];
1544
2789
  return messages;
@@ -1552,19 +2797,14 @@ class AgentsEndpoint {
1552
2797
  },
1553
2798
  {
1554
2799
  role: 'user',
1555
- content: [userMessage, toolsBlock, '', multiSessionInstruction].join('\n'),
2800
+ content: [userMessage, phaseBlock, toolsBlock, bootstrapBlock, candidateBlock, '', multiSessionInstruction].join('\n'),
1556
2801
  },
1557
2802
  ];
1558
2803
  return messages;
1559
2804
  }
1560
2805
  // First session (non-continuation): user message + completion signal instruction
1561
2806
  if (sessionIndex === 0) {
1562
- const content = [
1563
- originalMessage,
1564
- toolsBlock,
1565
- '',
1566
- multiSessionInstruction,
1567
- ].join('\n');
2807
+ const content = [originalMessage, phaseBlock, toolsBlock, bootstrapBlock, candidateBlock, '', multiSessionInstruction].join('\n');
1568
2808
  return [{ role: 'user', content }];
1569
2809
  }
1570
2810
  // Continuation sessions within a run: inject progress context
@@ -1572,14 +2812,57 @@ class AgentsEndpoint {
1572
2812
  const progressSummary = recentSessions
1573
2813
  .map((s) => ` Session ${s.index}: ${s.stopReason} ($${s.cost.toFixed(4)}) — ${s.outputPreview.slice(0, 100)}`)
1574
2814
  .join('\n');
2815
+ // When we have accumulated message history, replay the full conversation
2816
+ // so the model has complete context and doesn't start fresh each session.
2817
+ if (state.messages && state.messages.length > 0) {
2818
+ const recoveryMessage = this.buildStuckTurnRecoveryMessage(state);
2819
+ const continuationContent = [
2820
+ 'Continue the task.',
2821
+ phaseBlock,
2822
+ toolsBlock,
2823
+ bootstrapBlock,
2824
+ candidateBlock,
2825
+ '',
2826
+ `--- Progress (session ${sessionIndex + 1}/${maxSessions}, $${state.totalCost.toFixed(4)} spent) ---`,
2827
+ `Previous sessions:`,
2828
+ progressSummary,
2829
+ '',
2830
+ ...(recoveryMessage ? [recoveryMessage, ''] : []),
2831
+ 'Do not redo previous work. If the task is already complete, respond with TASK_COMPLETE.',
2832
+ ].join('\n');
2833
+ // Cap history to prevent context overflow on long-running marathons.
2834
+ // Keep the most recent 40 messages; prepend a system summary for trimmed ones.
2835
+ const MAX_HISTORY_MESSAGES = 40;
2836
+ let historyMessages = state.messages;
2837
+ if (historyMessages.length > MAX_HISTORY_MESSAGES) {
2838
+ const trimmedCount = historyMessages.length - MAX_HISTORY_MESSAGES;
2839
+ historyMessages = [
2840
+ {
2841
+ role: 'system',
2842
+ content: `[${trimmedCount} earlier messages trimmed to stay within context limits. Original task: ${(state.originalMessage || originalMessage).slice(0, 500)}]`,
2843
+ },
2844
+ ...historyMessages.slice(-MAX_HISTORY_MESSAGES),
2845
+ ];
2846
+ }
2847
+ return [
2848
+ ...historyMessages,
2849
+ { role: 'user', content: continuationContent },
2850
+ ];
2851
+ }
2852
+ // Fallback when no message history is available: single-message summary
2853
+ const recoveryMessage = this.buildStuckTurnRecoveryMessage(state);
1575
2854
  const content = [
1576
2855
  originalMessage,
2856
+ phaseBlock,
1577
2857
  toolsBlock,
2858
+ bootstrapBlock,
2859
+ candidateBlock,
1578
2860
  '',
1579
2861
  `--- Progress (session ${sessionIndex + 1}/${maxSessions}, $${state.totalCost.toFixed(4)} spent) ---`,
1580
2862
  `Previous sessions:`,
1581
2863
  progressSummary,
1582
2864
  '',
2865
+ ...(recoveryMessage ? [recoveryMessage, ''] : []),
1583
2866
  `Last output (do NOT repeat this — build on it):`,
1584
2867
  state.lastOutput.slice(0, 1000),
1585
2868
  '',
@@ -1607,14 +2890,22 @@ class AgentsEndpoint {
1607
2890
  try {
1608
2891
  if (existingRecordId) {
1609
2892
  // Update existing record
1610
- const record = await this.client.put(`/records/${existingRecordId}`, { metadata });
2893
+ const record = await this.client.put(`/records/${existingRecordId}`, {
2894
+ metadata,
2895
+ });
1611
2896
  return record.id;
1612
2897
  }
1613
2898
  else {
1614
2899
  // Try to find existing record by type + name first
1615
- const existing = await this.client.get('/records', { type: 'agent-task', name: state.taskName, limit: 1 });
2900
+ const existing = await this.client.get('/records', {
2901
+ type: 'agent-task',
2902
+ name: state.taskName,
2903
+ limit: 1,
2904
+ });
1616
2905
  if (existing.data.length > 0) {
1617
- const record = await this.client.put(`/records/${existing.data[0].id}`, { metadata });
2906
+ const record = await this.client.put(`/records/${existing.data[0].id}`, {
2907
+ metadata,
2908
+ });
1618
2909
  return record.id;
1619
2910
  }
1620
2911
  // Create new record