@yeaft/webchat-agent 0.1.91 → 0.1.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/claude.js CHANGED
@@ -147,6 +147,16 @@ export async function startClaudeQuery(conversationId, workDir, resumeSessionId)
147
147
  return state;
148
148
  }
149
149
 
150
/**
 * Detect whether an error message indicates that the prompt token count
 * exceeded the model limit.
 *
 * Matches API errors such as
 * "prompt token count of 138392 exceeds the limit of 128000".
 * The check is a case-insensitive keyword match: the message must mention
 * both "prompt" and "token", plus either "exceed" or "limit".
 *
 * @param {unknown} errorMessage - Error text to inspect. Anything that is not
 *   a non-empty string (undefined, null, Error objects, numbers, ...) is
 *   treated as "not an overflow" instead of throwing.
 * @returns {boolean} True when the message looks like a prompt-token overflow.
 */
export function isPromptTokenOverflow(errorMessage) {
  // Guard on the type, not just truthiness: a truthy non-string would
  // otherwise throw on .toLowerCase() and mask the original failure.
  if (typeof errorMessage !== 'string' || errorMessage.length === 0) {
    return false;
  }

  const msg = errorMessage.toLowerCase();
  const mentionsPromptTokens = msg.includes('prompt') && msg.includes('token');
  const mentionsOverflow = msg.includes('exceed') || msg.includes('limit');
  return mentionsPromptTokens && mentionsOverflow;
}
159
+
150
160
  /**
151
161
  * 检测并追踪后台任务(仅 Bash 和 Agent 任务)
152
162
  * 普通工具调用(Read、Edit、Grep、Glob 等)不跟踪
@@ -508,10 +518,12 @@ async function processClaudeOutput(conversationId, claudeQuery, state) {
508
518
  // ★ Pre-send compact check for RolePlay auto-continue
509
519
  const rpAutoCompactThreshold = ctx.CONFIG?.autoCompactThreshold || 110000;
510
520
  const rpEstimatedNewTokens = Math.ceil(prompt.length / 3);
511
- const rpEstimatedTotal = inputTokens + rpEstimatedNewTokens;
521
+ // Include output_tokens: the assistant's output becomes part of context for the next turn
522
+ const rpOutputTokens = message.usage?.output_tokens || 0;
523
+ const rpEstimatedTotal = inputTokens + rpOutputTokens + rpEstimatedNewTokens;
512
524
 
513
525
  if (rpEstimatedTotal > rpAutoCompactThreshold) {
514
- console.log(`[RolePlay] Pre-send compact: estimated ${rpEstimatedTotal} tokens (last: ${inputTokens} + new: ~${rpEstimatedNewTokens}) exceeds threshold ${rpAutoCompactThreshold}`);
526
+ console.log(`[RolePlay] Pre-send compact: estimated ${rpEstimatedTotal} tokens (input: ${inputTokens} + output: ${rpOutputTokens} + new: ~${rpEstimatedNewTokens}) exceeds threshold ${rpAutoCompactThreshold}`);
515
527
  ctx.sendToServer({
516
528
  type: 'compact_status',
517
529
  conversationId,
@@ -543,6 +555,7 @@ async function processClaudeOutput(conversationId, claudeQuery, state) {
543
555
  type: 'user',
544
556
  message: { role: 'user', content: prompt }
545
557
  };
558
+ state._lastUserMessage = userMessage; // Save for prompt-overflow retry
546
559
  sendOutput(conversationId, userMessage);
547
560
  state.inputStream.enqueue(userMessage);
548
561
 
@@ -648,6 +661,39 @@ async function processClaudeOutput(conversationId, claudeQuery, state) {
648
661
  } else if (resultHandled) {
649
662
  // Turn 已正常完成,进程退出产生的 error 不发送给用户
650
663
  console.warn(`[SDK] Ignoring post-result error for ${conversationId}: ${error.message}`);
664
+ } else if (isPromptTokenOverflow(error.message) && state.claudeSessionId && !state._compactRetried) {
665
+ // ★ 兜底:prompt token 溢出 → 自动 compact + 重试(而非暴露 raw API error 给用户)
666
+ console.warn(`[SDK] Prompt token overflow for ${conversationId}, auto-compact + retry`);
667
+ const savedSessionId = state.claudeSessionId;
668
+ const savedLastMsg = state._lastUserMessage;
669
+
670
+ ctx.sendToServer({
671
+ type: 'compact_status',
672
+ conversationId,
673
+ status: 'compacting',
674
+ message: 'Context too long, auto-compacting and retrying...'
675
+ });
676
+
677
+ // 重启 SDK(startClaudeQuery 会先 abort 当前 state,使 finally 中 isStale=true)
678
+ try {
679
+ const newState = await startClaudeQuery(conversationId, state.workDir, savedSessionId);
680
+ newState._compactRetried = true; // 防止无限重试
681
+ newState.turnActive = true;
682
+ newState.turnResultReceived = false;
683
+
684
+ // 先 compact,再重试原始消息(如果有的话)
685
+ if (savedLastMsg) {
686
+ newState._pendingUserMessage = savedLastMsg;
687
+ }
688
+ newState.inputStream.enqueue({
689
+ type: 'user',
690
+ message: { role: 'user', content: '/compact' }
691
+ });
692
+ sendConversationList();
693
+ } catch (retryError) {
694
+ console.error(`[SDK] Compact-retry failed for ${conversationId}:`, retryError.message);
695
+ sendError(conversationId, `Context too long. Auto-compact failed: ${retryError.message}`);
696
+ }
651
697
  } else {
652
698
  console.error(`[SDK] Error for ${conversationId}:`, error.message);
653
699
  sendError(conversationId, error.message);
package/conversation.js CHANGED
@@ -675,11 +675,13 @@ export async function handleUserInput(msg) {
675
675
  // ★ Pre-send compact check: estimate total tokens and compact before sending if needed
676
676
  const autoCompactThreshold = ctx.CONFIG?.autoCompactThreshold || 110000;
677
677
  const lastInputTokens = state.lastResultInputTokens || 0;
678
+ const lastOutputTokens = state.lastResultOutputTokens || 0;
678
679
  const estimatedNewTokens = Math.ceil(effectivePrompt.length / 3); // conservative: ~3 chars per token
679
- const estimatedTotal = lastInputTokens + estimatedNewTokens;
680
+ // Include output_tokens: the assistant's last output becomes part of context for the next turn
681
+ const estimatedTotal = lastInputTokens + lastOutputTokens + estimatedNewTokens;
680
682
 
681
683
  if (estimatedTotal > autoCompactThreshold && state.inputStream) {
682
- console.log(`[${conversationId}] Pre-send compact: estimated ${estimatedTotal} tokens (last: ${lastInputTokens} + new: ~${estimatedNewTokens}) exceeds threshold ${autoCompactThreshold}`);
684
+ console.log(`[${conversationId}] Pre-send compact: estimated ${estimatedTotal} tokens (input: ${lastInputTokens} + output: ${lastOutputTokens} + new: ~${estimatedNewTokens}) exceeds threshold ${autoCompactThreshold}`);
683
685
  ctx.sendToServer({
684
686
  type: 'compact_status',
685
687
  conversationId,
@@ -702,6 +704,7 @@ export async function handleUserInput(msg) {
702
704
 
703
705
  state.turnActive = true;
704
706
  state.turnResultReceived = false; // 重置 per-turn 去重标志
707
+ state._lastUserMessage = userMessage; // Save for prompt-overflow retry
705
708
  sendConversationList(); // 在 turnActive=true 后通知 server,确保 processing 状态正确
706
709
  sendOutput(conversationId, displayMessage);
707
710
  state.inputStream.enqueue(userMessage);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yeaft/webchat-agent",
3
- "version": "0.1.91",
3
+ "version": "0.1.92",
4
4
  "description": "Remote agent for Yeaft WebChat — connects worker machines to the central server",
5
5
  "main": "index.js",
6
6
  "type": "module",