npm - @yeaft/webchat-agent - Versions diffs - 0.1.91 → 0.1.92 - Mend

@yeaft/webchat-agent 0.1.91 → 0.1.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/claude.js CHANGED Viewed

@@ -147,6 +147,16 @@ export async function startClaudeQuery(conversationId, workDir, resumeSessionId)
   return state;
 }
+/**
+ * Heuristically detect whether an error message reports that the prompt's token count
+ * exceeded the model limit.
+ * Matches API errors like "prompt token count of 138392 exceeds the limit of 128000".
+ *
+ * The check is a case-insensitive substring match: the message must contain "prompt",
+ * "token", and at least one of "exceed" or "limit".
+ * NOTE(review): "limit" alone satisfies the last condition, so any message mentioning
+ * "prompt", "token" and "limit" matches — confirm no unrelated API error fits that shape.
+ *
+ * @param {string} errorMessage - Error text to inspect; assumed to be a string when truthy
+ *   (a truthy non-string would fail on toLowerCase) — TODO confirm callers.
+ * @returns {boolean} false for a falsy message, otherwise whether the keywords are present.
+ */
+export function isPromptTokenOverflow(errorMessage) {
+  if (!errorMessage) return false; // null/undefined/empty message: nothing to match
+  const msg = errorMessage.toLowerCase(); // normalize so the keyword checks ignore case
+  return msg.includes('prompt') && msg.includes('token') && (msg.includes('exceed') || msg.includes('limit'));
+}
 /**
  * 检测并追踪后台任务（仅 Bash 和 Agent 任务）
  * 普通工具调用（Read、Edit、Grep、Glob 等）不跟踪
@@ -508,10 +518,12 @@ async function processClaudeOutput(conversationId, claudeQuery, state) {
             // ★ Pre-send compact check for RolePlay auto-continue
             const rpAutoCompactThreshold = ctx.CONFIG?.autoCompactThreshold || 110000;
             const rpEstimatedNewTokens = Math.ceil(prompt.length / 3);
-            const rpEstimatedTotal = inputTokens + rpEstimatedNewTokens;
+            // Include output_tokens: the assistant's output becomes part of context for the next turn
+            const rpOutputTokens = message.usage?.output_tokens || 0;
+            const rpEstimatedTotal = inputTokens + rpOutputTokens + rpEstimatedNewTokens;
             if (rpEstimatedTotal > rpAutoCompactThreshold) {
-              console.log(`[RolePlay] Pre-send compact: estimated ${rpEstimatedTotal} tokens (last: ${inputTokens} + new: ~${rpEstimatedNewTokens}) exceeds threshold ${rpAutoCompactThreshold}`);
+              console.log(`[RolePlay] Pre-send compact: estimated ${rpEstimatedTotal} tokens (input: ${inputTokens} + output: ${rpOutputTokens} + new: ~${rpEstimatedNewTokens}) exceeds threshold ${rpAutoCompactThreshold}`);
               ctx.sendToServer({
                 type: 'compact_status',
                 conversationId,
@@ -543,6 +555,7 @@ async function processClaudeOutput(conversationId, claudeQuery, state) {
               type: 'user',
               message: { role: 'user', content: prompt }
             };
+            state._lastUserMessage = userMessage; // Save for prompt-overflow retry
             sendOutput(conversationId, userMessage);
             state.inputStream.enqueue(userMessage);
@@ -648,6 +661,39 @@ async function processClaudeOutput(conversationId, claudeQuery, state) {
     } else if (resultHandled) {
       // Turn 已正常完成，进程退出产生的 error 不发送给用户
       console.warn(`[SDK] Ignoring post-result error for ${conversationId}: ${error.message}`);
+    } else if (isPromptTokenOverflow(error.message) && state.claudeSessionId && !state._compactRetried) {
+      // ★ 兜底：prompt token 溢出 → 自动 compact + 重试（而非暴露 raw API error 给用户）
+      console.warn(`[SDK] Prompt token overflow for ${conversationId}, auto-compact + retry`);
+      const savedSessionId = state.claudeSessionId;
+      const savedLastMsg = state._lastUserMessage;
+      ctx.sendToServer({
+        type: 'compact_status',
+        conversationId,
+        status: 'compacting',
+        message: 'Context too long, auto-compacting and retrying...'
+      });
+      // 重启 SDK（startClaudeQuery 会先 abort 当前 state，使 finally 中 isStale=true）
+      try {
+        const newState = await startClaudeQuery(conversationId, state.workDir, savedSessionId);
+        newState._compactRetried = true; // 防止无限重试
+        newState.turnActive = true;
+        newState.turnResultReceived = false;
+        // 先 compact，再重试原始消息（如果有的话）
+        if (savedLastMsg) {
+          newState._pendingUserMessage = savedLastMsg;
+        }
+        newState.inputStream.enqueue({
+          type: 'user',
+          message: { role: 'user', content: '/compact' }
+        });
+        sendConversationList();
+      } catch (retryError) {
+        console.error(`[SDK] Compact-retry failed for ${conversationId}:`, retryError.message);
+        sendError(conversationId, `Context too long. Auto-compact failed: ${retryError.message}`);
+      }
     } else {
       console.error(`[SDK] Error for ${conversationId}:`, error.message);
       sendError(conversationId, error.message);

package/conversation.js CHANGED Viewed

@@ -675,11 +675,13 @@ export async function handleUserInput(msg) {
   // ★ Pre-send compact check: estimate total tokens and compact before sending if needed
   const autoCompactThreshold = ctx.CONFIG?.autoCompactThreshold || 110000;
   const lastInputTokens = state.lastResultInputTokens || 0;
+  const lastOutputTokens = state.lastResultOutputTokens || 0;
   const estimatedNewTokens = Math.ceil(effectivePrompt.length / 3); // conservative: ~3 chars per token
-  const estimatedTotal = lastInputTokens + estimatedNewTokens;
+  // Include output_tokens: the assistant's last output becomes part of context for the next turn
+  const estimatedTotal = lastInputTokens + lastOutputTokens + estimatedNewTokens;
   if (estimatedTotal > autoCompactThreshold && state.inputStream) {
-    console.log(`[${conversationId}] Pre-send compact: estimated ${estimatedTotal} tokens (last: ${lastInputTokens} + new: ~${estimatedNewTokens}) exceeds threshold ${autoCompactThreshold}`);
+    console.log(`[${conversationId}] Pre-send compact: estimated ${estimatedTotal} tokens (input: ${lastInputTokens} + output: ${lastOutputTokens} + new: ~${estimatedNewTokens}) exceeds threshold ${autoCompactThreshold}`);
     ctx.sendToServer({
       type: 'compact_status',
       conversationId,
@@ -702,6 +704,7 @@ export async function handleUserInput(msg) {
   state.turnActive = true;
   state.turnResultReceived = false; // 重置 per-turn 去重标志
+  state._lastUserMessage = userMessage; // Save for prompt-overflow retry
   sendConversationList(); // 在 turnActive=true 后通知 server，确保 processing 状态正确
   sendOutput(conversationId, displayMessage);
   state.inputStream.enqueue(userMessage);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@yeaft/webchat-agent",
-  "version": "0.1.91",
+  "version": "0.1.92",
   "description": "Remote agent for Yeaft WebChat — connects worker machines to the central server",
   "main": "index.js",
   "type": "module",