npm - @yeaft/webchat-agent - Versions diffs - 0.1.794 → 0.1.796 - Mend

@yeaft/webchat-agent 0.1.794 → 0.1.796

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

package/package.json +1 -1
package/unify/engine.js +118 -24
package/unify/session.js +7 -5
package/unify/tools/index.js +3 -3
package/unify/tools/todo-write.js +5 -5
package/unify/vp/thread-classifier.js +113 -0
package/unify/vp-status-broker.js +120 -165
package/unify/web-bridge.js +480 -296

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@yeaft/webchat-agent",
-  "version": "0.1.794",
+  "version": "0.1.796",
   "description": "Remote agent for Yeaft WebChat — connects worker machines to the central server",
   "main": "index.js",
   "type": "module",

package/unify/engine.js CHANGED Viewed

@@ -31,9 +31,8 @@ import { readSummary as readScopeSummary } from './memory/store-v2.js';
 import { runAdjust } from './memory/adjust.js';
 import { isVpSeedBackfillStub } from './memory/seed-backfill.js';
 import { runStopHooks } from './stop-hooks.js';
-// H2.f.5: threads/ retired. Persisted messages still carry a `threadId`
-// field for back-compat with old conversation files; new writes always use
-// the constant 'main'.
+// Default thread marker for legacy / non-group flows. Group VP runtime may
+// pass a real threadId per (groupId, vpId, threadId) engine instance.
 const MAIN_THREAD_ID = 'main';
 import { pickEffort, parseEffortPrefix } from './effort.js';
 import { normalizeEffort, resolveContextWindow } from './models.js';
@@ -313,6 +312,12 @@ export class Engine {
   #reflectedTurns = new Set();
   #__queryCounter = 0;
+  /** @type {string} */
+  #currentThreadId = MAIN_THREAD_ID;
+  /** @type {Array<{content:string|Array, preview:string}>} */
+  #pendingUserMessages = [];
   /**
    * Per-group "adjust has run at least once this engine lifetime" flag.
    * Keyed by groupId (or 'default'). The first turn always runs adjust;
@@ -950,9 +955,9 @@ export class Engine {
     if (!this.#conversationStore) return;
     if (this.#config._readOnly) return;
-    // H2.f.5: threads retired. Persisted messages still carry threadId
-    // for back-compat with old conversation files; new writes always use 'main'.
-    const threadId = MAIN_THREAD_ID;
+    // Persist with the active runtime thread. Legacy / non-group flows use
+    // MAIN_THREAD_ID; group VP flows pass their classified threadId.
+    const threadId = this.#currentThreadId || MAIN_THREAD_ID;
     // Persist user message — unless an upstream caller (e.g. the group
     // coordinator) has already done so for this turn.
@@ -1093,6 +1098,33 @@ export class Engine {
     }
   }
+  #drainPendingUserMessages(drainPendingUserMessages) {
+    const pending = [];
+    if (typeof drainPendingUserMessages === 'function') {
+      try {
+        const drained = drainPendingUserMessages();
+        if (Array.isArray(drained)) pending.push(...drained);
+      } catch {
+        // Best-effort hook; a bad bridge callback must not kill the engine loop.
+      }
+    }
+    if (this.#pendingUserMessages.length > 0) {
+      pending.push(...this.#pendingUserMessages.splice(0));
+    }
+    return pending
+      .map((item) => {
+        if (typeof item === 'string') return { content: item, preview: item };
+        if (!item || typeof item !== 'object') return null;
+        const content = item.content ?? item.text;
+        if (typeof content !== 'string' && !Array.isArray(content)) return null;
+        const preview = typeof item.preview === 'string'
+          ? item.preview
+          : (typeof content === 'string' ? content : '[content blocks]');
+        return { content, preview };
+      })
+      .filter(Boolean);
+  }
   /**
    * Run a query — the main loop.
    *
@@ -1122,7 +1154,7 @@ export class Engine {
    *   string-prompt shape (no regression for existing callers).
    * @yields {EngineEvent}
    */
-  async *query({ prompt, promptParts = null, messages = [], signal, userEffort = null, scenario = 'chat', vpPersona, router, senderVpId, inboundEnvelope, taskId, taskMembers, groupId, vpPlan, groupAnnouncement, workDir, userAlreadyPersisted = false, getCurrentTodos = null, setCurrentTodos = null } = {}) {
+  async *query({ prompt, promptParts = null, messages = [], signal, userEffort = null, scenario = 'chat', vpPersona, router, senderVpId, inboundEnvelope, taskId, taskMembers, groupId, vpPlan, groupAnnouncement, workDir, userAlreadyPersisted = false, getCurrentTodos = null, setCurrentTodos = null, threadId = MAIN_THREAD_ID, drainPendingUserMessages = null } = {}) {
     if (!prompt || typeof prompt !== 'string' || !prompt.trim()) {
       yield {
         type: 'error',
@@ -1181,7 +1213,8 @@ export class Engine {
     const runSignal = abortCtrl.signal;
     try {
-      yield* this.#runQuery({ prompt: effectivePrompt, promptParts, messages, signal: runSignal, userEffort: effectiveUserEffort, scenario, vpPersona, router, senderVpId, inboundEnvelope, taskId, taskMembers, groupId, vpPlan, groupAnnouncement, workDir, userAlreadyPersisted, getCurrentTodos, setCurrentTodos });
+      this.#currentThreadId = threadId || MAIN_THREAD_ID;
+      yield* this.#runQuery({ prompt: effectivePrompt, promptParts, messages, signal: runSignal, userEffort: effectiveUserEffort, scenario, vpPersona, router, senderVpId, inboundEnvelope, taskId, taskMembers, groupId, vpPlan, groupAnnouncement, workDir, userAlreadyPersisted, getCurrentTodos, setCurrentTodos, threadId: this.#currentThreadId, drainPendingUserMessages });
     } finally {
       if (signal) {
         try { signal.removeEventListener('abort', onExternalAbort); } catch { /* ignore */ }
@@ -1190,6 +1223,8 @@ export class Engine {
       // and a subsequent query() starts with a clean slate.
       this.#currentAbortCtrl = null;
       this.#abortReason = null;
+      this.#currentThreadId = MAIN_THREAD_ID;
+      this.#pendingUserMessages.length = 0;
     }
   }
@@ -1199,7 +1234,7 @@ export class Engine {
    * in a try/finally without indenting the whole loop.
    * @private
    */
-  async *#runQuery({ prompt, promptParts = null, messages, signal, userEffort = null, scenario = 'chat', vpPersona, router, senderVpId, inboundEnvelope, taskId, taskMembers, groupId, vpPlan, groupAnnouncement, workDir, userAlreadyPersisted = false, getCurrentTodos = null, setCurrentTodos = null }) {
+  async *#runQuery({ prompt, promptParts = null, messages, signal, userEffort = null, scenario = 'chat', vpPersona, router, senderVpId, inboundEnvelope, taskId, taskMembers, groupId, vpPlan, groupAnnouncement, workDir, userAlreadyPersisted = false, getCurrentTodos = null, setCurrentTodos = null, threadId = MAIN_THREAD_ID, drainPendingUserMessages = null }) {
     // ─── Pre-query: FTS5 Memory Recall + AMS snapshot ─────
     // Memory has a SINGLE render outlet now (DESIGN-PROMPT §3 ③):
@@ -1223,7 +1258,7 @@ export class Engine {
       ? recallResult.entries.length
       : 0;
     if (recallEntryCount > 0) {
-      yield { type: 'recall', entryCount: recallEntryCount, cached: false };
+      yield { type: 'recall', entryCount: recallEntryCount, cached: false, threadId };
     }
     // Layer-A summaries — same scopes AMS Resident will surface, loaded
@@ -1374,6 +1409,7 @@ export class Engine {
     yield {
       type: 'turn_open',
       turnId: queryTurnId,
+      threadId,
       userPrompt: userQuestionPreview,
       vpId: queryVpId,
       groupId: groupId || null,
@@ -1424,8 +1460,8 @@ export class Engine {
       // the previous iteration) cleanly ends the loop instead of
       // launching another adapter stream.
       if (signal?.aborted) {
-        yield { type: 'aborted', reason: this.#abortReason || 'external', turnNumber };
-        yield { type: 'turn_end', turnNumber, stopReason: 'aborted' };
+        yield { type: 'aborted', reason: this.#abortReason || 'external', turnNumber, threadId };
+        yield { type: 'turn_end', turnNumber, stopReason: 'aborted', threadId };
         break;
       }
@@ -1459,7 +1495,21 @@ export class Engine {
       // tools/registry.js can never disagree.
       const currentContextWindow = resolveContextWindow(currentModel, this.#config);
-      yield { type: 'turn_start', turnNumber };
+      yield { type: 'turn_start', turnNumber, threadId };
+      const appendedBeforeStream = this.#drainPendingUserMessages(drainPendingUserMessages);
+      if (appendedBeforeStream.length > 0) {
+        for (const item of appendedBeforeStream) {
+          conversationMessages.push({ role: 'user', content: item.content });
+          yield {
+            type: 'user_append',
+            turnId: queryTurnId,
+            loopNumber: turnNumber,
+            threadId,
+            preview: String(item.preview || '').slice(0, 200),
+          };
+        }
+      }
       try {
         // task-327b: resolve effort per-turn so the long-loop auto-bump
@@ -1635,6 +1685,7 @@ export class Engine {
         yield {
           type: 'loop',
           turnId: queryTurnId,
+          threadId,
           loopNumber: turnNumber,
           model: currentModel,
           systemPrompt,
@@ -1664,8 +1715,8 @@ export class Engine {
           || err?.name === 'LLMAbortError'
           || (signal?.aborted && /abort/i.test(err?.message || ''));
         if (isAbort || signal?.aborted) {
-          yield { type: 'aborted', reason: this.#abortReason || 'external', turnNumber };
-          yield { type: 'turn_end', turnNumber, stopReason: 'aborted' };
+          yield { type: 'aborted', reason: this.#abortReason || 'external', turnNumber, threadId };
+          yield { type: 'turn_end', turnNumber, stopReason: 'aborted', threadId };
           break;
         }
@@ -1674,7 +1725,7 @@ export class Engine {
           const consolidated = await this.#maybeConsolidate();
           if (consolidated && consolidated.archivedCount > 0) {
             yield { type: 'consolidate', archivedCount: consolidated.archivedCount, extractedCount: consolidated.extractedCount };
-            yield { type: 'turn_end', turnNumber, stopReason: 'context_overflow_retry' };
+            yield { type: 'turn_end', turnNumber, stopReason: 'context_overflow_retry', threadId };
             continue; // retry with fewer messages
           }
         }
@@ -1685,7 +1736,7 @@ export class Engine {
             (err.name === 'LLMRateLimitError' || err.name === 'LLMServerError')) {
           yield { type: 'fallback', from: currentModel, to: fallbackModel, reason: err.message };
           currentModel = fallbackModel;
-          yield { type: 'turn_end', turnNumber, stopReason: 'fallback_retry' };
+          yield { type: 'turn_end', turnNumber, stopReason: 'fallback_retry', threadId };
           continue; // retry with fallback model
         }
@@ -1694,7 +1745,7 @@ export class Engine {
           error: err,
           retryable: err.name === 'LLMRateLimitError' || err.name === 'LLMServerError',
         };
-        yield { type: 'turn_end', turnNumber, stopReason: 'error' };
+        yield { type: 'turn_end', turnNumber, stopReason: 'error', threadId };
         break;
       }
@@ -1788,13 +1839,33 @@ export class Engine {
         continueTurns++;
         // Append a "Continue" user message
         conversationMessages.push({ role: 'user', content: 'Continue' });
-        yield { type: 'turn_end', turnNumber, stopReason: 'max_tokens_continue' };
+        yield { type: 'turn_end', turnNumber, stopReason: 'max_tokens_continue', threadId };
         continue; // loop back to call adapter again
       }
+      // If new user input was appended while this loop was streaming and
+      // there are no tools to force another loop, splice it now and continue
+      // instead of ending the thread. This preserves token streaming and
+      // still only mutates messages at a clean loop boundary.
+      const appendedAfterAssistant = this.#drainPendingUserMessages(drainPendingUserMessages);
+      if (appendedAfterAssistant.length > 0) {
+        for (const item of appendedAfterAssistant) {
+          conversationMessages.push({ role: 'user', content: item.content });
+          yield {
+            type: 'user_append',
+            turnId: queryTurnId,
+            loopNumber: turnNumber,
+            threadId,
+            preview: String(item.preview || '').slice(0, 200),
+          };
+        }
+        yield { type: 'turn_end', turnNumber, stopReason: 'user_append_continue', threadId };
+        continue;
+      }
       // If no tool calls, we're done
       if (stopReason !== 'tool_use' || toolCalls.length === 0) {
-        yield { type: 'turn_end', turnNumber, stopReason };
+        yield { type: 'turn_end', turnNumber, stopReason, threadId };
         // ─── Post-query: StopHooks or Legacy ─────────────
         if (this.#config._readOnly) {
@@ -1825,6 +1896,7 @@ export class Engine {
             // Bug 6: tag persisted messages with the originating group so
             // history replay can re-stamp them on reload.
             groupId,
+            threadId,
             // Multi-VP fan-out (history-dedup): skip the user-row append
             // in stop-hooks when the orchestrator already wrote it once
             // for this turn. The hook still persists assistant + tool
@@ -1859,6 +1931,7 @@ export class Engine {
             yield {
               type: 'memory_adjust',
               turnId: queryTurnId,
+              threadId,
               groupKey: amsContext.groupKey,
               added: adjustResult.added,
               evicted: adjustResult.evicted,
@@ -2051,6 +2124,7 @@ export class Engine {
         yield {
           type: 'tool_exec',
           turnId: queryTurnId,
+          threadId,
           loopNumber: turnNumber,
           callId: tc.id,
           name: tc.name,
@@ -2135,6 +2209,7 @@ export class Engine {
           turnNumber,
           stopReason: 'tool_handoff',
           detail: handoffDetail,
+          threadId,
         };
         break;
       }
@@ -2178,6 +2253,7 @@ export class Engine {
           yield {
             type: 'reflection',
             turnId: queryTurnId,
+            threadId,
             loopNumber: turnNumber,
             trigger: 't1',
             status: 'pending',
@@ -2212,6 +2288,7 @@ export class Engine {
           yield {
             type: 'reflection',
             turnId: queryTurnId,
+            threadId,
             loopNumber: turnNumber,
             trigger: 't1',
             // PR-L bug fix: keep the same loopRange as the `pending` event
@@ -2229,6 +2306,7 @@ export class Engine {
           yield {
             type: 'reflection',
             turnId: queryTurnId,
+            threadId,
             loopNumber: turnNumber,
             trigger: 't1',
             status: 'error',
@@ -2254,12 +2332,12 @@ export class Engine {
       // the typed `aborted` event + a final turn_end with stopReason
       // 'aborted' instead of looping back to a new adapter call.
       if (abortedDuringTools || signal?.aborted) {
-        yield { type: 'aborted', reason: this.#abortReason || 'external', turnNumber };
-        yield { type: 'turn_end', turnNumber, stopReason: 'aborted' };
+        yield { type: 'aborted', reason: this.#abortReason || 'external', turnNumber, threadId };
+        yield { type: 'turn_end', turnNumber, stopReason: 'aborted', threadId };
         break;
       }
-      yield { type: 'turn_end', turnNumber, stopReason: 'tool_use' };
+      yield { type: 'turn_end', turnNumber, stopReason: 'tool_use', threadId };
       // task-327b: count this as a tool-loop turn. Next iteration's
       // pickEffort() will see the bumped counter and upgrade to 'max'
@@ -2276,6 +2354,7 @@ export class Engine {
     yield {
       type: 'turn_close',
       turnId: queryTurnId,
+      threadId,
       totalMs: Date.now() - queryStartedAt,
       totalTokens: cumulativeInputTokens + cumulativeOutputTokens,
       loopCount: turnNumber,
@@ -2399,7 +2478,22 @@ export class Engine {
    * @returns {string}
    */
   get currentThreadId() {
-    return MAIN_THREAD_ID;
+    return this.#currentThreadId || MAIN_THREAD_ID;
+  }
+  /**
+   * Append a user message into the currently running query. The loop consumes
+   * it only at adapter boundaries, never mid-token and never between an
+   * assistant tool_use and its paired tool_result messages.
+   * @param {string|Array} content
+   * @returns {boolean}
+   */
+  appendUserMessage(content) {
+    if (typeof content !== 'string' && !Array.isArray(content)) return false;
+    if (typeof content === 'string' && !content.trim()) return false;
+    const preview = typeof content === 'string' ? content : '[content blocks]';
+    this.#pendingUserMessages.push({ content, preview });
+    return true;
   }
   /** @returns {string|null} */

package/unify/session.js CHANGED Viewed

@@ -24,8 +24,9 @@ import { createFullRegistry } from './tools/index.js';
 import { Engine } from './engine.js';
 import { Compactor } from './compact/compactor.js';
 import { ToolUsageStats } from './stats/tool-usage.js';
-// H2.f.5: threads/, pipeline/dispatcher and input-queue retired. The
-// session now exposes a single Engine.
+// H2.f.5 removed the old user-facing thread pipeline/dispatcher. The base
+// session still exposes a single default Engine; PR #797 adds group VP thread
+// engines in web-bridge runtime state, keyed below the session layer.
 //
 // GC.1 (final): the session opens a SegmentIndex (SQLite FTS5 over
 // memory.md) and passes it to the Engine. Engine.#recallMemory routes
@@ -250,7 +251,7 @@ export async function loadSession(options = {}) {
   // ─── 5a. (removed 2026-05-13) Feature store init — Feature system retired.
-  // ─── 5b. (H2.f.5) thread store retired. Single conversation. ───
+  // ─── 5b. (H2.f.5) user-facing thread store retired. ───
   // ─── 5c. D1 first-boot seed (task-334m) ─────────────────
   //         When no groups exist on disk AND we're not in read-only mode,
@@ -445,8 +446,9 @@ export async function loadSession(options = {}) {
     }).catch(() => { /* best-effort catch-up */ });
   }
-  // H2.f.5: thread engine registry, input queue, and dispatcher retired.
-  // The session exposes a single `engine`; web-bridge calls engine.query()
+  // H2.f.5 retired the old session-level thread engine registry, input queue,
+  // and dispatcher. The session exposes a default `engine`; PR #797 keeps
+  // group VP thread engines in web-bridge runtime state and calls engine.query()
   // directly. Memory recall happens via memory/preflow.js (pre-turn) and
   // memory/adjust.js (post-turn).

package/unify/tools/index.js CHANGED Viewed

@@ -47,9 +47,9 @@ import routeForward from './route-forward.js';
 import todoWrite from './todo-write.js';
 import startPlan from './start-plan.js';
-// H2.f.4: thread tools (spawnThread/switchThread/listThreads/...) deleted.
-// The agent now runs in a single conversation; multi-thread orchestration
-// has been retired across the H2.f series.
+// H2.f.4: user-facing thread tools (spawnThread/switchThread/listThreads/...)
+// were deleted. PR #797 reintroduces runtime-owned VP thread routing below the
+// tool layer; LLMs still do not manage threads via tools.
 //
 // Feature tools (FeatureCreate/Update/List/Get/Progress/Memory + Followup
 // + UpdatePlan + feature_summary_post) and the FeatureArc auto-creation

package/unify/tools/todo-write.js CHANGED Viewed

@@ -9,11 +9,11 @@
  * `tool_use` event — not the result — so this tool's persistence story
  * is "stamp into the LLM event stream and cache on ctx for replay."
  *
- * Per-VP isolation: each VP keeps its own current todo list. The
- * web-bridge injects `ctx.getCurrentTodos()` / `ctx.setCurrentTodos()`
- * pointing at a per-(groupId,vpId) slot so two VPs in the same group
- * can independently track their own multi-step tasks without
- * stepping on each other.
+ * Per-thread isolation: each running VP thread keeps its own current todo
+ * list. The web-bridge injects `ctx.getCurrentTodos()` /
+ * `ctx.setCurrentTodos()` pointing at a per-(groupId,vpId,threadId) slot so
+ * two concurrent threads for the same VP cannot overwrite each other's
+ * progress.
  *
  * Reference: plan §2 (2026-05-13 — Feature system retired, TodoWrite
  * added as the actual progress-tracking surface for the LLM).

package/unify/vp/thread-classifier.js ADDED Viewed

@@ -0,0 +1,113 @@
+const VALID_DECISIONS = new Set(['related', 'unrelated']);
+export const THREAD_CLASSIFIER_SYSTEM_PROMPT = `You route a new user query for one VP into an existing running thread or a new thread.
+Return JSON only:
+{"decision":"related|unrelated","targetThreadId":"string|null","title":"short title","reason":"optional debug reason"}
+Rules:
+- If the query continues, clarifies, corrects, or adds details to an existing thread, choose related.
+- If multiple threads match, choose the most relevant thread.
+- If none match, choose unrelated.
+- title must be 5-20 Chinese characters or 3-8 English words, matching the user language.
+- Do not include markdown or extra prose.`;
+export function fallbackTitle(query) {
+  const text = String(query || '').replace(/\s+/g, ' ').trim();
+  if (!text) return '新任务';
+  const withoutMentions = text.replace(/@\S+/g, '').trim() || text;
+  if (/[^\x00-\x7F]/.test(withoutMentions)) return withoutMentions.slice(0, 20);
+  return withoutMentions.split(' ').slice(0, 8).join(' ').slice(0, 80);
+}
+function stripJsonFence(text) {
+  const raw = String(text || '').trim();
+  if (!raw.startsWith('```')) return raw;
+  return raw.replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/i, '').trim();
+}
+export function parseThreadClassification(text, runningThreads = [], query = '') {
+  let parsed;
+  try {
+    parsed = JSON.parse(stripJsonFence(text));
+  } catch {
+    return fallbackClassification(runningThreads, query, 'invalid_json');
+  }
+  return validateThreadClassification(parsed, runningThreads, query);
+}
+export function validateThreadClassification(value, runningThreads = [], query = '') {
+  const known = new Set((runningThreads || []).map(t => t && t.threadId).filter(Boolean));
+  const decision = VALID_DECISIONS.has(value && value.decision) ? value.decision : null;
+  const title = typeof value?.title === 'string' && value.title.trim()
+    ? value.title.trim().slice(0, 80)
+    : fallbackTitle(query);
+  const reason = typeof value?.reason === 'string' ? value.reason.slice(0, 500) : '';
+  if (decision === 'related') {
+    const targetThreadId = typeof value?.targetThreadId === 'string' ? value.targetThreadId : '';
+    if (targetThreadId && known.has(targetThreadId)) {
+      return { decision: 'related', targetThreadId, title, reason };
+    }
+    return { decision: 'unrelated', targetThreadId: null, title, reason: reason || 'invalid_target_thread' };
+  }
+  if (decision === 'unrelated') {
+    return { decision: 'unrelated', targetThreadId: null, title, reason };
+  }
+  return fallbackClassification(runningThreads, query, 'invalid_decision');
+}
+export function fallbackClassification(runningThreads = [], query = '', reason = 'fallback') {
+  const live = (runningThreads || []).filter(t => t && t.threadId);
+  if (live.length === 1) {
+    return {
+      decision: 'related',
+      targetThreadId: live[0].threadId,
+      title: live[0].title || fallbackTitle(query),
+      reason,
+    };
+  }
+  return {
+    decision: 'unrelated',
+    targetThreadId: null,
+    title: fallbackTitle(query),
+    reason,
+  };
+}
+export function buildThreadClassificationPrompt({ vp = {}, runningThreads = [], newQuery = '' } = {}) {
+  const payload = {
+    vp: {
+      vpId: vp.vpId || '',
+      displayName: vp.displayName || vp.displayNameZh || vp.vpId || '',
+      role: vp.role || vp.roleZh || '',
+      persona: String(vp.persona || '').slice(0, 600),
+    },
+    runningThreads: (runningThreads || []).map(t => ({
+      threadId: t.threadId,
+      title: t.title || '',
+      status: t.status || '',
+      updatedAt: t.updatedAt || null,
+      recentMessages: Array.isArray(t.recentMessages) ? t.recentMessages.slice(-6) : [],
+      summary: t.summary || '',
+    })),
+    newQuery: String(newQuery || '').slice(0, 4000),
+  };
+  return JSON.stringify(payload, null, 2);
+}
+export async function classifyThread({ adapter, model, vp, runningThreads, newQuery, signal } = {}) {
+  if (!adapter || typeof adapter.call !== 'function') {
+    return fallbackClassification(runningThreads, newQuery, 'no_adapter');
+  }
+  try {
+    const res = await adapter.call({
+      model,
+      system: THREAD_CLASSIFIER_SYSTEM_PROMPT,
+      messages: [{ role: 'user', content: buildThreadClassificationPrompt({ vp, runningThreads, newQuery }) }],
+      maxTokens: 256,
+      signal,
+    });
+    return parseThreadClassification(res && res.text, runningThreads, newQuery);
+  } catch (err) {
+    return fallbackClassification(runningThreads, newQuery, `classifier_error:${err?.message || err}`);
+  }
+}