npm - @yemi33/minions - Versions diffs - 0.1.1912 → 0.1.1913 - Mend

@yemi33/minions 0.1.1912 → 0.1.1913

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dashboard/js/settings.js +2 -0
package/dashboard.js +128 -1
package/engine/cli.js +1 -0
package/engine/shared.js +1 -0
package/package.json +1 -1
package/engine/copilot-models.json +0 -5

package/dashboard/js/settings.js CHANGED Viewed

@@ -230,6 +230,7 @@ async function openSettings() {
           settingsToggle('Copilot: suppress AGENTS.md', 'set-copilotSuppressAgentsMd', e.copilotSuppressAgentsMd !== false, '--no-custom-instructions: stops AGENTS.md auto-load from fighting Minions playbook prompts') +
           settingsToggle('Copilot: reasoning summaries', 'set-copilotReasoningSummaries', !!e.copilotReasoningSummaries, '--enable-reasoning-summaries (Anthropic-family models only)') +
           settingsToggle('Disable model discovery', 'set-disableModelDiscovery', !!e.disableModelDiscovery, 'Skip /api/runtimes/<name>/models REST calls fleet-wide. Settings UI falls back to free-text.') +
+          settingsToggle('Use persistent Copilot worker pool (faster CC responses)', 'set-ccUseWorkerPool', !!e.ccUseWorkerPool, 'Experimental — sub-task C of W-mp2w003600196c51 (CC perf). When ON, Command Center routes through engine/cc-worker-pool.js (one persistent `copilot --acp` process per CC tab) instead of spawning a fresh CLI per turn. Saves ~14s of cold-start cost on warm follow-up turns. Engine/agent dispatch path is unchanged. Off by default.') +
         '</div>' +
         '<div style="display:grid;grid-template-columns:1fr 3fr;gap:8px;margin-top:8px">' +
           '<div>' +
@@ -625,6 +626,7 @@ async function saveSettings() {
       copilotReasoningSummaries: !!document.getElementById('set-copilotReasoningSummaries')?.checked,
       maxBudgetUsd: (document.getElementById('set-maxBudgetUsd')?.value ?? '').trim(),
       disableModelDiscovery: !!document.getElementById('set-disableModelDiscovery')?.checked,
+      ccUseWorkerPool: !!document.getElementById('set-ccUseWorkerPool')?.checked,
       maxTurnsByType: (function() {
         var mbt = {};
         var types = ['explore', 'ask', 'review', 'implement', 'fix', 'test', 'verify', 'plan', 'decompose'];

package/dashboard.js CHANGED Viewed

@@ -32,6 +32,7 @@ const dispatchMod = require('./engine/dispatch');
 const steering = require('./engine/steering');
 const projectDiscovery = require('./engine/project-discovery');
 const features = require('./engine/features');
+const ccWorkerPool = require('./engine/cc-worker-pool');
 const os = require('os');
 const { safeRead, safeReadDir, safeWrite, safeJson, safeJsonObj, safeJsonArr, safeJsonNoRestore, safeUnlink, mutateJsonFileLocked, mutateTextFileLocked, mutateControl, mutateCooldowns, mutateWorkItems, getProjects: _getProjects, DONE_STATUSES, WI_STATUS, WORK_TYPE, reopenWorkItem } = shared;
@@ -5826,6 +5827,18 @@ What would you like to discuss or change? When you're happy, say "approve" and I
         const abort = ccInFlightAborts.get(tabId);
         if (abort) { try { abort(); } catch {} }
       }
+      // Sub-task C of W-mp2w003600196c51: when the worker pool is on, abort
+      // must also fire `session/cancel` on the persistent ACP process so the
+      // remote daemon stops generating into a torn-down session. The pool
+      // exposes cancellation via the SessionHandle returned from getSession;
+      // we don't keep that handle around here, so route through closeTab to
+      // both cancel inflight and tear down the worker (cheaper than tracking
+      // per-tab handles in dashboard state, and matches "tab close" semantics
+      // — if the user explicitly aborted, we don't owe them a warm process).
+      // Off when the flag is off so legacy SIGTERM-only behavior is preserved.
+      if (CONFIG.engine && CONFIG.engine.ccUseWorkerPool) {
+        try { ccWorkerPool.closeTab(tabId); } catch { /* swallow */ }
+      }
       _clearCcLiveStream(tabId);
       _releaseCCTab(tabId);
       return jsonReply(res, 200, { ok: true });
@@ -5846,6 +5859,11 @@ What would you like to discuss or change? When you're happy, say "approve" and I
       const sessions = _filterCcTabSessions(raw);
       return sessions.filter(s => s.id !== id);
     }, { defaultValue: [] });
+    // Sub-task C of W-mp2w003600196c51: tear down the persistent ACP worker
+    // for this tab so we don't leak a Copilot process after the user closes
+    // the tab. closeTab is a no-op when the pool has no entry for the tabId,
+    // so it's safe to call regardless of whether the flag is on.
+    try { ccWorkerPool.closeTab(id); } catch { /* swallow */ }
     return jsonReply(res, 200, { ok: true });
   }
@@ -5875,6 +5893,14 @@ What would you like to discuss or change? When you're happy, say "approve" and I
         if (body.sessionId && ccSession._promptHash && ccSession._promptHash !== _ccPromptHash) {
           ccSession = { sessionId: null, createdAt: null, lastActiveAt: null, turnCount: 0 };
           sessionReset = true;
+          // Sub-task C of W-mp2w003600196c51: drop the persistent ACP worker
+          // for this tab so the next turn rebuilds against the new prompt.
+          // The pool's getSession() handles systemPromptHash deltas via
+          // newSession() (warm-process reuse), but evicting the tab here is
+          // belt-and-suspenders — matches the spec's "call closeTab on
+          // _ccPromptHash change" requirement and matches the way we just
+          // dropped ccSession entirely.
+          try { ccWorkerPool.closeTab(tabId); } catch { /* swallow */ }
         }
         const wasResume = !!(body.sessionId && body.sessionId === ccSession.sessionId && ccSessionValid());
@@ -5943,8 +5969,21 @@ What would you like to discuss or change? When you're happy, say "approve" and I
    * onChunk/onToolUse shape — only `sessionId` differs (set on
    * initial call, undefined on retry). Hoisted to keep the two call sites
    * in lock-step.
+   *
+   * Sub-task C of W-mp2w003600196c51 (CC perf): when
+   * `engineConfig.ccUseWorkerPool` is true the call routes through
+   * engine/cc-worker-pool.js (`copilot --acp`, one persistent process per
+   * CC tab) instead of spawning a fresh CLI per turn. The pool path
+   * preserves the existing onChunk/onToolUse/result shape so callers
+   * (handleCommandCenterStream, retry path) need no change. Engine/agent
+   * dispatch path is intentionally NOT routed through the pool; that
+   * lives on the per-dispatch _spawnProcess model in engine.js (regression
+   * test enforces engine.js does not import cc-worker-pool).
    */
-  function _invokeCcStream({ prompt, sessionId, liveState, toolUses, model, effort, maxTurns, engineConfig, systemPrompt = CC_STATIC_SYSTEM_PROMPT }) {
+  function _invokeCcStream({ prompt, sessionId, liveState, toolUses, model, effort, maxTurns, engineConfig, systemPrompt = CC_STATIC_SYSTEM_PROMPT, tabId }) {
+    if (engineConfig && engineConfig.ccUseWorkerPool) {
+      return _invokeCcStreamViaPool({ prompt, liveState, model, effort, engineConfig, systemPrompt, tabId });
+    }
     const { callLLMStreaming } = require('./engine/llm');
     return callLLMStreaming(prompt, systemPrompt, {
       timeout: CC_CALL_TIMEOUT_MS, label: 'command-center', model, maxTurns,
@@ -5965,6 +6004,87 @@ What would you like to discuss or change? When you're happy, say "approve" and I
     });
   }
+  /**
+   * Pool-routed implementation of _invokeCcStream. Keeps the public contract
+   * identical: returns a Promise that resolves to a result envelope shaped like
+   * callLLMStreaming's (`{ text, sessionId, code, usage, raw, stderr,
+   * missingRuntime }`) and exposes an `.abort` property so the SSE handler can
+   * cancel the in-flight stream by calling `_ccStreamAbort()`.
+   *
+   * Differences vs the direct path:
+   *   - Pool's `onChunk(text)` from agent_message_chunk is a DELTA, but
+   *     callLLMStreaming's contract is "full accumulated text"; we accumulate
+   *     here so `liveState.text` and downstream chunk events keep the same
+   *     semantics consumers already depend on.
+   *   - Tool calls are not surfaced in sub-task B (the pool ignores
+   *     `tool_call` notifications). `toolUses` stays empty on this path; if
+   *     sub-task C/D adds tool_call surfacing in the pool we'll plumb a
+   *     callback here too.
+   *   - `usage` is reported as an empty object — ACP doesn't expose token
+   *     counts in the in-flight session/update notifications, and the pool's
+   *     long-lived process makes per-turn usage attribution non-trivial.
+   *     trackEngineUsage is a no-op on `{}`.
+   */
+  function _invokeCcStreamViaPool({ prompt, liveState, model, effort, engineConfig, systemPrompt, tabId }) {
+    const resolvedTabId = tabId || 'default';
+    let cancelled = false;
+    let accumulated = '';
+    let sessionHandle = null;
+    let resolveResult;
+    const promise = new Promise((resolve) => { resolveResult = resolve; });
+    promise.abort = () => {
+      cancelled = true;
+      try { sessionHandle && sessionHandle.cancel(); } catch { /* swallow */ }
+    };
+    (async () => {
+      try {
+        sessionHandle = await ccWorkerPool.getSession({
+          tabId: resolvedTabId,
+          model,
+          effort,
+          mcpServers: (engineConfig && engineConfig.mcpServers) || [],
+          systemPromptHash: _ccPromptHash,
+        });
+      } catch (err) {
+        return resolveResult({
+          text: '',
+          sessionId: null,
+          code: 1,
+          usage: {},
+          raw: '',
+          stderr: String((err && err.message) || err || 'cc-worker-pool spawn failed'),
+        });
+      }
+      if (cancelled) {
+        try { sessionHandle.cancel(); } catch { /* swallow */ }
+        return resolveResult({ text: accumulated, sessionId: sessionHandle.sessionId, code: 0, usage: {}, raw: accumulated, stderr: '' });
+      }
+      await sessionHandle.stream(prompt, {
+        systemPromptText: systemPrompt,
+        onChunk: (delta) => {
+          accumulated += delta;
+          _touchCcLiveStream(liveState);
+          liveState.text = accumulated;
+          if (liveState.writer) liveState.writer({ type: 'chunk', text: accumulated });
+        },
+        onDone: () => {
+          resolveResult({ text: accumulated, sessionId: sessionHandle.sessionId, code: 0, usage: {}, raw: accumulated, stderr: '' });
+        },
+        onError: (err) => {
+          resolveResult({
+            text: accumulated,
+            sessionId: sessionHandle.sessionId,
+            code: cancelled ? 0 : 1,
+            usage: {},
+            raw: accumulated,
+            stderr: String((err && err.message) || err || 'cc-worker-pool stream error'),
+          });
+        },
+      });
+    })();
+    return promise;
+  }
   async function handleCommandCenterStream(req, res) {
     // SSE Origin gate (belt-and-suspenders: the top-level dispatcher has
     // already rejected disallowed origins on POST, but validate again here
@@ -6106,6 +6226,11 @@ What would you like to discuss or change? When you're happy, say "approve" and I
             tabSessionId = null;
             sessionReset = true;
             sessionResetReason = 'promptChanged';
+            // Sub-task C of W-mp2w003600196c51: matched dashboard reload with
+            // a new prompt template — evict the persistent ACP worker so the
+            // next turn rebuilds against the new prompt. (See same hook in
+            // handleCommandCenter above.)
+            try { ccWorkerPool.closeTab(body.tabId || 'default'); } catch { /* swallow */ }
           } else if (tabEntry.runtime && tabEntry.runtime !== currentRuntime) {
             tabSessionId = null;
             sessionReset = true;
@@ -6149,6 +6274,7 @@ What would you like to discuss or change? When you're happy, say "approve" and I
           model: streamModel, effort: streamEffort, maxTurns: ccMaxTurns,
           engineConfig: CONFIG.engine,
           systemPrompt: turnSystemPrompt,
+          tabId,
         });
         _ccStreamAbort = llmPromise.abort;
         liveState.abortFn = _ccStreamAbort;
@@ -6174,6 +6300,7 @@ What would you like to discuss or change? When you're happy, say "approve" and I
             model: streamModel, effort: streamEffort, maxTurns: ccMaxTurns,
             engineConfig: CONFIG.engine,
             systemPrompt: turnSystemPrompt,
+            tabId,
           });
           _ccStreamAbort = retryPromise.abort;
           liveState.abortFn = _ccStreamAbort;

package/engine/cli.js CHANGED Viewed

@@ -992,6 +992,7 @@ const commands = {
       'claudeBareMode', 'claudeFallbackModel',
       'copilotDisableBuiltinMcps', 'copilotSuppressAgentsMd', 'copilotStreamMode', 'copilotReasoningSummaries',
       'maxBudgetUsd', 'disableModelDiscovery',
+      'ccUseWorkerPool',
     ];
     const activeFlags = [];
     for (const f of flagFields) {

package/engine/shared.js CHANGED Viewed

@@ -1119,6 +1119,7 @@ const ENGINE_DEFAULTS = {
   copilotSuppressAgentsMd: true,     // Copilot --no-custom-instructions: stop AGENTS.md auto-load from fighting Minions playbook prompts
   copilotStreamMode: 'on',           // Copilot --stream <on|off>: 'on' streams assistant.message_delta events live; 'off' batches them
   copilotReasoningSummaries: false,  // Copilot --enable-reasoning-summaries (Anthropic-family models only)
+  ccUseWorkerPool: false,            // Sub-task C of W-mp2w003600196c51 (CC perf): when true, _invokeCcStream routes through engine/cc-worker-pool.js (persistent `copilot --acp` per CC tab) instead of spawning a fresh CLI per turn. Off by default — opt-in feature flag. Engine/agent dispatch path stays per-process regardless.
   maxBudgetUsd: undefined,       // fleet USD ceiling for --max-budget-usd (per-agent override: agents.<id>.maxBudgetUsd). Honors 0 via ?? so a literal cap of $0 works
   disableModelDiscovery: false,  // skip runtime.listModels() REST calls fleet-wide (settings UI falls back to free-text)
   maxPendingContexts: 20, // cap pendingContexts arrays in cooldowns.json to prevent unbounded growth

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@yemi33/minions",
-  "version": "0.1.1912",
+  "version": "0.1.1913",
   "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
   "bin": {
     "minions": "bin/minions.js"

package/engine/copilot-models.json DELETED Viewed

@@ -1,5 +0,0 @@
-{
-  "runtime": "copilot",
-  "models": null,
-  "cachedAt": "2026-05-13T19:11:26.190Z"
-}