npm - pikiloom - Versions diffs - 0.4.14 → 0.4.15 - Mend

pikiloom 0.4.14 → 0.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

package/dist/agent/drivers/hermes.js CHANGED Viewed

@@ -16,7 +16,7 @@ import { join, extname } from 'node:path';
 import { resolve as resolvePath } from 'node:path';
 import { registerDriver } from '../driver.js';
 import { AcpClient, toAcpMcpServers } from '../acp-client.js';
-import { agentLog, agentWarn, emptyUsage, normalizeErrorMessage, listPikiloomSessions, findPikiloomSession, buildStreamPreviewMeta, applyTurnWindow, pushRecentActivity, IMAGE_EXTS, mimeForExt, } from '../index.js';
+import { agentLog, agentWarn, emptyUsage, normalizeErrorMessage, listPikiloomSessions, managedRecordToSessionInfo, findPikiloomSession, buildStreamPreviewMeta, applyTurnWindow, pushRecentActivity, IMAGE_EXTS, mimeForExt, } from '../index.js';
 // Build the ACP `prompt` content array from the user's text + staged
 // attachments. Images become ImageContentBlocks (base64 + mimeType — the
 // shape Hermes' acp_adapter accepts and converts to OpenAI multimodal
@@ -370,32 +370,10 @@ async function getHermesSessions(workdir, limit) {
     // for the `hermes sessions` CLI but irrelevant to pikiloom, which always
     // creates its own ACP session per turn and records it under .pikiloom.
     const resolvedWorkdir = resolvePath(workdir);
+    // Canonical record→SessionInfo mapper (single source of truth) — see claude.ts.
+    // Hand-rolling dropped thinkingEffort/workflowEnabled/profileId.
     const records = listPikiloomSessions(resolvedWorkdir, 'hermes');
-    const sessions = records.map(record => ({
-        sessionId: record.sessionId,
-        agent: 'hermes',
-        workdir: record.workdir,
-        workspacePath: record.workspacePath,
-        threadId: record.threadId,
-        model: record.model,
-        createdAt: record.createdAt,
-        title: record.title,
-        running: record.runState === 'running',
-        runState: record.runState,
-        runDetail: record.runDetail,
-        runUpdatedAt: record.runUpdatedAt,
-        runPid: record.runPid,
-        classification: record.classification,
-        userStatus: record.userStatus,
-        userNote: record.userNote,
-        lastQuestion: record.lastQuestion,
-        lastAnswer: record.lastAnswer,
-        lastMessageText: record.lastMessageText,
-        migratedFrom: record.migratedFrom,
-        migratedTo: record.migratedTo,
-        linkedSessions: record.linkedSessions,
-        numTurns: record.numTurns ?? null,
-    }));
+    const sessions = records.map(managedRecordToSessionInfo);
     sessions.sort((a, b) => Date.parse(b.createdAt || '') - Date.parse(a.createdAt || ''));
     const sliced = typeof limit === 'number' ? sessions.slice(0, limit) : sessions;
     agentLog(`[sessions:hermes] workdir=${resolvedWorkdir} pikiloom=${records.length} returned=${sliced.length}`);

package/dist/agent/index.js CHANGED Viewed

@@ -23,7 +23,7 @@ export { attachAgentImage, attachInlineImage, materializeImage, rewriteImageBloc
 // ── Re-export: utilities ────────────────────────────────────────────────────
 export { Q, agentLog, agentWarn, agentError, dedupeStrings, numberOrNull, normalizeStreamPreviewPlan, parseTodoWriteAsPlan, normalizeActivityLine, pushRecentActivity, detectClaudeApiError, isRetryableClaudeApiError, detectClaudeModelError, claudeModelErrorMessage, firstNonEmptyLine, shortValue, normalizeErrorMessage, joinErrorMessages, appendSystemPrompt, mimeForExt, computeContext, buildStreamPreviewMeta, summarizeClaudeToolUse, summarizeClaudeToolResult, previewToolCallInput, previewToolCallResult, roundPercent, toIsoFromEpochSeconds, normalizeUsageStatus, labelFromWindowMinutes, usageWindowFromRateLimit, parseJsonTail, modelFamily, normalizeClaudeModelId, emptyUsage, readTailLines, stripInjectedPrompts, sanitizeSessionUserPreviewText, SESSION_PREVIEW_IMAGE_PLACEHOLDER_RE, CLAUDE_AT_MENTION_IMAGE_RE, extractClaudeAtMentionImagePaths, stripClaudeAtMentionImages, isPendingSessionId, emitSessionIdUpdate, sessionListDisplayTitle, } from './utils.js';
 // ── Re-export: session management ───────────────────────────────────────────
-export { updateSessionMeta, promoteSessionId, recordFork, listPikiloomSessions, findPikiloomSession, getSessionStoredConfig, ensureManagedSession, findManagedThreadSession, stageSessionFiles, mergeManagedAndNativeSessions, getSessions, getSessionTail, getSessionMessages, applyTurnWindow, applyTurnFilter, classifySession, deriveUserStatus, exportSession, importSession, deleteAgentSession, isProcessAlive, isRunningSessionStale, reconcileOrphanedRunningSessions, } from './session.js';
+export { updateSessionMeta, promoteSessionId, recordFork, listPikiloomSessions, findPikiloomSession, getSessionStoredConfig, ensureManagedSession, findManagedThreadSession, stageSessionFiles, mergeManagedAndNativeSessions, managedRecordToSessionInfo, getSessions, getSessionTail, getSessionMessages, applyTurnWindow, applyTurnFilter, classifySession, deriveUserStatus, exportSession, importSession, deleteAgentSession, isProcessAlive, isRunningSessionStale, reconcileOrphanedRunningSessions, } from './session.js';
 // ── Re-export: stream & detection ───────────────────────────────────────────
 export { detectAgentBin, listAgents, resolveDefaultAgent, run, doStream, listModels, resolveAgentModels, getUsage, getAgentBoundModelId, setAgentBoundModelId, } from './stream.js';
 // ── Re-export: driver registry ──────────────────────────────────────────────

package/dist/agent/session.js CHANGED Viewed

@@ -222,6 +222,7 @@ function normalizeSessionRecord(raw, workdir) {
         title: typeof raw?.title === 'string' && raw.title.trim() ? raw.title.trim() : null,
         model: typeof raw?.model === 'string' && raw.model.trim() ? raw.model.trim() : null,
         thinkingEffort: typeof raw?.thinkingEffort === 'string' && raw.thinkingEffort.trim() ? raw.thinkingEffort.trim() : null,
+        workflowEnabled: typeof raw?.workflowEnabled === 'boolean' ? raw.workflowEnabled : null,
         profileId: typeof raw?.profileId === 'string' && raw.profileId.trim() ? raw.profileId.trim() : null,
         stagedFiles: Array.isArray(raw?.stagedFiles) ? dedupeStrings(raw.stagedFiles.filter((v) => typeof v === 'string')) : [],
         lastUserAttachments: Array.isArray(raw?.lastUserAttachments)
@@ -299,7 +300,7 @@ function writeSessionMeta(record) {
         workspacePath: record.workspacePath,
         threadId: record.threadId,
         createdAt: record.createdAt, updatedAt: record.updatedAt,
-        title: record.title, model: record.model, thinkingEffort: record.thinkingEffort, stagedFiles: record.stagedFiles,
+        title: record.title, model: record.model, thinkingEffort: record.thinkingEffort, workflowEnabled: record.workflowEnabled, stagedFiles: record.stagedFiles,
         runState: record.runState, runDetail: record.runDetail, runUpdatedAt: record.runUpdatedAt,
         runPid: record.runPid,
         classification: record.classification,
@@ -583,7 +584,7 @@ export function ensureSessionWorkspace(opts) {
             workspacePath: sessionWorkspacePath(workdir, opts.agent, sessionId),
             threadId,
             createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(),
-            title: summarizePromptTitle(opts.title) || null, model: null, thinkingEffort: null, profileId: null, stagedFiles: [], lastUserAttachments: [],
+            title: summarizePromptTitle(opts.title) || null, model: null, thinkingEffort: null, workflowEnabled: null, profileId: null, stagedFiles: [], lastUserAttachments: [],
             runState: 'completed', runDetail: null, runUpdatedAt: new Date().toISOString(),
             runPid: null,
             classification: null, userStatus: null, userNote: null,
@@ -607,7 +608,7 @@ export function ensureSessionWorkspace(opts) {
 // ---------------------------------------------------------------------------
 // Record to SessionInfo
 // ---------------------------------------------------------------------------
-function managedRecordToSessionInfo(record) {
+export function managedRecordToSessionInfo(record) {
     // Collapse pre-fix records that stored the canonical skill expansion as the
     // title / lastQuestion / lastMessageText. New records get collapsed at write
     // time in `prepareStreamOpts`; this read-time pass keeps existing sessions
@@ -623,6 +624,7 @@ function managedRecordToSessionInfo(record) {
         threadId: record.threadId,
         model: record.model,
         thinkingEffort: record.thinkingEffort,
+        workflowEnabled: record.workflowEnabled ?? null,
         profileId: record.profileId ?? null,
         createdAt: record.createdAt,
         title,
@@ -729,6 +731,7 @@ export function getSessionStoredConfig(workdir, agent, sessionId) {
     return {
         model: record?.model ?? null,
         thinkingEffort: record?.thinkingEffort ?? null,
+        workflowEnabled: record?.workflowEnabled ?? null,
         profileId: record?.profileId ?? null,
     };
 }
@@ -825,6 +828,16 @@ export function mergeManagedAndNativeSessions(managedSessions, nativeSessions) {
             runUpdatedAt: useNativeTimeline ? (native.runUpdatedAt ?? managed.runUpdatedAt) : (managed.runUpdatedAt ?? native.runUpdatedAt),
             title: native.title || managed.title,
             model: native.model || managed.model,
+            // Pikiloom-owned metadata: the native session file (Claude JSONL etc.)
+            // carries none of these, so the `...native` spread would clobber them with
+            // `undefined`/`null`. The managed record (our centralized index) is the
+            // source of truth — recover each like `model` above. Without this the list
+            // silently drops the user's per-session choices: effort/Workflow fold back
+            // to the global default (per-send `ultra` → `max` after the turn) and the
+            // BYOK Profile binding is lost on resume.
+            thinkingEffort: managed.thinkingEffort ?? native.thinkingEffort ?? null,
+            workflowEnabled: managed.workflowEnabled ?? native.workflowEnabled ?? null,
+            profileId: managed.profileId ?? native.profileId ?? null,
             createdAt: native.createdAt || managed.createdAt,
             classification: managed.classification ?? native.classification ?? null,
             userStatus: managed.userStatus ?? native.userStatus ?? null,

package/dist/agent/stream.js CHANGED Viewed

@@ -7,7 +7,8 @@ import fs from 'node:fs';
 import path from 'node:path';
 import { restartManagedBrowser } from '../browser-supervisor.js';
 import { terminateProcessTree } from '../core/process-control.js';
-import { AGENT_DETECT_TIMEOUTS, AGENT_STREAM_HARD_KILL_GRACE_MS } from '../core/constants.js';
+import { AGENT_DETECT_TIMEOUTS, AGENT_STREAM_HARD_KILL_GRACE_MS, AGENT_UPDATE_TIMEOUTS } from '../core/constants.js';
+import { awaitAgentUpdateIdle } from './auto-update.js';
 import { getDriver, allDrivers, getAcceptedProviderKinds, hasDriver } from './driver.js';
 import { resolveAgentInjection, getActiveProfile, getActiveProfileId, getProvider, updateProfile, listProfiles, } from '../model/index.js';
 import { Q, agentLog, agentWarn, agentError, joinErrorMessages, normalizeErrorMessage, buildStreamPreviewMeta, computeContext, shortValue, isPendingSessionId, dedupeStrings, normalizeStreamPreviewPlan, } from './utils.js';
@@ -417,12 +418,18 @@ function prepareStreamOpts(opts) {
         },
     };
 }
-function finalizeStreamResult(result, workdir, prompt, session) {
+function finalizeStreamResult(result, workdir, prompt, session, workflowEnabled) {
     if (result.sessionId)
         syncManagedSessionIdentity(session, workdir, result.sessionId);
     session.record.model = result.model || session.record.model;
     if (result.thinkingEffort)
         session.record.thinkingEffort = result.thinkingEffort;
+    // Remember whether this turn ran with Workflow on so the synthetic `ultra`
+    // rung re-folds for display after the live stream ends and on resume — the
+    // stored `thinkingEffort` stays the concrete rung (e.g. `max`). `undefined`
+    // (driver invoked outside the bot) leaves the prior value untouched.
+    if (workflowEnabled !== undefined)
+        session.record.workflowEnabled = workflowEnabled;
     // Capture the BYOK Profile that was in effect for this run so a future
     // `session.switch` can re-bind it (null = native CLI auth).
     try {
@@ -544,13 +551,40 @@ export async function doStream(opts) {
     catch (e) {
         agentWarn(`[byok] failed to apply Profile injection: ${e?.message || e}`);
     }
+    // In-memory-first: stamp the turn's resolved reasoning rung + Workflow opt-in
+    // onto the centralized index NOW — before the agent CLI has flushed its own
+    // session file — so the session list/composer reflect the user's pick during
+    // the very first turn instead of only after finalizeStreamResult. The managed
+    // record is the single source of truth for this metadata and links to the
+    // native agent-session by id on promotion; finalize re-stamps it (plus the
+    // actual model) authoritatively at turn end.
+    try {
+        if (prepared.thinkingEffort) {
+            session.record.thinkingEffort = prepared.thinkingEffort.trim().toLowerCase() || session.record.thinkingEffort;
+        }
+        if (opts.claudeWorkflowEnabled !== undefined) {
+            session.record.workflowEnabled = opts.claudeWorkflowEnabled;
+        }
+        saveSessionRecord(opts.workdir, session.record);
+    }
+    catch (e) {
+        agentWarn(`[session] turn-start metadata stamp failed: ${e?.message || e}`);
+    }
     try {
         const driver = getDriver(prepared.agent);
         if (opts.forkOf && !driver.capabilities?.fork) {
             throw new Error(`Agent ${prepared.agent} does not support fork`);
         }
+        // A background agent-CLI auto-update (`npm install -g` / `brew upgrade`, by
+        // this process OR the `npx pikiloom@latest` self-bootstrap) briefly removes
+        // the bin while it relinks; exec'ing into that window fails with exit 127
+        // "command not found". Wait out any in-flight reinstall of THIS agent before
+        // dispatching to the driver — this is the one chokepoint every agent turn
+        // (claude -p, claude TUI, codex app-server, gemini) passes through. No-op
+        // when nothing is updating.
+        await awaitAgentUpdateIdle(prepared.agent, AGENT_UPDATE_TIMEOUTS.spawnWait);
         const result = await driver.doStream(prepared);
-        const finalized = finalizeStreamResult(result, opts.workdir, opts.prompt, session);
+        const finalized = finalizeStreamResult(result, opts.workdir, opts.prompt, session, opts.claudeWorkflowEnabled);
         // Once the child has its real session ID, link the lineage. We do this
         // after finalize so the child record is persisted with its native ID.
         if (opts.forkOf && finalized.sessionId) {

package/dist/bot/bot.js CHANGED Viewed

@@ -228,6 +228,12 @@ export class Bot {
      */
     enrichSnapshot(snap) {
         let next = snap;
+        // Attach the running turn's prompt so a watching terminal can render the
+        // user message for a follow-up it didn't originate (no local optimistic
+        // bubble). The RunningTask record is the source of truth while it's live.
+        const runningPrompt = next.taskId ? this.activeTasks.get(next.taskId)?.prompt : '';
+        if (runningPrompt)
+            next = { ...next, question: collapseSkillPrompt(runningPrompt) ?? runningPrompt };
         if (next.queuedTaskIds?.length) {
             const queuedTasks = next.queuedTaskIds.map(taskId => {
                 const raw = this.activeTasks.get(taskId)?.prompt || '';
@@ -446,8 +452,8 @@ export class Bot {
     emitStreamQueued(sessionKey, taskId) {
         this.emitStream(sessionKey, { type: 'queued', taskId, position: this.getQueuePosition(sessionKey, taskId) });
     }
-    emitStreamStart(taskId, session) {
-        const cfg = this.resolveSessionStreamConfig(session);
+    emitStreamStart(taskId, session, opts) {
+        const cfg = this.resolveSessionStreamConfig(session, opts);
         const key = this.liveSessionKey(taskId, session.key);
         this.debug(`[stream-lifecycle] start task=${taskId} key=${key} sessionId=${session.sessionId || '(pending)'} model=${cfg.model || '-'}`);
         this.emitStream(key, {
@@ -1422,7 +1428,10 @@ export class Bot {
                 this.finishTask(taskId);
                 return;
             }
-            this.emitStreamStart(taskId, session);
+            // Thread the per-send Workflow choice so the live divider folds to `ultra`
+            // immediately (the dashboard composer picks ultra per-send without flipping
+            // the agent-global flag resolveSessionStreamConfig would otherwise read).
+            this.emitStreamStart(taskId, session, { workflowEnabled: opts.workflowEnabled });
             // Wire up IM rendering for non-dashboard chats so /goal-driven tasks stream
             // to the same channel that submitted them, matching handleMessage's UX.
             const presenter = chatId !== 'dashboard'
@@ -1985,7 +1994,7 @@ export class Bot {
      * Mirrors the fallback chain used inside runStream() so callers (e.g. submitSessionTask
      * emitting a 'start' event) can label the active turn before runStream resolves it.
      */
-    resolveSessionStreamConfig(cs) {
+    resolveSessionStreamConfig(cs, opts) {
         const agentConfig = this.agentConfigs[cs.agent] || {};
         const sessionWorkdir = cs.workdir || this.workdir;
         const storedConfig = cs.sessionId && !isPendingSessionId(cs.sessionId)
@@ -2003,7 +2012,11 @@ export class Bot {
         // Fold to the synthetic 'ultra' rung for display when Workflow is on (mirrors
         // effortSelectionForAgent / the dashboard's foldUltraEffort), so the live reply
         // badge and IM running footer label the turn 'ultra' instead of a bare 'max'.
-        const displayEffort = effort && getDriverCapabilities(cs.agent).workflow && this.workflowEnabledForAgent(cs.agent)
+        // Prefer the per-turn workflow choice when the caller threads one (dashboard
+        // composer sends ultra per-send without flipping the agent-global flag);
+        // fall back to the agent-global flag (IM /mode, agent card).
+        const workflowOn = opts?.workflowEnabled ?? this.workflowEnabledForAgent(cs.agent);
+        const displayEffort = effort && getDriverCapabilities(cs.agent).workflow && workflowOn
             ? 'ultra'
             : effort;
         return { model: model || null, effort: displayEffort };

package/dist/channels/telegram/bot.js CHANGED Viewed

@@ -19,7 +19,7 @@ import { buildAgentsCommandView, buildModelsCommandView, buildModeCommandView, b
 import { buildSwitchWorkdirView, buildWorkspacesView, resolveRegisteredPath } from './directory.js';
 import { LivePreview } from './live-preview.js';
 import { registerProcessRuntime, buildRestartCommand, requestProcessRestart, } from '../../core/process-control.js';
-import { buildInitialPreviewHtml, buildHumanLoopPromptHtml, buildAnsweredHumanLoopPromptHtml, buildStreamPreviewHtml, buildFinalReplyRender, dispatchImageBlocks, escapeHtml, formatMenuLines, formatProviderUsageLines, renderCommandNoticeHtml, renderCommandSelectionHtml, renderCommandSelectionKeyboard, renderSessionTurnHtml, truncateMiddle, } from './render.js';
+import { buildInitialPreviewHtml, buildHumanLoopPromptHtml, buildAnsweredHumanLoopPromptHtml, buildStreamPreviewHtml, buildFinalReplyRender, dispatchImageBlocks, escapeHtml, formatMenuLines, formatProviderUsageLines, renderCommandNoticeHtml, renderCommandSelectionHtml, renderCommandSelectionKeyboard, renderSessionTurnHtml, truncateMiddle, unpackCallbackData, } from './render.js';
 import { currentHumanLoopQuestion, humanLoopOptionSelected } from '../../bot/human-loop.js';
 import { TelegramChannel } from './channel.js';
 import { splitText, supportsChannelCapability } from '../base.js';
@@ -1016,7 +1016,7 @@ export class TelegramBot extends Bot {
         return false;
     }
     async handleSessionsPageCallback(data, ctx) {
-        const action = decodeCommandAction(data);
+        const action = decodeCommandAction(unpackCallbackData(data));
         if (!action)
             return false;
         const result = await executeCommandAction(this, ctx.chatId, action, {

package/dist/channels/telegram/render.js CHANGED Viewed

@@ -85,13 +85,59 @@ export function renderCommandSelectionHtml(view) {
         lines.push('', `<i>${escapeHtml(view.helperText)}</i>`);
     return lines.join('\n');
 }
+/**
+ * Telegram caps `callback_data` at 64 bytes. Most encoded actions fit easily,
+ * but BYOK model rows encode as `md:p:<uuid>:<modelId>` (~42 bytes of overhead
+ * before the model id even starts), so a single long provider/model id blows
+ * the limit — and Telegram then rejects the *entire* message with
+ * BUTTON_DATA_INVALID, killing the whole menu. Mirror the PathRegistry idiom
+ * from directory.ts: stash the over-length payload and ship a short `r:<id>`
+ * token instead, resolving it back on the callback round-trip.
+ */
+const TELEGRAM_CALLBACK_LIMIT = 64;
+class CallbackDataRegistry {
+    idToData = new Map();
+    dataToId = new Map();
+    nextId = 1;
+    pack(data) {
+        if (Buffer.byteLength(data, 'utf8') <= TELEGRAM_CALLBACK_LIMIT)
+            return data;
+        let id = this.dataToId.get(data);
+        if (id == null) {
+            id = this.nextId++;
+            this.dataToId.set(data, id);
+            this.idToData.set(id, data);
+            if (this.idToData.size > 500) {
+                for (const oldId of [...this.idToData.keys()].slice(0, 200)) {
+                    const oldData = this.idToData.get(oldId);
+                    this.idToData.delete(oldId);
+                    this.dataToId.delete(oldData);
+                }
+            }
+        }
+        return `r:${id}`;
+    }
+    unpack(data) {
+        if (!data.startsWith('r:'))
+            return data;
+        const id = Number.parseInt(data.slice(2), 10);
+        if (!Number.isFinite(id))
+            return data;
+        return this.idToData.get(id) ?? data;
+    }
+}
+const callbackDataRegistry = new CallbackDataRegistry();
+/** Resolve a `r:<id>` token back to its original encoded action payload. */
+export function unpackCallbackData(data) {
+    return callbackDataRegistry.unpack(data);
+}
 export function renderCommandSelectionKeyboard(view) {
     if (!view.rows.length)
         return undefined;
     return {
         inline_keyboard: view.rows.map(row => row.map(button => ({
             text: formatCommandButtonLabel(button),
-            callback_data: encodeCommandAction(button.action),
+            callback_data: callbackDataRegistry.pack(encodeCommandAction(button.action)),
         }))),
     };
 }

package/dist/core/constants.js CHANGED Viewed

@@ -289,6 +289,14 @@ export const AGENT_UPDATE_TIMEOUTS = {
     npmPrefix: 10_000,
     /** Timeout for `npm view <pkg> version`. */
     npmView: 20_000,
+    /** Max time an agent spawn waits for an in-flight reinstall of that agent's
+     *  own CLI to finish before exec'ing. A concurrent `npm install -g` / `brew
+     *  upgrade` (this process OR the prod self-bootstrap) briefly removes the bin
+     *  symlink, so racing it yields exit 127 "command not found"; the wait
+     *  resolves early the instant the install ends. */
+    spawnWait: 2 * 60_000,
+    /** Poll interval while a spawn waits out an in-flight reinstall. */
+    spawnWaitPoll: 200,
 };
 // ---------------------------------------------------------------------------
 // Code agent (shared layer)

package/dist/dashboard/routes/models.js CHANGED Viewed

@@ -18,7 +18,7 @@
  *   POST   /api/models/agents/:agent/active             → bind/unbind a Profile
  */
 import { Hono } from 'hono';
-import { getModelsDevCatalog, searchCatalogProviders, listProviders, getProvider, addProvider, updateProvider, removeProvider, setProviderValidation, listProfiles, getProfile, addProfile, updateProfile, removeProfile, getActiveProfileId, setActiveProfile, validateProvider, getProviderModelList, invalidateProviderModels, } from '../../model/index.js';
+import { getModelsDevCatalog, searchCatalogProviders, listProviders, getProvider, addProvider, updateProvider, removeProvider, setProviderValidation, listProfiles, getProfile, addProfile, updateProfile, removeProfile, getActiveProfileId, setActiveProfile, prewarmLocalModel, validateProvider, getProviderModelList, invalidateProviderModels, } from '../../model/index.js';
 import { isCredentialRef, describeCredentialRef } from '../../core/secrets/index.js';
 import { allDriverIds } from '../../agent/index.js';
 const router = new Hono();
@@ -315,6 +315,14 @@ router.post('/api/models/agents/:agent/active', async (c) => {
         return c.json({ ok: false, error: 'profileId (string|null) is required' }, 400);
     try {
         setActiveProfile(agent, profileId);
+        // Warm a local backend the instant it's selected, so the user's first turn
+        // skips the model cold-load. Fire-and-forget; never blocks the bind.
+        if (profileId) {
+            const profile = getProfile(profileId);
+            const provider = profile ? getProvider(profile.providerId) : null;
+            if (profile && provider)
+                prewarmLocalModel(provider, profile.modelId);
+        }
         return c.json({ ok: true, agent, activeProfileId: profileId });
     }
     catch (e) {

package/dist/dashboard/routes/sessions.js CHANGED Viewed

@@ -64,6 +64,29 @@ function enrichWithRuntimeStatus(sessions, bot) {
         };
     });
 }
+// Session list cards render only the *head* of these text fields (previews via
+// firstMeaningfulLine / slice / sanitize) and use them for client-side substring
+// search. A session whose last turn dumped a huge tool output or long answer would
+// otherwise ship tens of KB per card that the list never displays — on a busy
+// workspace the swim-lane ballooned to ~600KB, dominated by these fields. Cap each
+// to a preview length: previews are unchanged and search still matches the head.
+// Full text remains available from the session-detail / messages endpoints.
+const LIST_PREVIEW_FIELD_CAP = 2048;
+function capPreviewField(value) {
+    return typeof value === 'string' && value.length > LIST_PREVIEW_FIELD_CAP
+        ? value.slice(0, LIST_PREVIEW_FIELD_CAP)
+        : value;
+}
+/** Thin a session for list/swim-lane responses by capping its heavy preview text. */
+export function projectSessionForList(session) {
+    return {
+        ...session,
+        lastQuestion: capPreviewField(session.lastQuestion),
+        lastAnswer: capPreviewField(session.lastAnswer),
+        lastMessageText: capPreviewField(session.lastMessageText),
+        runDetail: capPreviewField(session.runDetail),
+    };
+}
 function readStringField(value) {
     return typeof value === 'string' ? value.trim() : '';
 }
@@ -172,6 +195,7 @@ app.get('/api/sessions/:agent', async (c) => {
     const result = await querySessions({ workdir, agent });
     const enriched = enrichWithRuntimeStatus(result.sessions, botRef);
     const paged = paginateSessionResult(enriched, page, limit);
+    paged.sessions = paged.sessions.map(projectSessionForList);
     runtime.debug(`[sessions] endpoint=single agent=${agent} ok=${result.ok} total=${result.total} ` +
         `returned=${paged.sessions.length} error=${result.errors.join('; ') || '(none)'}`);
     return c.json({
@@ -195,6 +219,7 @@ app.get('/api/sessions', async (c) => {
         const result = await querySessions({ workdir, agent: a.agent });
         const enriched = enrichWithRuntimeStatus(result.sessions, botRef);
         const paged = paginateSessionResult(enriched, page, limit);
+        paged.sessions = paged.sessions.map(projectSessionForList);
         swimLane[a.agent] = {
             ok: result.ok,
             error: result.errors[0] || null,

package/dist/dashboard/server.js CHANGED Viewed

@@ -3,6 +3,7 @@
  */
 import http from 'node:http';
 import { Hono } from 'hono';
+import { compress } from 'hono/compress';
 import { getRequestListener } from '@hono/node-server';
 import { serveStatic } from '@hono/node-server/serve-static';
 import path from 'node:path';
@@ -87,6 +88,13 @@ export async function startDashboard(opts = {}) {
     if (opts.bot)
         runtime.attachBot(opts.bot);
     const app = new Hono();
+    // -- Compression --
+    // gzip/deflate every compressible response (JSON API payloads, JS/CSS bundles,
+    // the HTML shell). Session message/list endpoints ship hundreds of KB of JSON;
+    // Vite chunks are another few hundred KB raw. The middleware skips already-
+    // compressed binary types (png/ico) by content-type, so the immutable image
+    // assets pay no CPU cost. Registered first so it wraps both routes and static.
+    app.use('*', compress());
     // -- API routes --
     app.route('/', configRoutes);
     app.route('/', agentRoutes);

package/dist/model/index.js CHANGED Viewed

@@ -16,5 +16,5 @@
 export { getModelsDevCatalog, getCatalogProvider, getCatalogModel, searchCatalogProviders, } from './catalog.js';
 export { listProviders, getProvider, addProvider, updateProvider, removeProvider, setProviderValidation, listProfiles, getProfile, addProfile, updateProfile, removeProfile, getActiveProfileId, getActiveProfile, setActiveProfile, } from './store.js';
 export { validateProvider } from './validation.js';
-export { resolveAgentInjection, isAgentBoundToProfile, } from './injector.js';
+export { resolveAgentInjection, isAgentBoundToProfile, prewarmLocalModel, } from './injector.js';
 export { getProviderModelList, invalidateProviderModels, peekProviderModelList, peekProviderModelInfo, prefetchProviderModels, } from './provider-models.js';

package/dist/model/injector.js CHANGED Viewed

@@ -7,6 +7,7 @@
  * = adding one entry to AGENT_INJECT_TABLE.
  */
 import { resolveCredential } from '../core/secrets/index.js';
+import { writeScopedLog } from '../core/logging.js';
 import { getActiveProfile, getProvider } from './store.js';
 import { peekProviderModelInfo, prefetchProviderModels } from './provider-models.js';
 import { ensureResponsesBridge, upstreamToken } from './responses-bridge.js';
@@ -249,6 +250,46 @@ function codexLocalProvider(provider) {
         return 'lmstudio';
     return 'ollama';
 }
+/** Ollama keeps a prewarmed model resident for this long (its `keep_alive`). */
+const PREWARM_KEEP_ALIVE = '30m';
+/**
+ * Warm a localhost model backend so the user's first real turn doesn't pay the
+ * model cold-load (weights → memory). Fire-and-forget: never blocks the caller,
+ * never throws.
+ *
+ *  - Ollama has a native load endpoint — `POST /api/generate {model, keep_alive}`
+ *    with no prompt loads the weights and returns immediately; `keep_alive`
+ *    keeps them resident across the seed + real turns of a session.
+ *  - LM Studio JIT-loads on first request, so we nudge it with a 1-token
+ *    completion against its OpenAI-compatible endpoint.
+ *
+ * Called when a local Profile is bound (warm while the user reads / types) and
+ * again at spawn (re-assert keep_alive). Measured: a cold gemma3:4b spent ~12s
+ * before its first token; prewarmed, generation starts in ~2s.
+ */
+export function prewarmLocalModel(provider, modelId) {
+    if (!modelId || !isLocalProvider(provider))
+        return;
+    let origin;
+    try {
+        origin = new URL(provider.baseURL).origin;
+    }
+    catch {
+        return;
+    }
+    const swallow = () => { };
+    if (codexLocalProvider(provider) === 'lmstudio') {
+        void fetch(`${origin}/v1/chat/completions`, {
+            method: 'POST', headers: { 'content-type': 'application/json' },
+            body: JSON.stringify({ model: modelId, max_tokens: 1, messages: [{ role: 'user', content: 'hi' }] }),
+        }).then(swallow, swallow);
+        return;
+    }
+    void fetch(`${origin}/api/generate`, {
+        method: 'POST', headers: { 'content-type': 'application/json' },
+        body: JSON.stringify({ model: modelId, keep_alive: PREWARM_KEEP_ALIVE }),
+    }).then(r => { writeScopedLog('model-prewarm', `ollama load ${modelId} → ${r.status}`); }, e => { writeScopedLog('model-prewarm', `ollama load ${modelId} failed: ${e?.message || e}`, { level: 'warn', stream: 'stderr' }); });
+}
 /**
  * Decide how codex should reach a provider. Codex 0.140+ speaks ONLY the
  * Responses API, so the route depends on what the provider implements:
@@ -295,6 +336,7 @@ const codexInjector = async (provider, profile, apiKey) => {
     // providers cannot be overridden.")
     if (route === 'local-oss') {
         const local = codexLocalProvider(provider);
+        prewarmLocalModel(provider, model);
         return {
             env: {}, argvAppend: [],
             codexConfigOverrides: [`model_provider="${local}"`],