pikiloom 0.4.14 → 0.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dashboard/dist/assets/AgentTab-CKoy_-w4.js +1 -0
  2. package/dashboard/dist/assets/{DirBrowser-B5hxg2zn.js → DirBrowser-DpbuN0OL.js} +1 -1
  3. package/dashboard/dist/assets/{ExtensionsTab-C2FAUsui.js → ExtensionsTab-ymr7K8dU.js} +1 -1
  4. package/dashboard/dist/assets/{IMAccessTab-CS-2-ENn.js → IMAccessTab-CaTtCn3l.js} +1 -1
  5. package/dashboard/dist/assets/{Modal-BF2CycPZ.js → Modal-DA-9kJxp.js} +1 -1
  6. package/dashboard/dist/assets/{Modals-BHYtxTUE.js → Modals-BkLIRnNK.js} +1 -1
  7. package/dashboard/dist/assets/Select-B0pZtuzF.js +1 -0
  8. package/dashboard/dist/assets/SessionPanel-CYQtZZNX.js +1 -0
  9. package/dashboard/dist/assets/{SystemTab-B_hq7KIo.js → SystemTab-B9TcGMzc.js} +1 -1
  10. package/dashboard/dist/assets/codex-C6EwIzap.png +0 -0
  11. package/dashboard/dist/assets/deepseek-DOQzDJ-4.ico +0 -0
  12. package/dashboard/dist/assets/hermes-ClPe1RPI.png +0 -0
  13. package/dashboard/dist/assets/{index-Dws-2k-J.js → index-BCYshErN.js} +3 -3
  14. package/dashboard/dist/assets/index-C5irxzzD.js +23 -0
  15. package/dashboard/dist/assets/logo-wordmark-B0Z6VgSZ.png +0 -0
  16. package/dashboard/dist/assets/logo-wordmark-light-D9FCWeOH.png +0 -0
  17. package/dashboard/dist/assets/playwright-GP3HuCap.ico +0 -0
  18. package/dashboard/dist/assets/qwen-DKVAROae.png +0 -0
  19. package/dashboard/dist/assets/shared-i_XUH0xm.js +1 -0
  20. package/dashboard/dist/index.html +1 -1
  21. package/dashboard/dist/logo.png +0 -0
  22. package/dist/agent/auto-update.js +99 -4
  23. package/dist/agent/drivers/claude.js +6 -26
  24. package/dist/agent/drivers/codex.js +4 -26
  25. package/dist/agent/drivers/gemini.js +4 -26
  26. package/dist/agent/drivers/hermes.js +4 -26
  27. package/dist/agent/index.js +1 -1
  28. package/dist/agent/session.js +16 -3
  29. package/dist/agent/stream.js +37 -3
  30. package/dist/bot/bot.js +18 -5
  31. package/dist/channels/telegram/bot.js +2 -2
  32. package/dist/channels/telegram/render.js +47 -1
  33. package/dist/core/constants.js +8 -0
  34. package/dist/dashboard/routes/models.js +9 -1
  35. package/dist/dashboard/routes/sessions.js +25 -0
  36. package/dist/dashboard/server.js +8 -0
  37. package/dist/model/index.js +1 -1
  38. package/dist/model/injector.js +42 -0
  39. package/dist/model/responses-bridge.js +129 -88
  40. package/package.json +1 -1
  41. package/dashboard/dist/assets/AgentTab-Ce9nOgKB.js +0 -1
  42. package/dashboard/dist/assets/Select--CwQ1vbY.js +0 -1
  43. package/dashboard/dist/assets/SessionPanel-D0h4d0Nw.js +0 -1
  44. package/dashboard/dist/assets/codex-DYadqqp0.png +0 -0
  45. package/dashboard/dist/assets/deepseek-BeYNZEk0.ico +0 -0
  46. package/dashboard/dist/assets/hermes-BAarh-tH.png +0 -0
  47. package/dashboard/dist/assets/index-jCpvbF9B.js +0 -23
  48. package/dashboard/dist/assets/logo-wordmark-FzeBAUsd.png +0 -0
  49. package/dashboard/dist/assets/logo-wordmark-light-snSpARTN.png +0 -0
  50. package/dashboard/dist/assets/playwright-BldPFZgC.ico +0 -0
  51. package/dashboard/dist/assets/qwen-xykkX0_y.png +0 -0
  52. package/dashboard/dist/assets/shared-D1ruCzXL.js +0 -1
@@ -16,7 +16,7 @@ import { join, extname } from 'node:path';
16
16
  import { resolve as resolvePath } from 'node:path';
17
17
  import { registerDriver } from '../driver.js';
18
18
  import { AcpClient, toAcpMcpServers } from '../acp-client.js';
19
- import { agentLog, agentWarn, emptyUsage, normalizeErrorMessage, listPikiloomSessions, findPikiloomSession, buildStreamPreviewMeta, applyTurnWindow, pushRecentActivity, IMAGE_EXTS, mimeForExt, } from '../index.js';
19
+ import { agentLog, agentWarn, emptyUsage, normalizeErrorMessage, listPikiloomSessions, managedRecordToSessionInfo, findPikiloomSession, buildStreamPreviewMeta, applyTurnWindow, pushRecentActivity, IMAGE_EXTS, mimeForExt, } from '../index.js';
20
20
  // Build the ACP `prompt` content array from the user's text + staged
21
21
  // attachments. Images become ImageContentBlocks (base64 + mimeType — the
22
22
  // shape Hermes' acp_adapter accepts and converts to OpenAI multimodal
@@ -370,32 +370,10 @@ async function getHermesSessions(workdir, limit) {
370
370
  // for the `hermes sessions` CLI but irrelevant to pikiloom, which always
371
371
  // creates its own ACP session per turn and records it under .pikiloom.
372
372
  const resolvedWorkdir = resolvePath(workdir);
373
+ // Canonical record→SessionInfo mapper (single source of truth) — see claude.ts.
374
+ // Hand-rolling dropped thinkingEffort/workflowEnabled/profileId.
373
375
  const records = listPikiloomSessions(resolvedWorkdir, 'hermes');
374
- const sessions = records.map(record => ({
375
- sessionId: record.sessionId,
376
- agent: 'hermes',
377
- workdir: record.workdir,
378
- workspacePath: record.workspacePath,
379
- threadId: record.threadId,
380
- model: record.model,
381
- createdAt: record.createdAt,
382
- title: record.title,
383
- running: record.runState === 'running',
384
- runState: record.runState,
385
- runDetail: record.runDetail,
386
- runUpdatedAt: record.runUpdatedAt,
387
- runPid: record.runPid,
388
- classification: record.classification,
389
- userStatus: record.userStatus,
390
- userNote: record.userNote,
391
- lastQuestion: record.lastQuestion,
392
- lastAnswer: record.lastAnswer,
393
- lastMessageText: record.lastMessageText,
394
- migratedFrom: record.migratedFrom,
395
- migratedTo: record.migratedTo,
396
- linkedSessions: record.linkedSessions,
397
- numTurns: record.numTurns ?? null,
398
- }));
376
+ const sessions = records.map(managedRecordToSessionInfo);
399
377
  sessions.sort((a, b) => Date.parse(b.createdAt || '') - Date.parse(a.createdAt || ''));
400
378
  const sliced = typeof limit === 'number' ? sessions.slice(0, limit) : sessions;
401
379
  agentLog(`[sessions:hermes] workdir=${resolvedWorkdir} pikiloom=${records.length} returned=${sliced.length}`);
@@ -23,7 +23,7 @@ export { attachAgentImage, attachInlineImage, materializeImage, rewriteImageBloc
23
23
  // ── Re-export: utilities ────────────────────────────────────────────────────
24
24
  export { Q, agentLog, agentWarn, agentError, dedupeStrings, numberOrNull, normalizeStreamPreviewPlan, parseTodoWriteAsPlan, normalizeActivityLine, pushRecentActivity, detectClaudeApiError, isRetryableClaudeApiError, detectClaudeModelError, claudeModelErrorMessage, firstNonEmptyLine, shortValue, normalizeErrorMessage, joinErrorMessages, appendSystemPrompt, mimeForExt, computeContext, buildStreamPreviewMeta, summarizeClaudeToolUse, summarizeClaudeToolResult, previewToolCallInput, previewToolCallResult, roundPercent, toIsoFromEpochSeconds, normalizeUsageStatus, labelFromWindowMinutes, usageWindowFromRateLimit, parseJsonTail, modelFamily, normalizeClaudeModelId, emptyUsage, readTailLines, stripInjectedPrompts, sanitizeSessionUserPreviewText, SESSION_PREVIEW_IMAGE_PLACEHOLDER_RE, CLAUDE_AT_MENTION_IMAGE_RE, extractClaudeAtMentionImagePaths, stripClaudeAtMentionImages, isPendingSessionId, emitSessionIdUpdate, sessionListDisplayTitle, } from './utils.js';
25
25
  // ── Re-export: session management ───────────────────────────────────────────
26
- export { updateSessionMeta, promoteSessionId, recordFork, listPikiloomSessions, findPikiloomSession, getSessionStoredConfig, ensureManagedSession, findManagedThreadSession, stageSessionFiles, mergeManagedAndNativeSessions, getSessions, getSessionTail, getSessionMessages, applyTurnWindow, applyTurnFilter, classifySession, deriveUserStatus, exportSession, importSession, deleteAgentSession, isProcessAlive, isRunningSessionStale, reconcileOrphanedRunningSessions, } from './session.js';
26
+ export { updateSessionMeta, promoteSessionId, recordFork, listPikiloomSessions, findPikiloomSession, getSessionStoredConfig, ensureManagedSession, findManagedThreadSession, stageSessionFiles, mergeManagedAndNativeSessions, managedRecordToSessionInfo, getSessions, getSessionTail, getSessionMessages, applyTurnWindow, applyTurnFilter, classifySession, deriveUserStatus, exportSession, importSession, deleteAgentSession, isProcessAlive, isRunningSessionStale, reconcileOrphanedRunningSessions, } from './session.js';
27
27
  // ── Re-export: stream & detection ───────────────────────────────────────────
28
28
  export { detectAgentBin, listAgents, resolveDefaultAgent, run, doStream, listModels, resolveAgentModels, getUsage, getAgentBoundModelId, setAgentBoundModelId, } from './stream.js';
29
29
  // ── Re-export: driver registry ──────────────────────────────────────────────
@@ -222,6 +222,7 @@ function normalizeSessionRecord(raw, workdir) {
222
222
  title: typeof raw?.title === 'string' && raw.title.trim() ? raw.title.trim() : null,
223
223
  model: typeof raw?.model === 'string' && raw.model.trim() ? raw.model.trim() : null,
224
224
  thinkingEffort: typeof raw?.thinkingEffort === 'string' && raw.thinkingEffort.trim() ? raw.thinkingEffort.trim() : null,
225
+ workflowEnabled: typeof raw?.workflowEnabled === 'boolean' ? raw.workflowEnabled : null,
225
226
  profileId: typeof raw?.profileId === 'string' && raw.profileId.trim() ? raw.profileId.trim() : null,
226
227
  stagedFiles: Array.isArray(raw?.stagedFiles) ? dedupeStrings(raw.stagedFiles.filter((v) => typeof v === 'string')) : [],
227
228
  lastUserAttachments: Array.isArray(raw?.lastUserAttachments)
@@ -299,7 +300,7 @@ function writeSessionMeta(record) {
299
300
  workspacePath: record.workspacePath,
300
301
  threadId: record.threadId,
301
302
  createdAt: record.createdAt, updatedAt: record.updatedAt,
302
- title: record.title, model: record.model, thinkingEffort: record.thinkingEffort, stagedFiles: record.stagedFiles,
303
+ title: record.title, model: record.model, thinkingEffort: record.thinkingEffort, workflowEnabled: record.workflowEnabled, stagedFiles: record.stagedFiles,
303
304
  runState: record.runState, runDetail: record.runDetail, runUpdatedAt: record.runUpdatedAt,
304
305
  runPid: record.runPid,
305
306
  classification: record.classification,
@@ -583,7 +584,7 @@ export function ensureSessionWorkspace(opts) {
583
584
  workspacePath: sessionWorkspacePath(workdir, opts.agent, sessionId),
584
585
  threadId,
585
586
  createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(),
586
- title: summarizePromptTitle(opts.title) || null, model: null, thinkingEffort: null, profileId: null, stagedFiles: [], lastUserAttachments: [],
587
+ title: summarizePromptTitle(opts.title) || null, model: null, thinkingEffort: null, workflowEnabled: null, profileId: null, stagedFiles: [], lastUserAttachments: [],
587
588
  runState: 'completed', runDetail: null, runUpdatedAt: new Date().toISOString(),
588
589
  runPid: null,
589
590
  classification: null, userStatus: null, userNote: null,
@@ -607,7 +608,7 @@ export function ensureSessionWorkspace(opts) {
607
608
  // ---------------------------------------------------------------------------
608
609
  // Record to SessionInfo
609
610
  // ---------------------------------------------------------------------------
610
- function managedRecordToSessionInfo(record) {
611
+ export function managedRecordToSessionInfo(record) {
611
612
  // Collapse pre-fix records that stored the canonical skill expansion as the
612
613
  // title / lastQuestion / lastMessageText. New records get collapsed at write
613
614
  // time in `prepareStreamOpts`; this read-time pass keeps existing sessions
@@ -623,6 +624,7 @@ function managedRecordToSessionInfo(record) {
623
624
  threadId: record.threadId,
624
625
  model: record.model,
625
626
  thinkingEffort: record.thinkingEffort,
627
+ workflowEnabled: record.workflowEnabled ?? null,
626
628
  profileId: record.profileId ?? null,
627
629
  createdAt: record.createdAt,
628
630
  title,
@@ -729,6 +731,7 @@ export function getSessionStoredConfig(workdir, agent, sessionId) {
729
731
  return {
730
732
  model: record?.model ?? null,
731
733
  thinkingEffort: record?.thinkingEffort ?? null,
734
+ workflowEnabled: record?.workflowEnabled ?? null,
732
735
  profileId: record?.profileId ?? null,
733
736
  };
734
737
  }
@@ -825,6 +828,16 @@ export function mergeManagedAndNativeSessions(managedSessions, nativeSessions) {
825
828
  runUpdatedAt: useNativeTimeline ? (native.runUpdatedAt ?? managed.runUpdatedAt) : (managed.runUpdatedAt ?? native.runUpdatedAt),
826
829
  title: native.title || managed.title,
827
830
  model: native.model || managed.model,
831
+ // Pikiloom-owned metadata: the native session file (Claude JSONL etc.)
832
+ // carries none of these, so the `...native` spread would clobber them with
833
+ // `undefined`/`null`. The managed record (our centralized index) is the
834
+ // source of truth — recover each like `model` above. Without this the list
835
+ // silently drops the user's per-session choices: effort/Workflow fold back
836
+ // to the global default (per-send `ultra` → `max` after the turn) and the
837
+ // BYOK Profile binding is lost on resume.
838
+ thinkingEffort: managed.thinkingEffort ?? native.thinkingEffort ?? null,
839
+ workflowEnabled: managed.workflowEnabled ?? native.workflowEnabled ?? null,
840
+ profileId: managed.profileId ?? native.profileId ?? null,
828
841
  createdAt: native.createdAt || managed.createdAt,
829
842
  classification: managed.classification ?? native.classification ?? null,
830
843
  userStatus: managed.userStatus ?? native.userStatus ?? null,
@@ -7,7 +7,8 @@ import fs from 'node:fs';
7
7
  import path from 'node:path';
8
8
  import { restartManagedBrowser } from '../browser-supervisor.js';
9
9
  import { terminateProcessTree } from '../core/process-control.js';
10
- import { AGENT_DETECT_TIMEOUTS, AGENT_STREAM_HARD_KILL_GRACE_MS } from '../core/constants.js';
10
+ import { AGENT_DETECT_TIMEOUTS, AGENT_STREAM_HARD_KILL_GRACE_MS, AGENT_UPDATE_TIMEOUTS } from '../core/constants.js';
11
+ import { awaitAgentUpdateIdle } from './auto-update.js';
11
12
  import { getDriver, allDrivers, getAcceptedProviderKinds, hasDriver } from './driver.js';
12
13
  import { resolveAgentInjection, getActiveProfile, getActiveProfileId, getProvider, updateProfile, listProfiles, } from '../model/index.js';
13
14
  import { Q, agentLog, agentWarn, agentError, joinErrorMessages, normalizeErrorMessage, buildStreamPreviewMeta, computeContext, shortValue, isPendingSessionId, dedupeStrings, normalizeStreamPreviewPlan, } from './utils.js';
@@ -417,12 +418,18 @@ function prepareStreamOpts(opts) {
417
418
  },
418
419
  };
419
420
  }
420
- function finalizeStreamResult(result, workdir, prompt, session) {
421
+ function finalizeStreamResult(result, workdir, prompt, session, workflowEnabled) {
421
422
  if (result.sessionId)
422
423
  syncManagedSessionIdentity(session, workdir, result.sessionId);
423
424
  session.record.model = result.model || session.record.model;
424
425
  if (result.thinkingEffort)
425
426
  session.record.thinkingEffort = result.thinkingEffort;
427
+ // Remember whether this turn ran with Workflow on so the synthetic `ultra`
428
+ // rung re-folds for display after the live stream ends and on resume — the
429
+ // stored `thinkingEffort` stays the concrete rung (e.g. `max`). `undefined`
430
+ // (driver invoked outside the bot) leaves the prior value untouched.
431
+ if (workflowEnabled !== undefined)
432
+ session.record.workflowEnabled = workflowEnabled;
426
433
  // Capture the BYOK Profile that was in effect for this run so a future
427
434
  // `session.switch` can re-bind it (null = native CLI auth).
428
435
  try {
@@ -544,13 +551,40 @@ export async function doStream(opts) {
544
551
  catch (e) {
545
552
  agentWarn(`[byok] failed to apply Profile injection: ${e?.message || e}`);
546
553
  }
554
+ // In-memory-first: stamp the turn's resolved reasoning rung + Workflow opt-in
555
+ // onto the centralized index NOW — before the agent CLI has flushed its own
556
+ // session file — so the session list/composer reflect the user's pick during
557
+ // the very first turn instead of only after finalizeStreamResult. The managed
558
+ // record is the single source of truth for this metadata and links to the
559
+ // native agent-session by id on promotion; finalize re-stamps it (plus the
560
+ // actual model) authoritatively at turn end.
561
+ try {
562
+ if (prepared.thinkingEffort) {
563
+ session.record.thinkingEffort = prepared.thinkingEffort.trim().toLowerCase() || session.record.thinkingEffort;
564
+ }
565
+ if (opts.claudeWorkflowEnabled !== undefined) {
566
+ session.record.workflowEnabled = opts.claudeWorkflowEnabled;
567
+ }
568
+ saveSessionRecord(opts.workdir, session.record);
569
+ }
570
+ catch (e) {
571
+ agentWarn(`[session] turn-start metadata stamp failed: ${e?.message || e}`);
572
+ }
547
573
  try {
548
574
  const driver = getDriver(prepared.agent);
549
575
  if (opts.forkOf && !driver.capabilities?.fork) {
550
576
  throw new Error(`Agent ${prepared.agent} does not support fork`);
551
577
  }
578
+ // A background agent-CLI auto-update (`npm install -g` / `brew upgrade`, by
579
+ // this process OR the `npx pikiloom@latest` self-bootstrap) briefly removes
580
+ // the bin while it relinks; exec'ing into that window fails with exit 127
581
+ // "command not found". Wait out any in-flight reinstall of THIS agent before
582
+ // dispatching to the driver — this is the one chokepoint every agent turn
583
+ // (claude -p, claude TUI, codex app-server, gemini) passes through. No-op
584
+ // when nothing is updating.
585
+ await awaitAgentUpdateIdle(prepared.agent, AGENT_UPDATE_TIMEOUTS.spawnWait);
552
586
  const result = await driver.doStream(prepared);
553
- const finalized = finalizeStreamResult(result, opts.workdir, opts.prompt, session);
587
+ const finalized = finalizeStreamResult(result, opts.workdir, opts.prompt, session, opts.claudeWorkflowEnabled);
554
588
  // Once the child has its real session ID, link the lineage. We do this
555
589
  // after finalize so the child record is persisted with its native ID.
556
590
  if (opts.forkOf && finalized.sessionId) {
package/dist/bot/bot.js CHANGED
@@ -228,6 +228,12 @@ export class Bot {
228
228
  */
229
229
  enrichSnapshot(snap) {
230
230
  let next = snap;
231
+ // Attach the running turn's prompt so a watching terminal can render the
232
+ // user message for a follow-up it didn't originate (no local optimistic
233
+ // bubble). The RunningTask record is the source of truth while it's live.
234
+ const runningPrompt = next.taskId ? this.activeTasks.get(next.taskId)?.prompt : '';
235
+ if (runningPrompt)
236
+ next = { ...next, question: collapseSkillPrompt(runningPrompt) ?? runningPrompt };
231
237
  if (next.queuedTaskIds?.length) {
232
238
  const queuedTasks = next.queuedTaskIds.map(taskId => {
233
239
  const raw = this.activeTasks.get(taskId)?.prompt || '';
@@ -446,8 +452,8 @@ export class Bot {
446
452
  emitStreamQueued(sessionKey, taskId) {
447
453
  this.emitStream(sessionKey, { type: 'queued', taskId, position: this.getQueuePosition(sessionKey, taskId) });
448
454
  }
449
- emitStreamStart(taskId, session) {
450
- const cfg = this.resolveSessionStreamConfig(session);
455
+ emitStreamStart(taskId, session, opts) {
456
+ const cfg = this.resolveSessionStreamConfig(session, opts);
451
457
  const key = this.liveSessionKey(taskId, session.key);
452
458
  this.debug(`[stream-lifecycle] start task=${taskId} key=${key} sessionId=${session.sessionId || '(pending)'} model=${cfg.model || '-'}`);
453
459
  this.emitStream(key, {
@@ -1422,7 +1428,10 @@ export class Bot {
1422
1428
  this.finishTask(taskId);
1423
1429
  return;
1424
1430
  }
1425
- this.emitStreamStart(taskId, session);
1431
+ // Thread the per-send Workflow choice so the live divider folds to `ultra`
1432
+ // immediately (the dashboard composer picks ultra per-send without flipping
1433
+ // the agent-global flag resolveSessionStreamConfig would otherwise read).
1434
+ this.emitStreamStart(taskId, session, { workflowEnabled: opts.workflowEnabled });
1426
1435
  // Wire up IM rendering for non-dashboard chats so /goal-driven tasks stream
1427
1436
  // to the same channel that submitted them, matching handleMessage's UX.
1428
1437
  const presenter = chatId !== 'dashboard'
@@ -1985,7 +1994,7 @@ export class Bot {
1985
1994
  * Mirrors the fallback chain used inside runStream() so callers (e.g. submitSessionTask
1986
1995
  * emitting a 'start' event) can label the active turn before runStream resolves it.
1987
1996
  */
1988
- resolveSessionStreamConfig(cs) {
1997
+ resolveSessionStreamConfig(cs, opts) {
1989
1998
  const agentConfig = this.agentConfigs[cs.agent] || {};
1990
1999
  const sessionWorkdir = cs.workdir || this.workdir;
1991
2000
  const storedConfig = cs.sessionId && !isPendingSessionId(cs.sessionId)
@@ -2003,7 +2012,11 @@ export class Bot {
2003
2012
  // Fold to the synthetic 'ultra' rung for display when Workflow is on (mirrors
2004
2013
  // effortSelectionForAgent / the dashboard's foldUltraEffort), so the live reply
2005
2014
  // badge and IM running footer label the turn 'ultra' instead of a bare 'max'.
2006
- const displayEffort = effort && getDriverCapabilities(cs.agent).workflow && this.workflowEnabledForAgent(cs.agent)
2015
+ // Prefer the per-turn workflow choice when the caller threads one (dashboard
2016
+ // composer sends ultra per-send without flipping the agent-global flag);
2017
+ // fall back to the agent-global flag (IM /mode, agent card).
2018
+ const workflowOn = opts?.workflowEnabled ?? this.workflowEnabledForAgent(cs.agent);
2019
+ const displayEffort = effort && getDriverCapabilities(cs.agent).workflow && workflowOn
2007
2020
  ? 'ultra'
2008
2021
  : effort;
2009
2022
  return { model: model || null, effort: displayEffort };
@@ -19,7 +19,7 @@ import { buildAgentsCommandView, buildModelsCommandView, buildModeCommandView, b
19
19
  import { buildSwitchWorkdirView, buildWorkspacesView, resolveRegisteredPath } from './directory.js';
20
20
  import { LivePreview } from './live-preview.js';
21
21
  import { registerProcessRuntime, buildRestartCommand, requestProcessRestart, } from '../../core/process-control.js';
22
- import { buildInitialPreviewHtml, buildHumanLoopPromptHtml, buildAnsweredHumanLoopPromptHtml, buildStreamPreviewHtml, buildFinalReplyRender, dispatchImageBlocks, escapeHtml, formatMenuLines, formatProviderUsageLines, renderCommandNoticeHtml, renderCommandSelectionHtml, renderCommandSelectionKeyboard, renderSessionTurnHtml, truncateMiddle, } from './render.js';
22
+ import { buildInitialPreviewHtml, buildHumanLoopPromptHtml, buildAnsweredHumanLoopPromptHtml, buildStreamPreviewHtml, buildFinalReplyRender, dispatchImageBlocks, escapeHtml, formatMenuLines, formatProviderUsageLines, renderCommandNoticeHtml, renderCommandSelectionHtml, renderCommandSelectionKeyboard, renderSessionTurnHtml, truncateMiddle, unpackCallbackData, } from './render.js';
23
23
  import { currentHumanLoopQuestion, humanLoopOptionSelected } from '../../bot/human-loop.js';
24
24
  import { TelegramChannel } from './channel.js';
25
25
  import { splitText, supportsChannelCapability } from '../base.js';
@@ -1016,7 +1016,7 @@ export class TelegramBot extends Bot {
1016
1016
  return false;
1017
1017
  }
1018
1018
  async handleSessionsPageCallback(data, ctx) {
1019
- const action = decodeCommandAction(data);
1019
+ const action = decodeCommandAction(unpackCallbackData(data));
1020
1020
  if (!action)
1021
1021
  return false;
1022
1022
  const result = await executeCommandAction(this, ctx.chatId, action, {
@@ -85,13 +85,59 @@ export function renderCommandSelectionHtml(view) {
85
85
  lines.push('', `<i>${escapeHtml(view.helperText)}</i>`);
86
86
  return lines.join('\n');
87
87
  }
88
+ /**
89
+ * Telegram caps `callback_data` at 64 bytes. Most encoded actions fit easily,
90
+ * but BYOK model rows encode as `md:p:<uuid>:<modelId>` (~42 bytes of overhead
91
+ * before the model id even starts), so a single long provider/model id blows
92
+ * the limit — and Telegram then rejects the *entire* message with
93
+ * BUTTON_DATA_INVALID, killing the whole menu. Mirror the PathRegistry idiom
94
+ * from directory.ts: stash the over-length payload and ship a short `r:<id>`
95
+ * token instead, resolving it back on the callback round-trip.
96
+ */
97
+ const TELEGRAM_CALLBACK_LIMIT = 64;
98
+ class CallbackDataRegistry {
99
+ idToData = new Map();
100
+ dataToId = new Map();
101
+ nextId = 1;
102
+ pack(data) {
103
+ if (Buffer.byteLength(data, 'utf8') <= TELEGRAM_CALLBACK_LIMIT)
104
+ return data;
105
+ let id = this.dataToId.get(data);
106
+ if (id == null) {
107
+ id = this.nextId++;
108
+ this.dataToId.set(data, id);
109
+ this.idToData.set(id, data);
110
+ if (this.idToData.size > 500) {
111
+ for (const oldId of [...this.idToData.keys()].slice(0, 200)) {
112
+ const oldData = this.idToData.get(oldId);
113
+ this.idToData.delete(oldId);
114
+ this.dataToId.delete(oldData);
115
+ }
116
+ }
117
+ }
118
+ return `r:${id}`;
119
+ }
120
+ unpack(data) {
121
+ if (!data.startsWith('r:'))
122
+ return data;
123
+ const id = Number.parseInt(data.slice(2), 10);
124
+ if (!Number.isFinite(id))
125
+ return data;
126
+ return this.idToData.get(id) ?? data;
127
+ }
128
+ }
129
+ const callbackDataRegistry = new CallbackDataRegistry();
130
+ /** Resolve a `r:<id>` token back to its original encoded action payload. */
131
+ export function unpackCallbackData(data) {
132
+ return callbackDataRegistry.unpack(data);
133
+ }
88
134
  export function renderCommandSelectionKeyboard(view) {
89
135
  if (!view.rows.length)
90
136
  return undefined;
91
137
  return {
92
138
  inline_keyboard: view.rows.map(row => row.map(button => ({
93
139
  text: formatCommandButtonLabel(button),
94
- callback_data: encodeCommandAction(button.action),
140
+ callback_data: callbackDataRegistry.pack(encodeCommandAction(button.action)),
95
141
  }))),
96
142
  };
97
143
  }
@@ -289,6 +289,14 @@ export const AGENT_UPDATE_TIMEOUTS = {
289
289
  npmPrefix: 10_000,
290
290
  /** Timeout for `npm view <pkg> version`. */
291
291
  npmView: 20_000,
292
+ /** Max time an agent spawn waits for an in-flight reinstall of that agent's
293
+ * own CLI to finish before exec'ing. A concurrent `npm install -g` / `brew
294
+ * upgrade` (this process OR the prod self-bootstrap) briefly removes the bin
295
+ * symlink, so racing it yields exit 127 "command not found"; the wait
296
+ * resolves early the instant the install ends. */
297
+ spawnWait: 2 * 60_000,
298
+ /** Poll interval while a spawn waits out an in-flight reinstall. */
299
+ spawnWaitPoll: 200,
292
300
  };
293
301
  // ---------------------------------------------------------------------------
294
302
  // Code agent (shared layer)
@@ -18,7 +18,7 @@
18
18
  * POST /api/models/agents/:agent/active → bind/unbind a Profile
19
19
  */
20
20
  import { Hono } from 'hono';
21
- import { getModelsDevCatalog, searchCatalogProviders, listProviders, getProvider, addProvider, updateProvider, removeProvider, setProviderValidation, listProfiles, getProfile, addProfile, updateProfile, removeProfile, getActiveProfileId, setActiveProfile, validateProvider, getProviderModelList, invalidateProviderModels, } from '../../model/index.js';
21
+ import { getModelsDevCatalog, searchCatalogProviders, listProviders, getProvider, addProvider, updateProvider, removeProvider, setProviderValidation, listProfiles, getProfile, addProfile, updateProfile, removeProfile, getActiveProfileId, setActiveProfile, prewarmLocalModel, validateProvider, getProviderModelList, invalidateProviderModels, } from '../../model/index.js';
22
22
  import { isCredentialRef, describeCredentialRef } from '../../core/secrets/index.js';
23
23
  import { allDriverIds } from '../../agent/index.js';
24
24
  const router = new Hono();
@@ -315,6 +315,14 @@ router.post('/api/models/agents/:agent/active', async (c) => {
315
315
  return c.json({ ok: false, error: 'profileId (string|null) is required' }, 400);
316
316
  try {
317
317
  setActiveProfile(agent, profileId);
318
+ // Warm a local backend the instant it's selected, so the user's first turn
319
+ // skips the model cold-load. Fire-and-forget; never blocks the bind.
320
+ if (profileId) {
321
+ const profile = getProfile(profileId);
322
+ const provider = profile ? getProvider(profile.providerId) : null;
323
+ if (profile && provider)
324
+ prewarmLocalModel(provider, profile.modelId);
325
+ }
318
326
  return c.json({ ok: true, agent, activeProfileId: profileId });
319
327
  }
320
328
  catch (e) {
@@ -64,6 +64,29 @@ function enrichWithRuntimeStatus(sessions, bot) {
64
64
  };
65
65
  });
66
66
  }
67
+ // Session list cards render only the *head* of these text fields (previews via
68
+ // firstMeaningfulLine / slice / sanitize) and use them for client-side substring
69
+ // search. A session whose last turn dumped a huge tool output or long answer would
70
+ // otherwise ship tens of KB per card that the list never displays — on a busy
71
+ // workspace the swim-lane ballooned to ~600KB, dominated by these fields. Cap each
72
+ // to a preview length: previews are unchanged and search still matches the head.
73
+ // Full text remains available from the session-detail / messages endpoints.
74
+ const LIST_PREVIEW_FIELD_CAP = 2048;
75
+ function capPreviewField(value) {
76
+ return typeof value === 'string' && value.length > LIST_PREVIEW_FIELD_CAP
77
+ ? value.slice(0, LIST_PREVIEW_FIELD_CAP)
78
+ : value;
79
+ }
80
+ /** Thin a session for list/swim-lane responses by capping its heavy preview text. */
81
+ export function projectSessionForList(session) {
82
+ return {
83
+ ...session,
84
+ lastQuestion: capPreviewField(session.lastQuestion),
85
+ lastAnswer: capPreviewField(session.lastAnswer),
86
+ lastMessageText: capPreviewField(session.lastMessageText),
87
+ runDetail: capPreviewField(session.runDetail),
88
+ };
89
+ }
67
90
  function readStringField(value) {
68
91
  return typeof value === 'string' ? value.trim() : '';
69
92
  }
@@ -172,6 +195,7 @@ app.get('/api/sessions/:agent', async (c) => {
172
195
  const result = await querySessions({ workdir, agent });
173
196
  const enriched = enrichWithRuntimeStatus(result.sessions, botRef);
174
197
  const paged = paginateSessionResult(enriched, page, limit);
198
+ paged.sessions = paged.sessions.map(projectSessionForList);
175
199
  runtime.debug(`[sessions] endpoint=single agent=${agent} ok=${result.ok} total=${result.total} ` +
176
200
  `returned=${paged.sessions.length} error=${result.errors.join('; ') || '(none)'}`);
177
201
  return c.json({
@@ -195,6 +219,7 @@ app.get('/api/sessions', async (c) => {
195
219
  const result = await querySessions({ workdir, agent: a.agent });
196
220
  const enriched = enrichWithRuntimeStatus(result.sessions, botRef);
197
221
  const paged = paginateSessionResult(enriched, page, limit);
222
+ paged.sessions = paged.sessions.map(projectSessionForList);
198
223
  swimLane[a.agent] = {
199
224
  ok: result.ok,
200
225
  error: result.errors[0] || null,
@@ -3,6 +3,7 @@
3
3
  */
4
4
  import http from 'node:http';
5
5
  import { Hono } from 'hono';
6
+ import { compress } from 'hono/compress';
6
7
  import { getRequestListener } from '@hono/node-server';
7
8
  import { serveStatic } from '@hono/node-server/serve-static';
8
9
  import path from 'node:path';
@@ -87,6 +88,13 @@ export async function startDashboard(opts = {}) {
87
88
  if (opts.bot)
88
89
  runtime.attachBot(opts.bot);
89
90
  const app = new Hono();
91
+ // -- Compression --
92
+ // gzip/deflate every compressible response (JSON API payloads, JS/CSS bundles,
93
+ // the HTML shell). Session message/list endpoints ship hundreds of KB of JSON;
94
+ // Vite chunks are another few hundred KB raw. The middleware skips already-
95
+ // compressed binary types (png/ico) by content-type, so the immutable image
96
+ // assets pay no CPU cost. Registered first so it wraps both routes and static.
97
+ app.use('*', compress());
90
98
  // -- API routes --
91
99
  app.route('/', configRoutes);
92
100
  app.route('/', agentRoutes);
@@ -16,5 +16,5 @@
16
16
  export { getModelsDevCatalog, getCatalogProvider, getCatalogModel, searchCatalogProviders, } from './catalog.js';
17
17
  export { listProviders, getProvider, addProvider, updateProvider, removeProvider, setProviderValidation, listProfiles, getProfile, addProfile, updateProfile, removeProfile, getActiveProfileId, getActiveProfile, setActiveProfile, } from './store.js';
18
18
  export { validateProvider } from './validation.js';
19
- export { resolveAgentInjection, isAgentBoundToProfile, } from './injector.js';
19
+ export { resolveAgentInjection, isAgentBoundToProfile, prewarmLocalModel, } from './injector.js';
20
20
  export { getProviderModelList, invalidateProviderModels, peekProviderModelList, peekProviderModelInfo, prefetchProviderModels, } from './provider-models.js';
@@ -7,6 +7,7 @@
7
7
  * = adding one entry to AGENT_INJECT_TABLE.
8
8
  */
9
9
  import { resolveCredential } from '../core/secrets/index.js';
10
+ import { writeScopedLog } from '../core/logging.js';
10
11
  import { getActiveProfile, getProvider } from './store.js';
11
12
  import { peekProviderModelInfo, prefetchProviderModels } from './provider-models.js';
12
13
  import { ensureResponsesBridge, upstreamToken } from './responses-bridge.js';
@@ -249,6 +250,46 @@ function codexLocalProvider(provider) {
249
250
  return 'lmstudio';
250
251
  return 'ollama';
251
252
  }
253
/** Ollama keeps a prewarmed model resident for this long (its `keep_alive`). */
const PREWARM_KEEP_ALIVE = '30m';
/**
 * Warm a localhost model backend so the user's first real turn doesn't pay the
 * model cold-load (weights → memory). Fire-and-forget: never blocks the caller,
 * never throws, and never leaves a rejected promise unhandled.
 *
 * - Ollama has a native load endpoint — `POST /api/generate {model, keep_alive}`
 *   with no prompt loads the weights without generating anything; `keep_alive`
 *   keeps them resident across the seed + real turns of a session.
 * - LM Studio JIT-loads on first request, so we nudge it with a 1-token
 *   completion against its OpenAI-compatible endpoint.
 *
 * Called when a local Profile is bound (warm while the user reads / types) and
 * again at spawn (re-assert keep_alive). Measured: a cold gemma3:4b spent ~12s
 * before its first token; prewarmed, generation starts in ~2s.
 *
 * @param {{ baseURL?: string }} provider - Provider record; only its `baseURL`
 *   origin is used here. Non-local providers are ignored.
 * @param {string} modelId - Model to load. A falsy value makes this a no-op.
 * @returns {void}
 */
export function prewarmLocalModel(provider, modelId) {
    if (!modelId || !isLocalProvider(provider))
        return;
    let origin;
    try {
        // Only the origin matters: both warm-up endpoints live at fixed paths,
        // whatever path suffix (e.g. `/v1`) the configured baseURL carries.
        origin = new URL(provider.baseURL).origin;
    }
    catch {
        // Missing or malformed baseURL: nothing to warm.
        return;
    }
    if (codexLocalProvider(provider) === 'lmstudio') {
        void postPrewarm('lmstudio', `${origin}/v1/chat/completions`, modelId, {
            model: modelId, max_tokens: 1, messages: [{ role: 'user', content: 'hi' }],
        });
        return;
    }
    void postPrewarm('ollama', `${origin}/api/generate`, modelId, {
        model: modelId, keep_alive: PREWARM_KEEP_ALIVE,
    });
}
/**
 * POST one warm-up request and log its outcome under the `model-prewarm`
 * scope. Every failure (network error, logging error, body read error) is
 * contained here, so the returned promise always fulfils and callers can
 * safely `void` it.
 */
async function postPrewarm(backend, url, modelId, payload) {
    try {
        const res = await fetch(url, {
            method: 'POST', headers: { 'content-type': 'application/json' },
            body: JSON.stringify(payload),
        });
        writeScopedLog('model-prewarm', `${backend} load ${modelId} → ${res.status}`);
        try {
            // Drain the body: an unread fetch response keeps its connection
            // held until garbage collection. The content itself is irrelevant.
            await res.arrayBuffer();
        }
        catch {
            // The load request was already accepted; a truncated body is harmless.
        }
    }
    catch (e) {
        try {
            writeScopedLog('model-prewarm', `${backend} load ${modelId} failed: ${e?.message || e}`, { level: 'warn', stream: 'stderr' });
        }
        catch {
            // Best-effort warm-up: a logging failure must not surface either.
        }
    }
}
252
293
  /**
253
294
  * Decide how codex should reach a provider. Codex 0.140+ speaks ONLY the
254
295
  * Responses API, so the route depends on what the provider implements:
@@ -295,6 +336,7 @@ const codexInjector = async (provider, profile, apiKey) => {
295
336
  // providers cannot be overridden.")
296
337
  if (route === 'local-oss') {
297
338
  const local = codexLocalProvider(provider);
339
+ prewarmLocalModel(provider, model);
298
340
  return {
299
341
  env: {}, argvAppend: [],
300
342
  codexConfigOverrides: [`model_provider="${local}"`],