pikiloom 0.4.14 → 0.4.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/dashboard/dist/assets/AgentTab-B5tmLxa7.js +1 -0
  2. package/dashboard/dist/assets/{DirBrowser-B5hxg2zn.js → DirBrowser-CBp5nyfS.js} +1 -1
  3. package/dashboard/dist/assets/{ExtensionsTab-C2FAUsui.js → ExtensionsTab-w4pkrNas.js} +1 -1
  4. package/dashboard/dist/assets/{IMAccessTab-CS-2-ENn.js → IMAccessTab-37Po5LP1.js} +1 -1
  5. package/dashboard/dist/assets/{Modal-BF2CycPZ.js → Modal-CBMO5UcS.js} +1 -1
  6. package/dashboard/dist/assets/{Modals-BHYtxTUE.js → Modals-DMlEjJUG.js} +1 -1
  7. package/dashboard/dist/assets/Select-BiSTkS_t.js +1 -0
  8. package/dashboard/dist/assets/SessionPanel-BVC7kwlX.js +1 -0
  9. package/dashboard/dist/assets/{SystemTab-B_hq7KIo.js → SystemTab-Brzt5wTT.js} +1 -1
  10. package/dashboard/dist/assets/codex-C6EwIzap.png +0 -0
  11. package/dashboard/dist/assets/deepseek-DOQzDJ-4.ico +0 -0
  12. package/dashboard/dist/assets/hermes-ClPe1RPI.png +0 -0
  13. package/dashboard/dist/assets/index-5Q-Q7ByM.js +3 -0
  14. package/dashboard/dist/assets/index-Dw3ty4QY.js +23 -0
  15. package/dashboard/dist/assets/logo-wordmark-B0Z6VgSZ.png +0 -0
  16. package/dashboard/dist/assets/logo-wordmark-light-D9FCWeOH.png +0 -0
  17. package/dashboard/dist/assets/playwright-GP3HuCap.ico +0 -0
  18. package/dashboard/dist/assets/qwen-DKVAROae.png +0 -0
  19. package/dashboard/dist/assets/shared-P-W1OYQ6.js +1 -0
  20. package/dashboard/dist/index.html +1 -1
  21. package/dashboard/dist/logo.png +0 -0
  22. package/dist/agent/auto-update.js +99 -4
  23. package/dist/agent/drivers/claude.js +6 -26
  24. package/dist/agent/drivers/codex.js +4 -26
  25. package/dist/agent/drivers/gemini.js +4 -26
  26. package/dist/agent/drivers/hermes.js +4 -26
  27. package/dist/agent/index.js +1 -1
  28. package/dist/agent/mcp/bridge.js +201 -7
  29. package/dist/agent/mcp/extensions.js +20 -9
  30. package/dist/agent/session.js +16 -3
  31. package/dist/agent/stream.js +40 -5
  32. package/dist/bot/bot.js +18 -5
  33. package/dist/channels/telegram/bot.js +2 -2
  34. package/dist/channels/telegram/render.js +47 -1
  35. package/dist/core/constants.js +8 -0
  36. package/dist/dashboard/routes/config.js +134 -12
  37. package/dist/dashboard/routes/models.js +9 -1
  38. package/dist/dashboard/routes/sessions.js +25 -0
  39. package/dist/dashboard/server.js +8 -0
  40. package/dist/model/index.js +1 -1
  41. package/dist/model/injector.js +42 -0
  42. package/dist/model/responses-bridge.js +129 -88
  43. package/package.json +1 -1
  44. package/dashboard/dist/assets/AgentTab-Ce9nOgKB.js +0 -1
  45. package/dashboard/dist/assets/Select--CwQ1vbY.js +0 -1
  46. package/dashboard/dist/assets/SessionPanel-D0h4d0Nw.js +0 -1
  47. package/dashboard/dist/assets/codex-DYadqqp0.png +0 -0
  48. package/dashboard/dist/assets/deepseek-BeYNZEk0.ico +0 -0
  49. package/dashboard/dist/assets/hermes-BAarh-tH.png +0 -0
  50. package/dashboard/dist/assets/index-Dws-2k-J.js +0 -3
  51. package/dashboard/dist/assets/index-jCpvbF9B.js +0 -23
  52. package/dashboard/dist/assets/logo-wordmark-FzeBAUsd.png +0 -0
  53. package/dashboard/dist/assets/logo-wordmark-light-snSpARTN.png +0 -0
  54. package/dashboard/dist/assets/playwright-BldPFZgC.ico +0 -0
  55. package/dashboard/dist/assets/qwen-xykkX0_y.png +0 -0
  56. package/dashboard/dist/assets/shared-D1ruCzXL.js +0 -1
@@ -85,13 +85,59 @@ export function renderCommandSelectionHtml(view) {
85
85
  lines.push('', `<i>${escapeHtml(view.helperText)}</i>`);
86
86
  return lines.join('\n');
87
87
  }
88
/**
 * Telegram caps `callback_data` at 64 bytes. Most encoded actions fit easily,
 * but BYOK model rows encode as `md:p:<uuid>:<modelId>` (~42 bytes of overhead
 * before the model id even starts), so a single long provider/model id blows
 * the limit — and Telegram then rejects the *entire* message with
 * BUTTON_DATA_INVALID, killing the whole menu. Mirror the PathRegistry idiom
 * from directory.ts: stash the over-length payload and ship a short `r:<id>`
 * token instead, resolving it back on the callback round-trip.
 */
const TELEGRAM_CALLBACK_LIMIT = 64;
/**
 * In-memory, bounded two-way map between over-length callback payloads and the
 * short `r:<id>` tokens that stand in for them on the wire.
 */
class CallbackDataRegistry {
    /** A token is exactly `r:` followed by decimal digits — nothing else. */
    static TOKEN_PATTERN = /^r:(\d+)$/;
    idToData = new Map();
    dataToId = new Map();
    nextId = 1;
    /**
     * Return `data` unchanged when it fits Telegram's limit, otherwise a short
     * `r:<id>` token. The same payload always maps to the same token while it
     * is still registered.
     */
    pack(data) {
        if (Buffer.byteLength(data, 'utf8') <= TELEGRAM_CALLBACK_LIMIT)
            return data;
        let id = this.dataToId.get(data);
        if (id == null) {
            id = this.nextId++;
            this.dataToId.set(data, id);
            this.idToData.set(id, data);
            if (this.idToData.size > 500) {
                // Maps iterate in insertion order, so the first entries are the
                // oldest tokens; drop 200 of them in one sweep.
                let toEvict = 200;
                for (const [oldId, oldData] of this.idToData) {
                    if (toEvict <= 0)
                        break;
                    this.idToData.delete(oldId);
                    this.dataToId.delete(oldData);
                    toEvict -= 1;
                }
            }
        }
        return `r:${id}`;
    }
    /**
     * Resolve a token produced by `pack`. Anything that is not a string, is not
     * a well-formed `r:<digits>` token, or refers to an evicted/unknown id is
     * returned untouched so the caller's normal decoding can deal with it.
     */
    unpack(data) {
        // Guard: callback payloads may be absent; don't crash on non-strings.
        if (typeof data !== 'string')
            return data;
        // Strict match: `r:12junk` must not silently resolve to token 12.
        const match = CallbackDataRegistry.TOKEN_PATTERN.exec(data);
        if (!match)
            return data;
        return this.idToData.get(Number(match[1])) ?? data;
    }
}
const callbackDataRegistry = new CallbackDataRegistry();
/** Resolve a `r:<id>` token back to its original encoded action payload. */
export function unpackCallbackData(data) {
    return callbackDataRegistry.unpack(data);
}
88
134
/**
 * Build the Telegram inline keyboard for a command-selection view.
 * Returns `undefined` when the view has no rows; otherwise every button gets a
 * label and a `callback_data` value that has been passed through the callback
 * registry so over-length payloads are swapped for short tokens.
 */
export function renderCommandSelectionKeyboard(view) {
    const { rows } = view;
    if (!rows.length)
        return undefined;
    const toInlineButton = (button) => {
        const text = formatCommandButtonLabel(button);
        const callback_data = callbackDataRegistry.pack(encodeCommandAction(button.action));
        return { text, callback_data };
    };
    const inline_keyboard = rows.map((row) => row.map((button) => toInlineButton(button)));
    return { inline_keyboard };
}
@@ -289,6 +289,14 @@ export const AGENT_UPDATE_TIMEOUTS = {
289
289
  npmPrefix: 10_000,
290
290
  /** Timeout for `npm view <pkg> version`. */
291
291
  npmView: 20_000,
292
+ /** Max time an agent spawn waits for an in-flight reinstall of that agent's
293
+ * own CLI to finish before exec'ing. A concurrent `npm install -g` / `brew
294
+ * upgrade` (this process OR the prod self-bootstrap) briefly removes the bin
295
+ * symlink, so racing it yields exit 127 "command not found"; the wait
296
+ * resolves early the instant the install ends. */
297
+ spawnWait: 2 * 60_000,
298
+ /** Poll interval while a spawn waits out an in-flight reinstall. */
299
+ spawnWaitPoll: 200,
292
300
  };
293
301
  // ---------------------------------------------------------------------------
294
302
  // Code agent (shared layer)
@@ -6,8 +6,9 @@ import fs from 'node:fs';
6
6
  import path from 'node:path';
7
7
  import os from 'node:os';
8
8
  import { spawn, spawnSync } from 'node:child_process';
9
+ import { fileURLToPath } from 'node:url';
9
10
  import { loadUserConfig, saveUserConfig, applyUserConfig, hasUserConfigFile } from '../../core/config/user-config.js';
10
- import { expandTilde } from '../../core/platform.js';
11
+ import { expandTilde, whichSync } from '../../core/platform.js';
11
12
  import { readGitStatus } from '../../core/git.js';
12
13
  import { isSetupReady } from '../../cli/onboarding.js';
13
14
  import { validateDingtalkConfig, validateDiscordConfig, validateFeishuConfig, validateSlackConfig, validateTelegramConfig, validateWecomConfig, validateWeixinConfig, } from '../../core/config/validation.js';
@@ -66,7 +67,96 @@ function runOpenCommand(command, args) {
66
67
  throw new Error(detail || `Failed to run ${command} ${args.join(' ')}`);
67
68
  }
68
69
  }
69
- function openPathWithTarget(filePath, target, isDirectory) {
70
/**
 * Peel matching wrapper characters (backticks, double/single quotes, angle
 * brackets) and surrounding whitespace off a pasted path, repeating until no
 * wrapper pair remains.
 *
 * @param {string} value Raw path text as typed or pasted.
 * @returns {string} The text with every balanced outer wrapper removed.
 */
function stripOpenPathWrapping(value) {
    let text = value.trim();
    const pairs = [['`', '`'], ['"', '"'], ["'", "'"], ['<', '>']];
    let changed = true;
    while (changed && text.length >= 2) {
        changed = false;
        for (const [left, right] of pairs) {
            // Re-check the length for every pair: an earlier strip in this same
            // pass may have left a single character, which would otherwise count
            // as both the opening and the closing quote and be erased.
            if (text.length >= 2 && text.startsWith(left) && text.endsWith(right)) {
                text = text.slice(left.length, -right.length).trim();
                changed = true;
            }
        }
    }
    return text;
}
85
/**
 * Turn user-supplied "open this" text into a plain filesystem path string.
 * Wrapper characters are stripped first; `file://` URLs are converted with
 * `fileURLToPath`, and `vscode://file/...` links are mapped to an absolute
 * path. Any other text is returned as-is after unwrapping.
 *
 * @param {string} raw Raw path, `file://` URL or `vscode://file/` link.
 * @returns {string} Decoded path text (not yet resolved or checked on disk).
 */
function decodeOpenPathInput(raw) {
    // decodeURI throws URIError on malformed escapes (for example a literal "%"
    // in a file name). Treat such text as already decoded instead of failing.
    const decodeLeniently = (encoded) => {
        try {
            return decodeURI(encoded);
        }
        catch {
            return encoded;
        }
    };
    const text = stripOpenPathWrapping(raw);
    if (text.startsWith('file://')) {
        try {
            return fileURLToPath(text);
        }
        catch {
            // fileURLToPath rejects some URLs; fall back to a manual strip + decode.
            return decodeLeniently(text.slice('file://'.length));
        }
    }
    if (text.startsWith('vscode://file/')) {
        return decodeLeniently(`/${text.slice('vscode://file/'.length)}`);
    }
    return text;
}
100
/**
 * Pick the directory that relative open-paths are resolved against: the
 * caller's non-blank `basePath` if given, otherwise the runtime workdir from
 * user config, otherwise the process working directory. The result is
 * tilde-expanded and made absolute.
 */
function resolveOpenBasePath(basePath) {
    const requested = typeof basePath === 'string' ? basePath.trim() : '';
    let base;
    if (requested) {
        base = requested;
    }
    else {
        base = runtime.getRuntimeWorkdir(loadUserConfig());
    }
    const root = base || process.cwd();
    return path.resolve(expandTilde(root));
}
106
/**
 * Separate a trailing `:line` or `:line:column` locator from a path, but only
 * when doing so yields a path that exists on disk. If the whole candidate
 * already exists, or no existing prefix is found, the normalized candidate is
 * returned with `line` and `column` set to null.
 */
function splitExistingLineSuffix(candidate) {
    const wholePath = path.normalize(candidate);
    const withoutLocation = { filePath: wholePath, line: null, column: null };
    // A real file whose name happens to end in ":<digits>" wins over the suffix reading.
    if (fs.existsSync(wholePath))
        return withoutLocation;
    const parts = /^(.*?)(?::(\d+)(?::(\d+))?)$/.exec(wholePath);
    const [, head, lineText, columnText] = parts ?? [];
    if (!head)
        return withoutLocation;
    const filePath = path.normalize(head);
    if (!fs.existsSync(filePath))
        return withoutLocation;
    const line = Number(lineText);
    const column = columnText ? Number(columnText) : null;
    return { filePath, line, column };
}
122
/**
 * Resolve raw "open in editor" input to `{ filePath, line, column }`.
 * The input is decoded and tilde-expanded; relative paths are resolved against
 * the open base path, then any trailing `:line[:column]` locator is split off.
 */
export function resolveOpenPathLocator(rawPath, basePath) {
    const expanded = expandTilde(decodeOpenPathInput(rawPath));
    if (path.isAbsolute(expanded)) {
        return splitExistingLineSuffix(path.resolve(expanded));
    }
    const root = resolveOpenBasePath(basePath);
    return splitExistingLineSuffix(path.resolve(root, expanded));
}
130
/**
 * Format the `path:line[:column]` argument passed to an editor's `-g` flag.
 * Returns null when there is no (truthy) line to jump to; the column part is
 * appended only when it is truthy.
 */
function editorGotoArg(filePath, location) {
    const line = location?.line;
    if (!line) {
        return null;
    }
    let columnSuffix = '';
    if (location.column) {
        columnSuffix = `:${location.column}`;
    }
    return `${filePath}:${line}${columnSuffix}`;
}
135
/**
 * Best-effort launcher: run `command` with `args` only if `whichSync` can find
 * it. Reports success as a boolean and never throws on a failed launch.
 */
function tryOpenCommand(command, args) {
    let opened = false;
    if (whichSync(command)) {
        try {
            runOpenCommand(command, args);
            opened = true;
        }
        catch {
            opened = false;
        }
    }
    return opened;
}
146
/**
 * Best-effort jump to a file position through a `vscode://file<path>:line[:column]`
 * URL handed to the `open` command. Does nothing (returns false) when there is
 * no line to jump to.
 *
 * @param {string} filePath Absolute path of the file to open.
 * @param {{line?: number|null, column?: number|null}|null|undefined} location Target position.
 * @returns {boolean} true when the `open` command ran without throwing.
 */
function tryOpenVSCodeUrl(filePath, location) {
    if (!location?.line)
        return false;
    const suffix = `:${location.line}${location.column ? `:${location.column}` : ''}`;
    try {
        // encodeURI leaves "?" and "#" untouched because they are URL
        // delimiters; inside a file path they must be escaped or the rest of
        // the path is parsed as a query/fragment. Encoding happens inside the
        // try so an unencodable path degrades to `false` like any other failure.
        const encodedPath = encodeURI(filePath).replace(/[?#]/g, (ch) => encodeURIComponent(ch));
        runOpenCommand('open', [`vscode://file${encodedPath}${suffix}`]);
        return true;
    }
    catch {
        return false;
    }
}
158
+ function openPathWithTarget(filePath, target, isDirectory, location) {
159
+ const gotoArg = isDirectory ? null : editorGotoArg(filePath, location);
70
160
  if (process.platform === 'darwin') {
71
161
  switch (target) {
72
162
  case 'finder':
@@ -76,13 +166,21 @@ function openPathWithTarget(filePath, target, isDirectory) {
76
166
  runOpenCommand('open', [filePath]);
77
167
  return;
78
168
  case 'cursor':
169
+ if (gotoArg && tryOpenCommand('cursor', ['-g', gotoArg]))
170
+ return;
79
171
  runOpenCommand('open', ['-a', 'Cursor', filePath]);
80
172
  return;
81
173
  case 'windsurf':
174
+ if (gotoArg && tryOpenCommand('windsurf', ['-g', gotoArg]))
175
+ return;
82
176
  runOpenCommand('open', ['-a', 'Windsurf', filePath]);
83
177
  return;
84
178
  case 'vscode':
85
179
  default:
180
+ if (gotoArg && tryOpenCommand('code', ['-g', gotoArg]))
181
+ return;
182
+ if (gotoArg && tryOpenVSCodeUrl(filePath, location))
183
+ return;
86
184
  runOpenCommand('open', ['-a', 'Visual Studio Code', filePath]);
87
185
  return;
88
186
  }
@@ -90,10 +188,16 @@ function openPathWithTarget(filePath, target, isDirectory) {
90
188
  if (process.platform === 'win32') {
91
189
  switch (target) {
92
190
  case 'cursor':
93
- runOpenCommand('cursor', [filePath]);
191
+ if (gotoArg)
192
+ runOpenCommand('cursor', ['-g', gotoArg]);
193
+ else
194
+ runOpenCommand('cursor', [filePath]);
94
195
  return;
95
196
  case 'windsurf':
96
- runOpenCommand('windsurf', [filePath]);
197
+ if (gotoArg)
198
+ runOpenCommand('windsurf', ['-g', gotoArg]);
199
+ else
200
+ runOpenCommand('windsurf', [filePath]);
97
201
  return;
98
202
  case 'finder':
99
203
  case 'default':
@@ -101,16 +205,25 @@ function openPathWithTarget(filePath, target, isDirectory) {
101
205
  return;
102
206
  case 'vscode':
103
207
  default:
104
- runOpenCommand('code', [filePath]);
208
+ if (gotoArg)
209
+ runOpenCommand('code', ['-g', gotoArg]);
210
+ else
211
+ runOpenCommand('code', [filePath]);
105
212
  return;
106
213
  }
107
214
  }
108
215
  switch (target) {
109
216
  case 'cursor':
110
- runOpenCommand('cursor', [filePath]);
217
+ if (gotoArg)
218
+ runOpenCommand('cursor', ['-g', gotoArg]);
219
+ else
220
+ runOpenCommand('cursor', [filePath]);
111
221
  return;
112
222
  case 'windsurf':
113
- runOpenCommand('windsurf', [filePath]);
223
+ if (gotoArg)
224
+ runOpenCommand('windsurf', ['-g', gotoArg]);
225
+ else
226
+ runOpenCommand('windsurf', [filePath]);
114
227
  return;
115
228
  case 'finder':
116
229
  case 'default':
@@ -118,7 +231,10 @@ function openPathWithTarget(filePath, target, isDirectory) {
118
231
  return;
119
232
  case 'vscode':
120
233
  default:
121
- runOpenCommand('code', [filePath]);
234
+ if (gotoArg)
235
+ runOpenCommand('code', ['-g', gotoArg]);
236
+ else
237
+ runOpenCommand('code', [filePath]);
122
238
  return;
123
239
  }
124
240
  }
@@ -453,14 +569,20 @@ app.post('/api/open-in-editor', async (c) => {
453
569
  try {
454
570
  const body = await c.req.json();
455
571
  const filePath = typeof body?.filePath === 'string' ? body.filePath.trim() : '';
572
+ const basePath = typeof body?.basePath === 'string' && body.basePath.trim()
573
+ ? body.basePath.trim()
574
+ : typeof body?.workdir === 'string' && body.workdir.trim()
575
+ ? body.workdir.trim()
576
+ : null;
456
577
  const target = isOpenTarget(body?.target) ? body.target : 'vscode';
457
578
  if (!filePath)
458
579
  return c.json({ ok: false, error: 'filePath is required' }, 400);
459
- if (!fs.existsSync(filePath))
580
+ const resolved = resolveOpenPathLocator(filePath, basePath);
581
+ if (!fs.existsSync(resolved.filePath))
460
582
  return c.json({ ok: false, error: 'Path not found' }, 404);
461
- const stat = fs.statSync(filePath);
462
- openPathWithTarget(filePath, target, stat.isDirectory());
463
- return c.json({ ok: true });
583
+ const stat = fs.statSync(resolved.filePath);
584
+ openPathWithTarget(resolved.filePath, target, stat.isDirectory(), resolved);
585
+ return c.json({ ok: true, filePath: resolved.filePath, line: resolved.line, column: resolved.column });
464
586
  }
465
587
  catch (err) {
466
588
  const detail = err instanceof Error ? err.message : String(err);
@@ -18,7 +18,7 @@
18
18
  * POST /api/models/agents/:agent/active → bind/unbind a Profile
19
19
  */
20
20
  import { Hono } from 'hono';
21
- import { getModelsDevCatalog, searchCatalogProviders, listProviders, getProvider, addProvider, updateProvider, removeProvider, setProviderValidation, listProfiles, getProfile, addProfile, updateProfile, removeProfile, getActiveProfileId, setActiveProfile, validateProvider, getProviderModelList, invalidateProviderModels, } from '../../model/index.js';
21
+ import { getModelsDevCatalog, searchCatalogProviders, listProviders, getProvider, addProvider, updateProvider, removeProvider, setProviderValidation, listProfiles, getProfile, addProfile, updateProfile, removeProfile, getActiveProfileId, setActiveProfile, prewarmLocalModel, validateProvider, getProviderModelList, invalidateProviderModels, } from '../../model/index.js';
22
22
  import { isCredentialRef, describeCredentialRef } from '../../core/secrets/index.js';
23
23
  import { allDriverIds } from '../../agent/index.js';
24
24
  const router = new Hono();
@@ -315,6 +315,14 @@ router.post('/api/models/agents/:agent/active', async (c) => {
315
315
  return c.json({ ok: false, error: 'profileId (string|null) is required' }, 400);
316
316
  try {
317
317
  setActiveProfile(agent, profileId);
318
+ // Warm a local backend the instant it's selected, so the user's first turn
319
+ // skips the model cold-load. Fire-and-forget; never blocks the bind.
320
+ if (profileId) {
321
+ const profile = getProfile(profileId);
322
+ const provider = profile ? getProvider(profile.providerId) : null;
323
+ if (profile && provider)
324
+ prewarmLocalModel(provider, profile.modelId);
325
+ }
318
326
  return c.json({ ok: true, agent, activeProfileId: profileId });
319
327
  }
320
328
  catch (e) {
@@ -64,6 +64,29 @@ function enrichWithRuntimeStatus(sessions, bot) {
64
64
  };
65
65
  });
66
66
  }
67
// Session list cards render only the *head* of these text fields (previews via
// firstMeaningfulLine / slice / sanitize) and use them for client-side substring
// search. A session whose last turn dumped a huge tool output or long answer would
// otherwise ship tens of KB per card that the list never displays — on a busy
// workspace the swim-lane ballooned to ~600KB, dominated by these fields. Cap each
// to a preview length: previews are unchanged and search still matches the head.
// Full text remains available from the session-detail / messages endpoints.
const LIST_PREVIEW_FIELD_CAP = 2048;
/**
 * Truncate a string to at most LIST_PREVIEW_FIELD_CAP UTF-16 code units.
 * Non-string values and strings within the cap are returned unchanged.
 */
function capPreviewField(value) {
    if (typeof value !== 'string' || value.length <= LIST_PREVIEW_FIELD_CAP)
        return value;
    let end = LIST_PREVIEW_FIELD_CAP;
    // Never cut between the two halves of a surrogate pair: a trailing high
    // surrogate would leave an ill-formed string (rendered as U+FFFD).
    const lastUnit = value.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
        end -= 1;
    return value.slice(0, end);
}
/** Thin a session for list/swim-lane responses by capping its heavy preview text. */
export function projectSessionForList(session) {
    // Shallow copy: the caller's session object is left untouched.
    return {
        ...session,
        lastQuestion: capPreviewField(session.lastQuestion),
        lastAnswer: capPreviewField(session.lastAnswer),
        lastMessageText: capPreviewField(session.lastMessageText),
        runDetail: capPreviewField(session.runDetail),
    };
}
67
90
  function readStringField(value) {
68
91
  return typeof value === 'string' ? value.trim() : '';
69
92
  }
@@ -172,6 +195,7 @@ app.get('/api/sessions/:agent', async (c) => {
172
195
  const result = await querySessions({ workdir, agent });
173
196
  const enriched = enrichWithRuntimeStatus(result.sessions, botRef);
174
197
  const paged = paginateSessionResult(enriched, page, limit);
198
+ paged.sessions = paged.sessions.map(projectSessionForList);
175
199
  runtime.debug(`[sessions] endpoint=single agent=${agent} ok=${result.ok} total=${result.total} ` +
176
200
  `returned=${paged.sessions.length} error=${result.errors.join('; ') || '(none)'}`);
177
201
  return c.json({
@@ -195,6 +219,7 @@ app.get('/api/sessions', async (c) => {
195
219
  const result = await querySessions({ workdir, agent: a.agent });
196
220
  const enriched = enrichWithRuntimeStatus(result.sessions, botRef);
197
221
  const paged = paginateSessionResult(enriched, page, limit);
222
+ paged.sessions = paged.sessions.map(projectSessionForList);
198
223
  swimLane[a.agent] = {
199
224
  ok: result.ok,
200
225
  error: result.errors[0] || null,
@@ -3,6 +3,7 @@
3
3
  */
4
4
  import http from 'node:http';
5
5
  import { Hono } from 'hono';
6
+ import { compress } from 'hono/compress';
6
7
  import { getRequestListener } from '@hono/node-server';
7
8
  import { serveStatic } from '@hono/node-server/serve-static';
8
9
  import path from 'node:path';
@@ -87,6 +88,13 @@ export async function startDashboard(opts = {}) {
87
88
  if (opts.bot)
88
89
  runtime.attachBot(opts.bot);
89
90
  const app = new Hono();
91
+ // -- Compression --
92
+ // gzip/deflate every compressible response (JSON API payloads, JS/CSS bundles,
93
+ // the HTML shell). Session message/list endpoints ship hundreds of KB of JSON;
94
+ // Vite chunks are another few hundred KB raw. The middleware skips already-
95
+ // compressed binary types (png/ico) by content-type, so the immutable image
96
+ // assets pay no CPU cost. Registered first so it wraps both routes and static.
97
+ app.use('*', compress());
90
98
  // -- API routes --
91
99
  app.route('/', configRoutes);
92
100
  app.route('/', agentRoutes);
@@ -16,5 +16,5 @@
16
16
  export { getModelsDevCatalog, getCatalogProvider, getCatalogModel, searchCatalogProviders, } from './catalog.js';
17
17
  export { listProviders, getProvider, addProvider, updateProvider, removeProvider, setProviderValidation, listProfiles, getProfile, addProfile, updateProfile, removeProfile, getActiveProfileId, getActiveProfile, setActiveProfile, } from './store.js';
18
18
  export { validateProvider } from './validation.js';
19
- export { resolveAgentInjection, isAgentBoundToProfile, } from './injector.js';
19
+ export { resolveAgentInjection, isAgentBoundToProfile, prewarmLocalModel, } from './injector.js';
20
20
  export { getProviderModelList, invalidateProviderModels, peekProviderModelList, peekProviderModelInfo, prefetchProviderModels, } from './provider-models.js';
@@ -7,6 +7,7 @@
7
7
  * = adding one entry to AGENT_INJECT_TABLE.
8
8
  */
9
9
  import { resolveCredential } from '../core/secrets/index.js';
10
+ import { writeScopedLog } from '../core/logging.js';
10
11
  import { getActiveProfile, getProvider } from './store.js';
11
12
  import { peekProviderModelInfo, prefetchProviderModels } from './provider-models.js';
12
13
  import { ensureResponsesBridge, upstreamToken } from './responses-bridge.js';
@@ -249,6 +250,46 @@ function codexLocalProvider(provider) {
249
250
  return 'lmstudio';
250
251
  return 'ollama';
251
252
  }
253
/** Ollama keeps a prewarmed model resident for this long (its `keep_alive`). */
const PREWARM_KEEP_ALIVE = '30m';
/**
 * Warm a localhost model backend so the user's first real turn doesn't pay the
 * model cold-load (weights → memory). Fire-and-forget: never blocks the caller,
 * never throws.
 *
 * - Ollama has a native load endpoint — `POST /api/generate {model, keep_alive}`
 *   with no prompt loads the weights and returns immediately; `keep_alive`
 *   keeps them resident across the seed + real turns of a session.
 * - LM Studio JIT-loads on first request, so we nudge it with a 1-token
 *   completion against its OpenAI-compatible endpoint.
 *
 * Called when a local Profile is bound (warm while the user reads / types) and
 * again at spawn (re-assert keep_alive). Measured: a cold gemma3:4b spent ~12s
 * before its first token; prewarmed, generation starts in ~2s.
 *
 * Both backends log their outcome under the `model-prewarm` scope. Nothing is
 * returned and no promise is exposed to the caller.
 *
 * @param {object} provider Provider record; only its `baseURL` origin is used here.
 * @param {string} modelId Model to load; a falsy id makes this a no-op.
 * @returns {void}
 */
export function prewarmLocalModel(provider, modelId) {
    if (!modelId || !isLocalProvider(provider))
        return;
    let origin;
    try {
        origin = new URL(provider.baseURL).origin;
    }
    catch {
        // Unparseable base URL: nothing to warm.
        return;
    }
    // Shared fire-and-forget POST so both backends report and clean up alike.
    const post = (backend, url, payload) => {
        void fetch(url, {
            method: 'POST', headers: { 'content-type': 'application/json' },
            body: JSON.stringify(payload),
        }).then((res) => {
            writeScopedLog('model-prewarm', `${backend} load ${modelId} → ${res.status}`);
            // The payload itself is irrelevant; release the body so the
            // connection is not held open until garbage collection.
            res.body?.cancel().catch(() => { });
        }, (err) => {
            writeScopedLog('model-prewarm', `${backend} load ${modelId} failed: ${err?.message || err}`, { level: 'warn', stream: 'stderr' });
        }).catch(() => {
            // A failure while logging must not become an unhandled rejection.
        });
    };
    if (codexLocalProvider(provider) === 'lmstudio') {
        post('lmstudio', `${origin}/v1/chat/completions`, {
            model: modelId, max_tokens: 1, messages: [{ role: 'user', content: 'hi' }],
        });
        return;
    }
    post('ollama', `${origin}/api/generate`, { model: modelId, keep_alive: PREWARM_KEEP_ALIVE });
}
252
293
  /**
253
294
  * Decide how codex should reach a provider. Codex 0.140+ speaks ONLY the
254
295
  * Responses API, so the route depends on what the provider implements:
@@ -295,6 +336,7 @@ const codexInjector = async (provider, profile, apiKey) => {
295
336
  // providers cannot be overridden.")
296
337
  if (route === 'local-oss') {
297
338
  const local = codexLocalProvider(provider);
339
+ prewarmLocalModel(provider, model);
298
340
  return {
299
341
  env: {}, argvAppend: [],
300
342
  codexConfigOverrides: [`model_provider="${local}"`],