ethagent 2.3.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/README.md +18 -4
  2. package/package.json +2 -1
  3. package/src/app/FirstRun.tsx +157 -15
  4. package/src/app/FirstRunTimeline.tsx +4 -0
  5. package/src/app/input/AppInputProvider.tsx +19 -0
  6. package/src/app/input/appInputParser.ts +19 -4
  7. package/src/chat/ChatBottomPane.tsx +12 -1
  8. package/src/chat/ChatScreen.tsx +17 -5
  9. package/src/chat/ConversationStack.tsx +25 -19
  10. package/src/chat/MessageList.tsx +194 -53
  11. package/src/chat/chatSessionState.ts +4 -1
  12. package/src/chat/chatTurnOrchestrator.ts +65 -2
  13. package/src/chat/input/ChatInput.tsx +28 -2
  14. package/src/chat/input/imageRefs.ts +30 -0
  15. package/src/chat/input/textCursor.ts +13 -3
  16. package/src/chat/transcript/TranscriptView.tsx +7 -5
  17. package/src/chat/transcript/transcriptViewport.ts +88 -17
  18. package/src/chat/views/PermissionPrompt.tsx +26 -26
  19. package/src/chat/views/PermissionsView.tsx +18 -12
  20. package/src/chat/views/ResumeView.tsx +16 -7
  21. package/src/chat/views/RewindView.tsx +3 -1
  22. package/src/cli/ResetConfirmView.tsx +24 -9
  23. package/src/identity/continuity/editor.ts +27 -2
  24. package/src/identity/continuity/envelope.ts +125 -0
  25. package/src/identity/continuity/publicSkills.ts +37 -1
  26. package/src/identity/continuity/skills/frontmatter.ts +183 -0
  27. package/src/identity/continuity/skills/loadSkills.ts +609 -0
  28. package/src/identity/continuity/skills/publicSkillsSync.ts +32 -0
  29. package/src/identity/continuity/skills/scaffold.ts +52 -0
  30. package/src/identity/continuity/skills/types.ts +30 -0
  31. package/src/identity/continuity/storage/defaults.ts +28 -47
  32. package/src/identity/continuity/storage/files.ts +1 -0
  33. package/src/identity/continuity/storage/paths.ts +1 -0
  34. package/src/identity/continuity/storage/scaffold.ts +25 -23
  35. package/src/identity/continuity/storage/status.ts +34 -5
  36. package/src/identity/continuity/storage/types.ts +3 -2
  37. package/src/identity/continuity/storage.ts +3 -0
  38. package/src/identity/hub/OperationalRoutes.tsx +105 -3
  39. package/src/identity/hub/Routes.tsx +5 -3
  40. package/src/identity/hub/continuity/ContinuityDashboardScreen.tsx +5 -51
  41. package/src/identity/hub/continuity/RecoveryConfirmScreen.tsx +1 -1
  42. package/src/identity/hub/continuity/SavePromptScreen.tsx +1 -0
  43. package/src/identity/hub/continuity/effects.ts +36 -5
  44. package/src/identity/hub/continuity/skills/DeleteSkillConfirmScreen.tsx +112 -0
  45. package/src/identity/hub/continuity/skills/DeleteSkillScreen.tsx +123 -0
  46. package/src/identity/hub/continuity/skills/NewSkillScreen.tsx +57 -0
  47. package/src/identity/hub/continuity/skills/NewSkillVisibilityScreen.tsx +52 -0
  48. package/src/identity/hub/continuity/skills/SkillVisibilityScreen.tsx +171 -0
  49. package/src/identity/hub/continuity/skills/SkillsTreeScreen.tsx +213 -0
  50. package/src/identity/hub/continuity/snapshot.ts +3 -0
  51. package/src/identity/hub/continuity/state.ts +3 -2
  52. package/src/identity/hub/continuity/vault.ts +42 -10
  53. package/src/identity/hub/custody/CustodyEditFlow.tsx +3 -3
  54. package/src/identity/hub/identityHubReducer.ts +21 -0
  55. package/src/identity/hub/profile/effects.ts +16 -3
  56. package/src/identity/hub/restore/RestoreFlow.tsx +43 -6
  57. package/src/identity/hub/restore/apply.ts +12 -1
  58. package/src/identity/hub/restore/recovery.ts +11 -1
  59. package/src/identity/hub/restore/resolve.ts +1 -1
  60. package/src/identity/hub/restore/useRestoreEffects.ts +4 -6
  61. package/src/identity/hub/shared/components/DetailsScreen.tsx +4 -1
  62. package/src/identity/hub/shared/components/IdentitySummary.tsx +97 -53
  63. package/src/identity/hub/shared/components/MenuScreen.tsx +18 -15
  64. package/src/identity/hub/shared/components/UnlinkedIdentityScreen.tsx +1 -1
  65. package/src/identity/hub/shared/components/menuFlagsFromReconciliation.ts +8 -12
  66. package/src/identity/hub/shared/effects/sync.ts +16 -3
  67. package/src/identity/hub/shared/model/copy.ts +2 -4
  68. package/src/identity/hub/transfer/effects.ts +15 -2
  69. package/src/identity/hub/useIdentityHubContinuity.ts +145 -23
  70. package/src/identity/hub/useIdentityHubController.ts +5 -1
  71. package/src/identity/hub/useIdentityHubSideEffects.ts +2 -4
  72. package/src/mcp/manager.ts +1 -1
  73. package/src/models/ModelPicker.tsx +211 -74
  74. package/src/models/huggingface.ts +180 -2
  75. package/src/models/llamacpp.ts +261 -17
  76. package/src/models/llamacppPreflight.ts +16 -12
  77. package/src/models/modelPickerOptions.ts +57 -38
  78. package/src/providers/anthropic.ts +36 -5
  79. package/src/providers/contracts.ts +10 -1
  80. package/src/providers/gemini.ts +29 -3
  81. package/src/providers/openai-chat.ts +131 -11
  82. package/src/providers/openai-responses-format.ts +29 -8
  83. package/src/providers/openai-responses.ts +41 -11
  84. package/src/providers/registry.ts +1 -0
  85. package/src/runtime/toolExecution.ts +4 -3
  86. package/src/runtime/turn.ts +61 -30
  87. package/src/storage/config.ts +1 -0
  88. package/src/storage/sessions.ts +14 -2
  89. package/src/tools/changeDirectoryTool.ts +1 -1
  90. package/src/tools/contracts.ts +10 -0
  91. package/src/tools/deleteFileTool.ts +1 -1
  92. package/src/tools/editTool.ts +1 -1
  93. package/src/tools/listDirectoryTool.ts +1 -1
  94. package/src/tools/listSkillFilesTool.ts +77 -0
  95. package/src/tools/listSkillsTool.ts +68 -0
  96. package/src/tools/mcpResourceTools.ts +2 -2
  97. package/src/tools/privateContinuityReadTool.ts +1 -1
  98. package/src/tools/readSkillTool.ts +107 -0
  99. package/src/tools/readTool.ts +1 -1
  100. package/src/tools/registry.ts +6 -0
  101. package/src/tools/writeFileTool.ts +22 -2
  102. package/src/ui/Spinner.tsx +15 -3
  103. package/src/ui/theme.ts +2 -0
  104. package/src/utils/images.ts +140 -0
  105. package/src/utils/messages.ts +2 -0
  106. package/src/identity/continuity/localBackup.ts +0 -249
  107. package/src/identity/continuity/zipWriter.ts +0 -95
  108. package/src/identity/hub/continuity/index.ts +0 -7
  109. package/src/identity/hub/ens/index.ts +0 -11
  110. package/src/identity/hub/restore/index.ts +0 -22
@@ -54,6 +54,12 @@ export type HfSafetyReview = {
54
54
  reasons: string[]
55
55
  }
56
56
 
57
/**
 * Describes a projector ("mmproj") file listed in a Hugging Face repo, with
 * the information needed to download it to disk.
 */
export type HfMmprojCandidate = {
  // File name as it appears in the repo's sibling listing.
  filename: string
  // Size reported by the listing; producers in this file fall back to 0 when none is reported.
  sizeBytes: number
  // Destination path of the downloaded file on the local disk.
  localPath: string
}
62
+
57
63
  export type HfDownloadPlan = {
58
64
  repo: HuggingFaceRepoInfo
59
65
  repoId: string
@@ -64,6 +70,8 @@ export type HfDownloadPlan = {
64
70
  localPath: string
65
71
  displayName: string
66
72
  review: HfSafetyReview
73
+ mmprojCandidate?: HfMmprojCandidate
74
+ includeMmproj?: boolean
67
75
  }
68
76
 
69
77
  export type LocalHfModel = {
@@ -90,6 +98,9 @@ export type LocalHfModel = {
90
98
  installedAt: string
91
99
  status: LocalHfStatus
92
100
  sha256?: string
101
+ mmprojPath?: string
102
+ mmprojAvailable?: boolean
103
+ mmprojSizeBytes?: number
93
104
  }
94
105
 
95
106
  export type HfDownloadProgress = {
@@ -291,6 +302,14 @@ export function ggufFiles(repo: HuggingFaceRepoInfo): HuggingFaceSibling[] {
291
302
  .sort((a, b) => a.filename.localeCompare(b.filename))
292
303
  }
293
304
 
305
/**
 * Reports whether a file name looks like a projector file.
 *
 * The comparison is case-insensitive: the name has to begin with `mmproj-`
 * and finish with `.gguf`.
 *
 * @param filename File name to classify.
 * @returns `true` when both the prefix and the extension match.
 */
export function isMmprojFilename(filename: string): boolean {
  const normalized = filename.toLowerCase()
  if (!normalized.startsWith('mmproj-')) return false
  return normalized.endsWith('.gguf')
}
308
+
309
/**
 * Picks the first file in the repo listing whose name passes
 * `isMmprojFilename`.
 *
 * @param repo Repo description whose `siblings` are scanned in order.
 * @returns The first matching sibling, or `undefined` when none matches.
 */
export function findMmprojSibling(repo: HuggingFaceRepoInfo): HuggingFaceSibling | undefined {
  for (const sibling of repo.siblings) {
    if (isMmprojFilename(sibling.filename)) return sibling
  }
  return undefined
}
312
+
294
313
  export async function createHfDownloadPlan(
295
314
  input: string,
296
315
  filename?: string,
@@ -320,6 +339,14 @@ export async function createHfDownloadPlan(
320
339
  requestedRevision,
321
340
  resolvedRevision,
322
341
  })
342
+ const mmprojSibling = findMmprojSibling(repo)
343
+ const mmprojCandidate: HfMmprojCandidate | undefined = mmprojSibling
344
+ ? {
345
+ filename: mmprojSibling.filename,
346
+ sizeBytes: mmprojSibling.sizeBytes ?? 0,
347
+ localPath: localPathFor(repo.repoId, resolvedRevision, mmprojSibling.filename),
348
+ }
349
+ : undefined
323
350
  return {
324
351
  repo,
325
352
  repoId: repo.repoId,
@@ -330,6 +357,7 @@ export async function createHfDownloadPlan(
330
357
  localPath: localPathFor(repo.repoId, resolvedRevision, selected.filename),
331
358
  displayName: displayNameFor(repo.repoId, selected.filename),
332
359
  review,
360
+ mmprojCandidate,
333
361
  }
334
362
  }
335
363
 
@@ -432,10 +460,151 @@ export async function* downloadHfModel(
432
460
  }
433
461
 
434
462
  await fs.rename(partialPath, plan.localPath)
435
- await upsertLocalHfModel(modelFromPlan(plan, hash.digest('hex'), 'ready'))
463
+
464
+ let mmprojPath: string | undefined
465
+ if (plan.includeMmproj && plan.mmprojCandidate) {
466
+ yield* downloadMmprojFile(plan.repoId, plan.resolvedRevision, plan.mmprojCandidate, signal, fetchImpl)
467
+ mmprojPath = plan.mmprojCandidate.localPath
468
+ }
469
+
470
+ await upsertLocalHfModel(modelFromPlan(plan, hash.digest('hex'), 'ready', mmprojPath))
436
471
  yield { status: 'success', completed, total: Number.isFinite(total) ? total : completed }
437
472
  }
438
473
 
474
/**
 * Streams the projector file for a repo revision to `candidate.localPath`,
 * reporting progress as it goes.
 *
 * Data is written to `<localPath>.partial` first and renamed into place only
 * after the whole body has been read, so an interrupted transfer never leaves
 * a file at the final path.
 *
 * @param repoId Repo the file is fetched from.
 * @param resolvedRevision Revision used to build the download URL.
 * @param candidate File name and destination of the projector.
 * @param signal Optional abort signal; passed to the fetch and checked after each chunk.
 * @param fetchImpl Fetch implementation used for the request.
 * @yields `downloading-mmproj` progress events carrying the byte count so far
 *   and, when the response has a numeric `content-length`, the total.
 * @throws Error on a non-OK response, an empty body, or cancellation.
 */
async function* downloadMmprojFile(
  repoId: string,
  resolvedRevision: string,
  candidate: HfMmprojCandidate,
  signal: AbortSignal | undefined,
  fetchImpl: FetchImpl,
): AsyncIterable<HfDownloadProgress> {
  await fs.mkdir(path.dirname(candidate.localPath), { recursive: true })
  const partialPath = `${candidate.localPath}.partial`
  const response = await fetchImpl(resolveUrl(repoId, resolvedRevision, candidate.filename), { signal })
  if (!response.ok || !response.body) {
    throw new Error(response.ok ? 'empty projector download body' : `projector download HTTP ${response.status}`)
  }

  const contentLength = Number.parseInt(response.headers.get('content-length') ?? '', 10)
  const total = Number.isFinite(contentLength) ? contentLength : undefined
  const handle = await fs.open(partialPath, 'w')
  let completed = 0
  let complete = false
  let lastProgressAt = Date.now()
  let lastProgressBytes = 0
  try {
    // The first progress event is yielded inside the try block on purpose: a
    // consumer that stops iterating right here still triggers the cleanup
    // below, so the file handle and the partial file are not leaked.
    yield { status: 'downloading-mmproj', completed, total }
    const reader = response.body.getReader()
    try {
      while (true) {
        const { done, value } = await reader.read()
        if (done) break
        if (signal?.aborted) throw new Error('Cancelled')
        const buffer = Buffer.from(value)
        await handle.write(buffer)
        completed += buffer.byteLength
        const now = Date.now()
        if (shouldReportDownloadProgress(completed, lastProgressBytes, now, lastProgressAt)) {
          lastProgressAt = now
          lastProgressBytes = completed
          yield { status: 'downloading-mmproj', completed, total }
        }
      }
      complete = true
    } finally {
      // On failure or early exit, tell the stream we are done with it so the
      // unread remainder of the body is not kept around.
      if (!complete) await reader.cancel().catch(() => {})
    }
  } finally {
    await handle.close()
    if (!complete) {
      await fs.unlink(partialPath).catch(() => {})
    }
  }

  await fs.rename(partialPath, candidate.localPath)
}
521
+
522
/**
 * Fills in `mmprojAvailable` / `mmprojSizeBytes` for a model record that does
 * not have `mmprojAvailable` set yet.
 *
 * The repo listing is fetched by repo id, the record is updated from the
 * projector sibling (if any) and persisted. Any failure along the way is
 * swallowed and the original record is returned untouched.
 *
 * @param model Installed model record to complete.
 * @param fetchImpl Fetch implementation used for the repo lookup.
 * @returns The persisted updated record, or `model` itself when nothing was done.
 */
export async function backfillMmprojAvailability(
  model: LocalHfModel,
  fetchImpl: FetchImpl = fetch,
): Promise<LocalHfModel> {
  const alreadyKnown = model.mmprojAvailable !== undefined
  if (alreadyKnown) return model

  let updated: LocalHfModel
  try {
    const repoInfo = await fetchHuggingFaceRepoInfo({ repoId: model.repoId }, fetchImpl)
    const projector = findMmprojSibling(repoInfo)
    updated = {
      ...model,
      mmprojAvailable: Boolean(projector),
      mmprojSizeBytes: projector?.sizeBytes,
    }
    await upsertLocalHfModel(updated)
  } catch {
    // Best effort: a failed lookup or write leaves the record as it was.
    return model
  }
  return updated
}
541
+
542
/**
 * Batch variant of the projector backfill: completes `mmprojAvailable` /
 * `mmprojSizeBytes` for every record that lacks `mmprojAvailable`.
 *
 * Each distinct repo id is looked up once; all lookups are started before any
 * is awaited, and a failed lookup resolves to `null` so that its models are
 * passed through unchanged. Updated records are persisted one by one.
 *
 * @param models Installed model records, in the order they should be returned.
 * @param fetchImpl Fetch implementation used for the repo lookups.
 * @returns The input array itself when no lookup was needed, otherwise a new
 *   array with the same ordering.
 */
export async function backfillMmprojForModels(
  models: LocalHfModel[],
  fetchImpl: FetchImpl = fetch,
): Promise<LocalHfModel[]> {
  const needsProbe = (model: LocalHfModel): boolean => model.mmprojAvailable === undefined

  const pendingLookups = new Map<string, Promise<HuggingFaceRepoInfo | null>>()
  for (const model of models) {
    if (!needsProbe(model) || pendingLookups.has(model.repoId)) continue
    const lookup = fetchHuggingFaceRepoInfo({ repoId: model.repoId }, fetchImpl).catch(() => null)
    pendingLookups.set(model.repoId, lookup)
  }
  if (pendingLookups.size === 0) return models

  const repoInfoById = new Map<string, HuggingFaceRepoInfo | null>()
  for (const [repoId, lookup] of pendingLookups) {
    repoInfoById.set(repoId, await lookup)
  }

  const result: LocalHfModel[] = []
  for (const model of models) {
    const repoInfo = needsProbe(model) ? repoInfoById.get(model.repoId) : undefined
    if (!repoInfo) {
      // Either already complete or the lookup failed: keep the record as is.
      result.push(model)
      continue
    }
    const projector = findMmprojSibling(repoInfo)
    const updated: LocalHfModel = {
      ...model,
      mmprojAvailable: Boolean(projector),
      mmprojSizeBytes: projector?.sizeBytes,
    }
    await upsertLocalHfModel(updated)
    result.push(updated)
  }
  return result
}
582
+
583
/**
 * Downloads the projector file for a model that is already installed and
 * records it on the model's stored entry.
 *
 * If the entry already has an `mmprojPath`, a single `success` event is
 * yielded and nothing is downloaded.
 *
 * @param modelId Id of the installed model.
 * @param signal Optional abort signal forwarded to the download.
 * @param deps Optional overrides; `fetchImpl` replaces the global `fetch`.
 * @yields Progress events from the projector download, then a final `success`.
 * @throws Error when the model is not installed or its repo lists no projector file.
 */
export async function* addMmprojToInstalledModel(
  modelId: string,
  signal?: AbortSignal,
  deps: { fetchImpl?: FetchImpl } = {},
): AsyncIterable<HfDownloadProgress> {
  const fetchImpl = deps.fetchImpl ?? fetch
  const existing = await findLocalHfModel(modelId)
  if (!existing) throw new Error(`model not installed: ${modelId}`)
  if (existing.mmprojPath) {
    yield { status: 'success', completed: 0 }
    return
  }
  // NOTE(review): the listing is requested by repo id only, while the file is
  // downloaded at `existing.resolvedRevision` — confirm both refer to the same
  // revision of the repo.
  const repo = await fetchHuggingFaceRepoInfo({ repoId: existing.repoId }, fetchImpl)
  const sibling = findMmprojSibling(repo)
  if (!sibling) throw new Error(`no vision encoder available for ${existing.repoId}`)
  const candidate: HfMmprojCandidate = {
    filename: sibling.filename,
    sizeBytes: sibling.sizeBytes ?? 0,
    localPath: localPathFor(existing.repoId, existing.resolvedRevision, sibling.filename),
  }
  yield* downloadMmprojFile(existing.repoId, existing.resolvedRevision, candidate, signal, fetchImpl)
  // Record availability and size together with the path, so the entry is
  // complete and the backfill helpers do not probe the repo again for a
  // projector that is already on disk.
  await upsertLocalHfModel({
    ...existing,
    mmprojPath: candidate.localPath,
    mmprojAvailable: true,
    mmprojSizeBytes: sibling.sizeBytes ?? existing.mmprojSizeBytes,
  })
  yield { status: 'success', completed: candidate.sizeBytes }
}
607
+
439
608
  export function shouldReportDownloadProgress(
440
609
  completed: number,
441
610
  lastCompleted: number,
@@ -446,7 +615,13 @@ export function shouldReportDownloadProgress(
446
615
  || completed - lastCompleted >= DOWNLOAD_PROGRESS_MIN_BYTES
447
616
  }
448
617
 
449
- export function modelFromPlan(plan: HfDownloadPlan, sha256: string | undefined, status: LocalHfStatus): LocalHfModel {
618
+ export function modelFromPlan(
619
+ plan: HfDownloadPlan,
620
+ sha256: string | undefined,
621
+ status: LocalHfStatus,
622
+ mmprojPath?: string,
623
+ ): LocalHfModel {
624
+ const mmprojAvailable = Boolean(plan.mmprojCandidate)
450
625
  const now = new Date().toISOString()
451
626
  return {
452
627
  id: localModelId(plan.repoId, plan.filename),
@@ -472,6 +647,9 @@ export function modelFromPlan(plan: HfDownloadPlan, sha256: string | undefined,
472
647
  installedAt: now,
473
648
  status,
474
649
  sha256,
650
+ mmprojPath,
651
+ mmprojAvailable,
652
+ mmprojSizeBytes: plan.mmprojCandidate?.sizeBytes,
475
653
  }
476
654
  }
477
655
 
@@ -72,6 +72,9 @@ type LlamaCppStartDeps = {
72
72
  access?: typeof fs.access
73
73
  binaryPath?: string
74
74
  spawnImpl?: (command: string, args: readonly string[], options: NonNullable<Parameters<typeof spawn>[2]>) => ReturnType<typeof spawn>
75
+ killRogue?: (host: string) => Promise<KillRogueResult>
76
+ rogueDrainTimeoutMs?: number
77
+ rogueDrainPollMs?: number
75
78
  }
76
79
 
77
80
  export type LocalRunnerConfig = {
@@ -362,25 +365,51 @@ export async function startLlamaCppServer(args: {
362
365
  modelAlias: string
363
366
  host?: string
364
367
  ctxSize?: number
368
+ mmprojPath?: string
365
369
  readinessTimeoutMs?: number
366
370
  pollMs?: number
367
371
  deps?: LlamaCppStartDeps
368
372
  }): Promise<LlamaCppStartResult> {
369
373
  const host = args.host ?? DEFAULT_LLAMA_HOST
370
- const initialStatus = await servedModelStatus(host, args.modelAlias)
371
- if (initialStatus.state === 'ready') return { ok: true, alreadyRunning: true }
374
+ let initialStatus = await servedModelStatus(host, args.modelAlias)
375
+ if (initialStatus.state === 'ready' && args.mmprojPath) {
376
+ const pid = await readPidFile()
377
+ if (!pid) {
378
+ await (args.deps?.killRogue ?? killRogueLlamaProcesses)(host).catch(() => null)
379
+ const drained = await waitForHostDown(host, args.deps?.rogueDrainTimeoutMs ?? 6000, args.deps?.rogueDrainPollMs ?? 200)
380
+ if (!drained) {
381
+ return startFailure('different-model-running', {
382
+ servedModels: initialStatus.models,
383
+ detail: 'another process is holding the local model port and could not be stopped automatically',
384
+ })
385
+ }
386
+ initialStatus = await servedModelStatus(host, args.modelAlias)
387
+ }
388
+ }
389
+ if (initialStatus.state === 'ready') {
390
+ return { ok: true, alreadyRunning: true }
391
+ }
372
392
  if (initialStatus.state === 'different') {
373
393
  return startFailure('different-model-running', {
374
394
  servedModels: initialStatus.models,
375
395
  })
376
396
  }
377
397
 
398
+ const accessFn = args.deps?.access ?? fs.access
378
399
  try {
379
- await (args.deps?.access ?? fs.access)(args.modelPath)
400
+ await accessFn(args.modelPath)
380
401
  } catch {
381
402
  return startFailure('model-file-missing', { detail: args.modelPath })
382
403
  }
383
404
 
405
+ if (args.mmprojPath) {
406
+ try {
407
+ await accessFn(args.mmprojPath)
408
+ } catch {
409
+ return startFailure('model-file-missing', { detail: args.mmprojPath })
410
+ }
411
+ }
412
+
384
413
  const binaryPath = args.deps?.binaryPath ?? (await findAndPersistLlamaCppServer()).path
385
414
  if (!binaryPath) {
386
415
  return startFailure('runner-not-installed')
@@ -390,21 +419,23 @@ export async function startLlamaCppServer(args: {
390
419
  const listenHost = url.hostname || '127.0.0.1'
391
420
  const port = url.port || (url.protocol === 'https:' ? '443' : '8080')
392
421
  const spawnImpl = args.deps?.spawnImpl ?? spawn
422
+ const spawnArgs: string[] = [
423
+ '-m',
424
+ args.modelPath,
425
+ '--host',
426
+ listenHost,
427
+ '--port',
428
+ port,
429
+ '--alias',
430
+ args.modelAlias,
431
+ '--ctx-size',
432
+ String(args.ctxSize ?? 32768),
433
+ '--jinja',
434
+ ]
435
+ if (args.mmprojPath) spawnArgs.push('--mmproj', args.mmprojPath)
393
436
  let child: ReturnType<typeof spawn>
394
437
  try {
395
- child = spawnImpl(binaryPath, [
396
- '-m',
397
- args.modelPath,
398
- '--host',
399
- listenHost,
400
- '--port',
401
- port,
402
- '--alias',
403
- args.modelAlias,
404
- '--ctx-size',
405
- String(args.ctxSize ?? 32768),
406
- '--jinja',
407
- ], {
438
+ child = spawnImpl(binaryPath, spawnArgs, {
408
439
  detached: true,
409
440
  stdio: ['ignore', 'pipe', 'pipe'],
410
441
  windowsHide: true,
@@ -424,6 +455,9 @@ export async function startLlamaCppServer(args: {
424
455
  })
425
456
  })
426
457
  child.unref()
458
+ if (typeof child.pid === 'number') {
459
+ await writePidFile(child.pid).catch(() => {})
460
+ }
427
461
 
428
462
  const ready = await waitForServedModel({
429
463
  host,
@@ -468,6 +502,84 @@ async function waitForServedModel(args: {
468
502
  return startFailure('readiness-timeout')
469
503
  }
470
504
 
505
/** Builds the path of the `llamacpp.pid` file inside the config directory. */
function pidFilePath(): string {
  const configDir = getConfigDir()
  return path.join(configDir, 'llamacpp.pid')
}
508
+
509
/**
 * Stores `pid` as decimal text in the pid file, after making sure the config
 * directory exists.
 */
async function writePidFile(pid: number): Promise<void> {
  await ensureConfigDir()
  const target = pidFilePath()
  const contents = String(pid)
  await atomicWriteText(target, contents)
}
513
+
514
/**
 * Reads the pid stored in the pid file.
 *
 * @returns The stored pid when it parses as a positive integer; `null` when
 *   the file cannot be read or its content is not a usable pid.
 */
async function readPidFile(): Promise<number | null> {
  let contents: string
  try {
    contents = await fs.readFile(pidFilePath(), 'utf8')
  } catch {
    return null
  }
  const parsed = Number.parseInt(contents.trim(), 10)
  if (!Number.isInteger(parsed) || parsed <= 0) return null
  return parsed
}
523
+
524
/** Removes the pid file; a missing file or a failed removal is ignored. */
async function clearPidFile(): Promise<void> {
  const target = pidFilePath()
  try {
    await fs.rm(target, { force: true })
  } catch {
    // Best effort: there is nothing useful to do if the removal fails.
  }
}
527
+
528
/**
 * Asks the server recorded in the pid file to terminate.
 *
 * Without a recorded pid nothing is signalled; the host is probed once and a
 * server that is up and serving models is reported as `untracked-server`.
 * With a recorded pid, SIGTERM is sent and the host is polled until it is no
 * longer up (or serves no models) or the timeout elapses.
 *
 * @param args.host Host to poll; defaults to `DEFAULT_LLAMA_HOST`.
 * @param args.timeoutMs How long to wait after signalling (default 5000).
 * @param args.pollMs Delay between polls (default 250).
 * @param args.killImpl Replacement for `process.kill`.
 * @returns `{ ok: true, stopped }` (optionally with the untracked-server
 *   details) or `{ ok: false, message }` when signalling fails for a reason
 *   other than the process no longer existing.
 */
export async function stopLlamaCppServer(args: {
  host?: string
  timeoutMs?: number
  pollMs?: number
  killImpl?: (pid: number, signal?: NodeJS.Signals | number) => void
} = {}): Promise<
  | { ok: true; stopped: boolean; reason?: 'untracked-server'; servedModels?: string[] }
  | { ok: false; message: string }
> {
  const host = args.host ?? DEFAULT_LLAMA_HOST
  const trackedPid = await readPidFile()

  if (!trackedPid) {
    const probe = await fetchServedModels(host, 1500)
    if (probe.up && probe.models.length > 0) {
      return { ok: true, stopped: false, reason: 'untracked-server', servedModels: probe.models }
    }
    return { ok: true, stopped: false }
  }

  const sendSignal = args.killImpl ?? ((p, signal) => process.kill(p, signal))
  try {
    sendSignal(trackedPid, 'SIGTERM')
  } catch (err: unknown) {
    const alreadyGone = (err as NodeJS.ErrnoException).code === 'ESRCH'
    if (!alreadyGone) {
      return { ok: false, message: (err as Error).message }
    }
    // The recorded process no longer exists: the pid file is stale.
    await clearPidFile()
    return { ok: true, stopped: false }
  }

  const giveUpAt = Date.now() + (args.timeoutMs ?? 5000)
  const intervalMs = args.pollMs ?? 250
  while (Date.now() < giveUpAt) {
    // The alias is a placeholder; only the up/down state and the model list matter here.
    const status = await servedModelStatus(host, '__nothing__')
    const gone = status.state === 'not-up' || status.models.length === 0
    if (gone) {
      await clearPidFile()
      return { ok: true, stopped: true }
    }
    await new Promise<void>(resolve => setTimeout(resolve, intervalMs))
  }

  // NOTE(review): when the timeout elapses the pid file is still cleared and
  // the stop is still reported as successful, without confirming that the
  // server went away — verify that callers expect this.
  await clearPidFile()
  return { ok: true, stopped: true }
}
571
+
572
/**
 * Polls `host` until it stops answering or `timeoutMs` has elapsed.
 *
 * One extra probe is made after the deadline, so a host that went down during
 * the last sleep is still detected.
 *
 * @returns `true` when the host was seen down, `false` otherwise.
 */
async function waitForHostDown(host: string, timeoutMs: number, pollMs: number): Promise<boolean> {
  const isDown = async (): Promise<boolean> => {
    const probe = await fetchServedModels(host, 800)
    return !probe.up
  }
  const sleep = (): Promise<void> => new Promise<void>(resolve => setTimeout(resolve, pollMs))

  const deadline = Date.now() + timeoutMs
  while (Date.now() < deadline) {
    if (await isDown()) return true
    await sleep()
  }
  return isDown()
}
582
+
471
583
  async function servedModelStatus(host: string, modelAlias: string): Promise<
472
584
  | { state: 'not-up'; models: string[] }
473
585
  | { state: 'ready'; models: string[] }
@@ -479,6 +591,136 @@ async function servedModelStatus(host: string, modelAlias: string): Promise<
479
591
  return { state: 'different', models }
480
592
  }
481
593
 
594
/**
 * Outcome of a kill sweep: `killed` is the sum of the counts reported by the
 * individual kill steps, `errors` holds their failure messages.
 */
export type KillRogueResult = { killed: number; errors: string[] }
595
+
596
/**
 * Tries to get rid of every llama.cpp process that may be occupying the local
 * model port.
 *
 * Steps, in order: stop the server recorded in the pid file, force-kill
 * whatever listens on the host's port, force-kill processes by image/command
 * name (`llama-server`, `llama-cli`), then clear the pid file. Failures are
 * collected rather than thrown.
 *
 * @param host Host whose port is swept; defaults to `DEFAULT_LLAMA_HOST`.
 * @returns The number of successful kill steps and the collected error messages.
 */
export async function killRogueLlamaProcesses(host?: string): Promise<KillRogueResult> {
  const result: KillRogueResult = { killed: 0, errors: [] }
  const targetHost = host ?? DEFAULT_LLAMA_HOST
  try {
    // Poll the same host that is swept below, not the default one.
    const tracked = await stopLlamaCppServer({ host: targetHost, timeoutMs: 1500 })
    // A failed stop is reported through the result object, not an exception.
    if (!tracked.ok) result.errors.push(`tracked stop failed: ${tracked.message}`)
  } catch (err: unknown) {
    result.errors.push(`tracked stop failed: ${(err as Error).message}`)
  }
  const platform = os.platform()
  const portOutcome = await killProcessOnPort(platform, targetHost)
  result.killed += portOutcome.killed
  if (portOutcome.error) result.errors.push(portOutcome.error)
  const targets = platform === 'win32'
    ? ['llama-server.exe', 'llama-cli.exe']
    : ['llama-server', 'llama-cli']
  for (const target of targets) {
    const outcome = await runKillCommand(platform, target)
    result.killed += outcome.killed
    if (outcome.error) result.errors.push(outcome.error)
  }
  await clearPidFile()
  return result
}
618
+
619
/**
 * Force-kills every process found listening on the port of `host`.
 *
 * @param platform Platform used to choose the listing and kill commands.
 * @param host URL whose port is scanned.
 * @returns How many processes were killed; `error` joins the individual
 *   failures with `; `, or is `'no port to scan'` when no port can be derived.
 */
export async function killProcessOnPort(
  platform: NodeJS.Platform,
  host: string,
): Promise<{ killed: number; error?: string }> {
  const port = extractHostPort(host)
  if (!port) return { killed: 0, error: 'no port to scan' }

  const listeners = await listListeningPids(platform, port)
  if (listeners.length === 0) return { killed: 0 }

  let killed = 0
  const failures: string[] = []
  for (const pid of listeners) {
    const attempt = await killByPid(platform, pid)
    if (attempt.killed) killed += 1
    if (attempt.error) failures.push(attempt.error)
  }

  if (failures.length === 0) return { killed }
  return { killed, error: failures.join('; ') }
}
636
+
637
/**
 * Derives the TCP port addressed by an http(s) host URL.
 *
 * Only `http:` and `https:` URLs are accepted. A scheme-less value such as
 * `localhost:8080` is parsed by `URL` as scheme `localhost:` with an empty
 * port; without this check it would be reported as port 80 and an unrelated
 * listener on that port would end up being killed.
 *
 * @param host Host URL, e.g. `http://127.0.0.1:8080`.
 * @returns The explicit port, the scheme default (443 for https, 80 for
 *   http) when none is given, or `null` when no port can be derived.
 */
function extractHostPort(host: string): number | null {
  let url: URL
  try {
    url = new URL(host)
  } catch {
    return null
  }
  if (url.protocol !== 'http:' && url.protocol !== 'https:') return null
  if (url.port) {
    const port = Number.parseInt(url.port, 10)
    return Number.isInteger(port) && port > 0 ? port : null
  }
  return url.protocol === 'https:' ? 443 : 80
}
646
+
647
/**
 * Lists the pids of processes listening on a TCP port.
 *
 * Windows output comes from `netstat -ano -p tcp` and is parsed by
 * `parseNetstatPids`; elsewhere `lsof` is asked for listening pids only.
 * Both paths leave out the current process and repeated pids, because the
 * result is handed to a force-kill.
 *
 * @param platform Platform used to choose the command.
 * @param port TCP port to look up.
 * @returns Matching pids, or an empty array when the command is unavailable,
 *   fails, or finds nothing.
 */
async function listListeningPids(platform: NodeJS.Platform, port: number): Promise<number[]> {
  if (platform === 'win32') {
    const result = await runCommand('netstat', ['-ano', '-p', 'tcp'], 4000)
    if (!result) return []
    return parseNetstatPids(result.stdout, port)
  }
  const result = await runCommand('lsof', ['-nP', `-iTCP:${port}`, '-sTCP:LISTEN', '-t'], 4000)
  if (!result || result.code !== 0) return []
  const pids = new Set<number>()
  for (const line of result.stdout.split(/\r?\n/)) {
    const pid = Number.parseInt(line.trim(), 10)
    if (!Number.isInteger(pid) || pid <= 0) continue
    // Never report ourselves as a kill target.
    if (pid === process.pid) continue
    pids.add(pid)
  }
  return Array.from(pids)
}
657
+
658
/**
 * Extracts pids from `netstat -ano`-style output for rows that are in the
 * LISTENING state and whose local address ends with `:<port>`.
 *
 * Rows with fewer than five columns, unparsable or non-positive pids, and the
 * current process are skipped. Each pid is returned once, in order of first
 * appearance.
 *
 * @param output Raw command output.
 * @param port Port to match against the local-address column.
 * @returns The matching pids.
 */
export function parseNetstatPids(output: string, port: number): number[] {
  const suffix = `:${port}`
  const found = new Set<number>()
  const rows = output.split(/\r?\n/).map(row => row.trim())
  for (const row of rows) {
    if (row === '') continue
    if (!row.toUpperCase().includes('LISTENING')) continue
    const fields = row.split(/\s+/)
    if (fields.length < 5) continue
    const localAddress = fields[1] ?? ''
    if (!localAddress.endsWith(suffix)) continue
    const pid = Number.parseInt(fields[fields.length - 1] ?? '', 10)
    const usable = Number.isInteger(pid) && pid > 0 && pid !== process.pid
    if (usable) found.add(pid)
  }
  return Array.from(found)
}
678
+
679
/**
 * Force-kills one process: `taskkill /F /T /PID <pid>` on Windows,
 * `kill -9 <pid>` elsewhere.
 *
 * @returns `{ killed: true }` when the command exits with 0; otherwise
 *   `killed: false` with a message naming the command and the failure.
 */
async function killByPid(platform: NodeJS.Platform, pid: number): Promise<{ killed: boolean; error?: string }> {
  const onWindows = platform === 'win32'
  const cmd = onWindows ? 'taskkill' : 'kill'
  const args = onWindows ? ['/F', '/T', '/PID', String(pid)] : ['-9', String(pid)]
  return new Promise(resolve => {
    const child = spawn(cmd, args, { stdio: 'ignore' })
    child.on('error', err => resolve({ killed: false, error: `${cmd} ${pid}: ${err.message}` }))
    child.on('close', code => {
      resolve(code === 0 ? { killed: true } : { killed: false, error: `${cmd} ${pid} exited ${code}` })
    })
  })
}
694
+
695
/**
 * Force-kills processes by name: `taskkill /F /T /IM <target>` on Windows,
 * `pkill -f <target>` elsewhere.
 *
 * @returns `{ killed: 1 }` when the command exits with 0 (regardless of how
 *   many processes it hit), `{ killed: 0 }` for the exit code treated as
 *   "nothing to kill", and `killed: 0` plus an error message otherwise.
 */
async function runKillCommand(
  platform: NodeJS.Platform,
  target: string,
): Promise<{ killed: number; error?: string }> {
  const onWindows = platform === 'win32'
  const cmd = onWindows ? 'taskkill' : 'pkill'
  // NOTE(review): `pkill -f` matches against the full command line, so any
  // process that merely mentions the target in its arguments is hit too —
  // confirm this breadth is intended.
  const args = onWindows ? ['/F', '/T', '/IM', target] : ['-f', target]
  // Exit code that is not reported as an error; presumably the tool's
  // "no matching process" status — verify against the tool documentation.
  const nothingMatchedCode = onWindows ? 128 : 1
  return new Promise(resolve => {
    const child = spawn(cmd, args, { stdio: 'ignore' })
    child.on('error', err => resolve({ killed: 0, error: `${cmd} ${target}: ${err.message}` }))
    child.on('close', code => {
      if (code === 0) {
        resolve({ killed: 1 })
      } else if (code === nothingMatchedCode) {
        resolve({ killed: 0 })
      } else {
        resolve({ killed: 0, error: `${cmd} ${target} exited ${code}` })
      }
    })
  })
}
723
+
482
724
  function startFailure(
483
725
  code: LlamaCppStartFailureCode,
484
726
  options: { detail?: string; servedModels?: string[] } = {},
@@ -500,7 +742,9 @@ function startFailureMessage(code: LlamaCppStartFailureCode, servedModels: strin
500
742
  case 'model-file-missing':
501
743
  return detail ? `model file not found: ${detail}` : 'model file was not found'
502
744
  case 'different-model-running':
503
- return `a different local model is already running (${servedModels.join(', ')}); stop it before switching models`
745
+ return servedModels.length > 0
746
+ ? `a different local model is already running (${servedModels.join(', ')}); stop it before switching models`
747
+ : detail ?? 'a different local model is already running; stop it before switching models'
504
748
  case 'spawn-failed':
505
749
  return 'local runner could not be started'
506
750
  case 'runner-exited':
@@ -1,5 +1,6 @@
1
1
  import {
2
2
  startLlamaCppServer,
3
+ stopLlamaCppServer,
3
4
  type LlamaCppStartFailureCode,
4
5
  type LlamaCppStartResult,
5
6
  } from './llamacpp.js'
@@ -21,6 +22,7 @@ export type LlamaCppPreflightDeps = {
21
22
  fetchImpl?: typeof fetch
22
23
  findLocalModel?: typeof findLocalHfModel
23
24
  startServer?: typeof startLlamaCppServer
25
+ stopServer?: typeof stopLlamaCppServer
24
26
  timeoutMs?: number
25
27
  }
26
28
 
@@ -50,25 +52,27 @@ export async function ensureLlamaCppRunnerReady(
50
52
 
51
53
  const probe = await probeLlamaCppModels(baseUrl, deps)
52
54
  if (probe.up) {
53
- if (probe.models.length === 0 || probe.models.includes(config.model)) {
54
- return { ok: true, alreadyRunning: true }
55
- }
56
- return {
57
- ok: false,
58
- code: 'different-model-running',
59
- message: formatPreflightFailure(
60
- 'local runner is serving a different model',
61
- config.model,
62
- `a different local model is already running (${probe.models.join(', ')}); stop it before switching models`,
63
- ),
64
- servedModels: probe.models,
55
+ if (probe.models.length > 0 && !probe.models.includes(config.model)) {
56
+ return {
57
+ ok: false,
58
+ code: 'different-model-running',
59
+ message: formatPreflightFailure(
60
+ 'local runner is serving a different model',
61
+ config.model,
62
+ `a different local model is already running (${probe.models.join(', ')}); stop it before switching models`,
63
+ ),
64
+ servedModels: probe.models,
65
+ }
65
66
  }
67
+ if (!local.mmprojPath) return { ok: true, alreadyRunning: true }
68
+ await (deps.stopServer ?? stopLlamaCppServer)().catch(() => null)
66
69
  }
67
70
 
68
71
  const result = await (deps.startServer ?? startLlamaCppServer)({
69
72
  modelPath: local.localPath,
70
73
  modelAlias: local.id,
71
74
  host: llamaCppServerHostFromBaseUrl(baseUrl),
75
+ mmprojPath: local.mmprojPath,
72
76
  })
73
77
  if (result.ok) return { ok: true, alreadyRunning: result.alreadyRunning }
74
78
  return withPreflightMessage(result, local)