ethagent 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -67,10 +67,10 @@ Each agent's continuity directory holds a small set of files. Private files are
67
67
  | --- | --- | --- |
68
68
  | `SOUL.md` | Private | Soul, boundaries, standing instructions, and identity framing. |
69
69
  | `MEMORY.md` | Private | Durable preferences, project context, decisions, and operating notes. |
70
- | `skills/` | Mixed | Skill folders. Each skill is private, discoverable, or public; new skills default to discoverable. |
70
+ | `skills/` | Private | Skill folders. The SKILL.md body never leaves your machine. The visibility flag only controls whether the skill's name and description get indexed in `skills.json`. New skills default to public. |
71
71
  | `skills.json` | Public | Machine-readable capabilities derived from public skills. |
72
72
 
73
- `SOUL.md`, `MEMORY.md`, and each `SKILL.md` are plain Markdown you edit through the Identity Hub under Continuity. Skills carry extra metadata: the frontmatter at the top of each `SKILL.md` (name, description, when_to_use, visibility, tags) tells the agent when to load it. Visibility is `private` (local-only, never shared), `discoverable` (indexed in `skills.json` so other agents can find it), or `public` (indexed and surfaced on the Agent Card).
73
+ `SOUL.md`, `MEMORY.md`, and each `SKILL.md` are plain Markdown you edit through the Identity Hub under Continuity. Skill frontmatter (name, description, when_to_use, visibility, tags) tells the agent when to load it. The body stays local; `visibility: public` indexes the name and description in `skills.json` and the Agent Card.
74
74
 
75
75
  - **Save Snapshot Now** encrypts the private files, pins them to IPFS, and rotates the onchain pointer to the new CID.
76
76
  - **Refetch Latest** reads the pointer back, signs the decrypt challenge with your wallet, and overwrites local files from the snapshot.
package/bin/ethagent.js CHANGED
@@ -6,6 +6,10 @@ import { dirname, join } from 'node:path'
6
6
  const __dirname = dirname(fileURLToPath(import.meta.url))
7
7
  const cli = join(__dirname, '..', 'src', 'cli', 'main.tsx')
8
8
 
9
+ if (process.platform === 'win32') {
10
+ try { execFileSync('cmd', ['/c', 'chcp', '65001'], { stdio: 'ignore' }) } catch {}
11
+ }
12
+
9
13
  try {
10
14
  const tsxPath = import.meta.resolve('tsx/esm')
11
15
  execFileSync('node', ['--import', tsxPath, cli, ...process.argv.slice(2)], { stdio: 'inherit' })
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ethagent",
3
- "version": "3.1.1",
3
+ "version": "3.2.0",
4
4
  "description": "A privacy-first AI agent with a portable Ethereum identity",
5
5
  "type": "module",
6
6
  "main": "bin/ethagent.js",
@@ -44,7 +44,9 @@ import {
44
44
  shouldConfirmContextUsage,
45
45
  type ContextUsage,
46
46
  } from '../runtime/compaction.js'
47
- import { saveConfig } from '../storage/config.js'
47
+ import { fetchLlamaCppContextSize, onLlamaCppContextSizeChange, setCachedLlamaCppContextSize } from '../models/llamacpp.js'
48
+ import { llamaCppServerHostFromBaseUrl } from '../models/llamacppPreflight.js'
49
+ import { localProviderBaseUrlFor, saveConfig } from '../storage/config.js'
48
50
  import { getCwd as getRuntimeCwd, setCwd as setRuntimeCwd, syncCwdFromProcess } from '../runtime/cwd.js'
49
51
  import { executeToolWithPermissions } from '../runtime/toolExecution.js'
50
52
  import { nextSessionMode, sessionModeLabel, type PermissionMode, type SessionMode } from '../runtime/sessionMode.js'
@@ -309,7 +311,7 @@ export const ChatScreen: React.FC<ChatScreenProps> = ({ config: initialConfig, o
309
311
  {
310
312
  role: 'progress',
311
313
  id: progressRowId,
312
- title: kind === 'plan' ? 'summarizing plan context' : 'compacting conversation',
314
+ title: kind === 'plan' ? 'summarizing plan context' : 'Compacting conversation',
313
315
  progress: 0,
314
316
  status: state.stage,
315
317
  suffix: 'esc to cancel',
@@ -449,6 +451,22 @@ export const ChatScreen: React.FC<ChatScreenProps> = ({ config: initialConfig, o
449
451
  [],
450
452
  )
451
453
 
454
+ useEffect(() => {
455
+ if (config.provider !== 'llamacpp') return
456
+ const host = llamaCppServerHostFromBaseUrl(localProviderBaseUrlFor('llamacpp', config.baseUrl))
457
+ void fetchLlamaCppContextSize(host)
458
+ const unsubscribe = onLlamaCppContextSizeChange(() => {
459
+ refreshVisibleStats(
460
+ sessionMessagesRef.current,
461
+ providerRef.current.supportsTools,
462
+ cwdRef.current,
463
+ configRef.current,
464
+ modeRef.current,
465
+ )
466
+ })
467
+ return unsubscribe
468
+ }, [config.provider, config.baseUrl, refreshVisibleStats])
469
+
452
470
  const warnIfContextPressure = useCallback(
453
471
  (usage: ContextUsage, configForUsage: EthagentConfig) => {
454
472
  if (!shouldConfirmContextUsage(usage, CONTEXT_CONFIRM_PERCENT)) return
@@ -880,6 +898,11 @@ export const ChatScreen: React.FC<ChatScreenProps> = ({ config: initialConfig, o
880
898
  pendingPlanRef.current = planCandidate
881
899
  setPendingPlan(planCandidate)
882
900
  },
901
+ onContextExceeded: ({ contextLimit }) => {
902
+ setCachedLlamaCppContextSize(contextLimit)
903
+ pushNote('Context full. Compacting transcript. Re-send your message once compaction finishes.', 'dim')
904
+ void runCompaction()
905
+ },
883
906
  pendingAssistantTextRef,
884
907
  pendingThinkingTextRef,
885
908
  streamFlushTimerRef,
@@ -56,6 +56,7 @@ export type TurnOrchestratorContext = {
56
56
  applySessionRule: (rule?: SessionPermissionRule, persistRule?: boolean) => Promise<void>
57
57
  preflightProvider?: () => Promise<{ ok: true } | { ok: false; message: string }>
58
58
  onPlanReady?: (plan: string) => void
59
+ onContextExceeded?: (info: { contextLimit: number }) => void
59
60
  pendingAssistantTextRef: MutableRef<string | null>
60
61
  pendingThinkingTextRef: MutableRef<string | null>
61
62
  streamFlushTimerRef: MutableRef<ReturnType<typeof setTimeout> | null>
@@ -89,6 +90,7 @@ export async function runStreamingTurn(
89
90
  applySessionRule,
90
91
  preflightProvider,
91
92
  onPlanReady,
93
+ onContextExceeded,
92
94
  pendingAssistantTextRef,
93
95
  pendingThinkingTextRef,
94
96
  streamFlushTimerRef,
@@ -311,6 +313,7 @@ export async function runStreamingTurn(
311
313
  nowIso,
312
314
  mode,
313
315
  onPlanReady,
316
+ onContextExceeded,
314
317
  turnId: activeCheckpoint.turnId,
315
318
  model: getConfig().model,
316
319
  onFinishedNormally: () => { finishedNormally = true },
@@ -356,6 +359,7 @@ type EventHandlerContext = {
356
359
  nowIso: () => string
357
360
  mode: SessionMode
358
361
  onPlanReady?: (plan: string) => void
362
+ onContextExceeded?: (info: { contextLimit: number }) => void
359
363
  turnId: string
360
364
  model: string
361
365
  onFinishedNormally: () => void
@@ -365,6 +369,13 @@ function isCancelledEvent(ev: TurnEvent): boolean {
365
369
  return ev.type === 'cancelled'
366
370
  }
367
371
 
372
/**
 * Pulls the context limit out of a "context exceeded" error message.
 *
 * Recognizes text of the form `exceeds the available context size (<N> tokens)`
 * (case-insensitive, `token` or `tokens`, optional whitespace before the unit)
 * anywhere in the message.
 *
 * @param message - Error text to inspect.
 * @returns The reported limit as a positive integer, or `null` when the message
 *   does not match or the number is zero or too large to represent exactly.
 */
export function parseContextExceededLimit(message: string): number | null {
  const digits = /exceeds the available context size \((\d+)\s*tokens?\)/i.exec(message)?.[1]
  if (digits === undefined) return null
  const limit = Number.parseInt(digits, 10)
  // A digit run beyond safe-integer range would parse to an imprecise float;
  // treat it as unparseable instead of reporting a bogus limit.
  return Number.isSafeInteger(limit) && limit > 0 ? limit : null
}
378
+
368
379
  async function handleEvent(ev: TurnEvent, ctx: EventHandlerContext): Promise<void> {
369
380
  switch (ev.type) {
370
381
  case 'iteration_start': {
@@ -453,6 +464,12 @@ async function handleEvent(ev: TurnEvent, ctx: EventHandlerContext): Promise<voi
453
464
  return
454
465
  }
455
466
  case 'error': {
467
+ const contextLimit = parseContextExceededLimit(ev.message)
468
+ if (contextLimit !== null && ctx.onContextExceeded) {
469
+ ctx.discardStreamingRows()
470
+ ctx.onContextExceeded({ contextLimit })
471
+ return
472
+ }
456
473
  ctx.pushNote(ev.message, 'error')
457
474
  if (ev.discardAssistant) {
458
475
  ctx.discardStreamingRows()
@@ -293,7 +293,8 @@ const COMMANDS: CommandSpec[] = [
293
293
  const text = assistant[index] ?? ''
294
294
  const label = offset === 1 ? 'Latest reply' : `Reply #${offset} back`
295
295
  const segments = parseSegments(text)
296
- if (segments.length <= 1) {
296
+ const hasCode = segments.some(segment => segment.kind === 'code')
297
+ if (!hasCode) {
297
298
  const result = await copyToClipboard(text)
298
299
  if (!result.ok) {
299
300
  return { kind: 'note', variant: 'error', text: `Copy failed: ${result.error}` }
@@ -103,7 +103,7 @@ export function appendPublicSkillEntries(
103
103
  const appended: PublicSkill[] = []
104
104
  const usedIds = new Set(baselineIds)
105
105
  for (const entry of entries) {
106
- if (entry.visibility !== 'public' && entry.visibility !== 'discoverable') continue
106
+ if (entry.visibility !== 'public') continue
107
107
  const id = uniqueSkillId(entry.name, usedIds)
108
108
  usedIds.add(id)
109
109
  appended.push({
@@ -14,7 +14,8 @@ const SUPPORTED_KEYS = new Set([
14
14
  'visibility',
15
15
  ])
16
16
 
17
- const VISIBILITY_VALUES: SkillVisibility[] = ['private', 'public', 'discoverable']
17
+ const VISIBILITY_VALUES: SkillVisibility[] = ['private', 'public']
18
+ const LEGACY_VISIBILITY_TO_PRIVATE = new Set(['discoverable'])
18
19
 
19
20
  export type ParsedSkillFile = {
20
21
  frontmatter: SkillFrontmatter
@@ -104,6 +105,8 @@ function assignKey(out: SkillFrontmatter, key: keyof SkillFrontmatter, rawValue:
104
105
  const literal = parseScalar(stripped).toLowerCase()
105
106
  if ((VISIBILITY_VALUES as string[]).includes(literal)) {
106
107
  out.visibility = literal as SkillVisibility
108
+ } else if (LEGACY_VISIBILITY_TO_PRIVATE.has(literal)) {
109
+ out.visibility = 'private'
107
110
  }
108
111
  return
109
112
  }
@@ -215,8 +215,11 @@ export async function loadSkillsTree(identity: EthagentIdentity): Promise<Contin
215
215
  const rel = `${skillEnt.name}/${file.relativePath}`
216
216
  if (!isValidSkillFilePath(rel)) continue
217
217
  if (file.sizeBytes > MAX_SKILL_FILE_BYTES) continue
218
- const content = await fs.readFile(file.absolutePath, 'utf8').catch(() => null)
219
- if (content === null) continue
218
+ const rawContent = await fs.readFile(file.absolutePath, 'utf8').catch(() => null)
219
+ if (rawContent === null) continue
220
+ const content = file.relativePath === SKILL_FILE_NAME
221
+ ? await ensureSkillVisibilityWritten(file.absolutePath, rawContent)
222
+ : rawContent
220
223
  tree[rel] = content
221
224
  totalFiles++
222
225
  }
@@ -359,7 +362,12 @@ export async function migrateLegacySkillFiles(skillsRoot: string): Promise<void>
359
362
  return
360
363
  }
361
364
  for (const topEnt of topDirents) {
362
- if (!topEnt.isDirectory() || topEnt.isSymbolicLink()) continue
365
+ if (topEnt.isSymbolicLink()) continue
366
+ if (topEnt.isFile() && /\.md$/i.test(topEnt.name)) {
367
+ await adoptBareSkillFile(skillsRoot, topEnt.name)
368
+ continue
369
+ }
370
+ if (!topEnt.isDirectory()) continue
363
371
  if (!isValidSegment(topEnt.name)) continue
364
372
  const topDir = path.join(skillsRoot, topEnt.name)
365
373
  let children: import('node:fs').Dirent[]
@@ -403,6 +411,39 @@ export async function migrateLegacySkillFiles(skillsRoot: string): Promise<void>
403
411
  }
404
412
  }
405
413
 
414
/**
 * Moves a Markdown file that sits directly under the skills root into a folder
 * of its own, stored there under `SKILL_FILE_NAME`.
 *
 * Folder name selection:
 * - a file literally named `SKILL.md` (any case) uses its frontmatter `name`
 *   when that is a valid segment, otherwise `imported-skill`;
 * - any other `*.md` file uses its name without the extension, and is skipped
 *   when that is not a valid segment.
 *
 * The final folder name comes from `chooseFlatTarget` (presumably it avoids
 * clashes with existing folders — confirm against its implementation).
 * Every failure is swallowed: adoption is best-effort and never throws.
 *
 * @param skillsRoot - Directory that contains the skill folders.
 * @param fileName - Name of the bare file inside `skillsRoot`.
 */
async function adoptBareSkillFile(skillsRoot: string, fileName: string): Promise<void> {
  const sourcePath = path.join(skillsRoot, fileName)
  let baseName: string
  if (/^SKILL\.md$/i.test(fileName)) {
    let parsedName: string | undefined
    try {
      const raw = await fs.readFile(sourcePath, 'utf8')
      const fmName = parseSkillFile(raw).frontmatter.name?.trim()
      if (fmName && isValidSegment(fmName)) parsedName = fmName
    } catch {
      // Unreadable or unparseable file: fall through to the generic name.
    }
    baseName = parsedName ?? 'imported-skill'
  } else {
    const slug = fileName.replace(/\.md$/i, '')
    if (!isValidSegment(slug)) return
    baseName = slug
  }

  let target: string
  try {
    target = await chooseFlatTarget(skillsRoot, baseName)
  } catch {
    return
  }

  const targetDir = path.join(skillsRoot, target)
  try {
    await fs.mkdir(targetDir, { recursive: true, mode: 0o700 })
  } catch {
    return
  }
  try {
    await fs.rename(sourcePath, path.join(targetDir, SKILL_FILE_NAME))
  } catch {
    // The move failed, so do not leave the folder we just made behind as an
    // empty skill. rmdir only removes empty directories, so a folder that
    // already held files is left alone.
    await fs.rmdir(targetDir).catch(() => undefined)
  }
}
446
+
406
447
  async function chooseFlatTarget(skillsRoot: string, base: string): Promise<string> {
407
448
  let candidate = base
408
449
  let suffix = 2
@@ -476,6 +517,32 @@ async function pathExists(file: string): Promise<boolean> {
476
517
  }
477
518
  }
478
519
 
520
/** Visibility written into a skill file that declares none. */
const DEFAULT_PASTED_VISIBILITY: SkillVisibility = 'public'
/** Matches a retired `visibility: discoverable` line (optionally quoted, any case). */
const LEGACY_DISCOVERABLE_RE = /^\s*visibility\s*:\s*['"]?discoverable['"]?\s*$/im
/** Captures the text between the opening and closing `---` fences at the very top of a file. */
const LEADING_FRONTMATTER_RE = /^\uFEFF?---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/

/** Returns the raw frontmatter text of a skill file, or `null` when it has no leading `---` block. */
function leadingFrontmatterText(raw: string): string | null {
  return LEADING_FRONTMATTER_RE.exec(raw)?.[1] ?? null
}

/**
 * Makes sure a skill file carries an explicit, current visibility value.
 *
 * - A legacy `visibility: discoverable` line in the frontmatter is rewritten to
 *   `private`.
 * - A file whose parsed frontmatter has no visibility gets
 *   `DEFAULT_PASTED_VISIBILITY`.
 * - Anything else, including content that fails to parse, is returned unchanged.
 *
 * The legacy check looks only at the leading frontmatter block. Matching the
 * whole file would let a body line that merely mentions
 * `visibility: discoverable` force the skill back to private on every load.
 *
 * @param skillFile - Path of the skill file to update on disk.
 * @param raw - Current content of that file.
 * @returns The content with visibility applied. A failed write is ignored, so
 *   the returned text can differ from what is on disk.
 */
async function ensureSkillVisibilityWritten(skillFile: string, raw: string): Promise<string> {
  let parsed: ReturnType<typeof parseSkillFile>
  try {
    parsed = parseSkillFile(raw)
  } catch {
    return raw
  }

  const frontmatterText = leadingFrontmatterText(raw)
  let target: SkillVisibility | null = null
  if (frontmatterText !== null && LEGACY_DISCOVERABLE_RE.test(frontmatterText)) {
    target = 'private'
  } else if (parsed.frontmatter.visibility === undefined) {
    target = DEFAULT_PASTED_VISIBILITY
  }
  if (target === null) return raw

  const next = rewriteVisibility(raw, target)
  if (next === raw) return raw
  try {
    await atomicWriteText(skillFile, next, { mode: 0o600 })
  } catch {
    // Best effort: callers still get the normalized text when the disk write fails.
  }
  return next
}
545
+
479
546
  async function collectSkillEntries(root: string): Promise<SkillIndexEntry[]> {
480
547
  const out: SkillIndexEntry[] = []
481
548
  let topDirents: import('node:fs').Dirent[]
@@ -493,7 +560,8 @@ async function collectSkillEntries(root: string): Promise<SkillIndexEntry[]> {
493
560
  const stat = await fs.stat(skillFile)
494
561
  if (!stat.isFile()) continue
495
562
  if (stat.size > MAX_SKILL_FILE_BYTES) continue
496
- const raw = await fs.readFile(skillFile, 'utf8')
563
+ const rawInitial = await fs.readFile(skillFile, 'utf8')
564
+ const raw = await ensureSkillVisibilityWritten(skillFile, rawInitial)
497
565
  const parsed = parseSkillFile(raw)
498
566
  const relativePath = `${skillEnt.name}/${SKILL_FILE_NAME}`
499
567
  out.push(buildIndexEntry({
@@ -519,7 +587,7 @@ function buildIndexEntry(args: {
519
587
  const derivedName = folder || segments.join('/')
520
588
  const fm = args.parsed.frontmatter
521
589
  const description = pickDescription(fm.description, args.parsed.body)
522
- const visibility: SkillVisibility = fm.visibility ?? 'discoverable'
590
+ const visibility: SkillVisibility = fm.visibility ?? DEFAULT_PASTED_VISIBILITY
523
591
  return {
524
592
  name: derivedName,
525
593
  ...(fm.name ? { displayName: fm.name } : {}),
@@ -11,7 +11,7 @@ import type { SkillIndexEntry } from './types.js'
11
11
 
12
12
  export async function derivePublicSkillEntries(identity: EthagentIdentity): Promise<SkillIndexEntry[]> {
13
13
  const entries = await listSkills(identity)
14
- return entries.filter(entry => entry.visibility === 'public' || entry.visibility === 'discoverable')
14
+ return entries.filter(entry => entry.visibility === 'public')
15
15
  }
16
16
 
17
17
  export async function renderPublicSkillsJsonForIdentity(identity: EthagentIdentity): Promise<string> {
@@ -5,7 +5,7 @@ export type SkillScaffoldArgs = {
5
5
  visibility?: SkillVisibility
6
6
  }
7
7
 
8
- export function defaultSkillScaffold({ name, visibility = 'discoverable' }: SkillScaffoldArgs): string {
8
+ export function defaultSkillScaffold({ name, visibility = 'public' }: SkillScaffoldArgs): string {
9
9
  return [
10
10
  '---',
11
11
  `name: ${name}`,
@@ -1,4 +1,4 @@
1
- export type SkillVisibility = 'private' | 'public' | 'discoverable'
1
+ export type SkillVisibility = 'private' | 'public'
2
2
 
3
3
  export type SkillFrontmatter = {
4
4
  name?: string
@@ -22,7 +22,7 @@ export const NewSkillVisibilityScreen: React.FC<NewSkillVisibilityScreenProps> =
22
22
  }) => (
23
23
  <Surface
24
24
  title={`Visibility · ${name}`}
25
- subtitle="Discoverable is the default. You can change it later from Change Visibility."
25
+ subtitle="Public is the default. You can change it later from Change Visibility."
26
26
  footer={footer}
27
27
  >
28
28
  {error && (
@@ -34,8 +34,7 @@ export const NewSkillVisibilityScreen: React.FC<NewSkillVisibilityScreenProps> =
34
34
  <Select<SkillVisibility | 'back'>
35
35
  options={[
36
36
  { value: 'private', label: 'Private', hint: 'Local-only. Not in skills.json.' },
37
- { value: 'discoverable', label: 'Discoverable', hint: 'Default. Indexed in skills.json with description.' },
38
- { value: 'public', label: 'Public', hint: 'Indexed in skills.json and Agent Card.' },
37
+ { value: 'public', label: 'Public', hint: 'Default. Indexed in skills.json and Agent Card.' },
39
38
  { value: 'back', role: 'section', label: 'Navigation' },
40
39
  { value: 'back', label: 'Back', hint: 'Return to the name step', role: 'utility' },
41
40
  ]}
@@ -74,7 +74,6 @@ export const SkillActionsScreen: React.FC<SkillActionsScreenProps> = ({
74
74
 
75
75
  options.push({ value: noop, role: 'section', label: 'Visibility' })
76
76
  options.push(visibilityOption('private', visibility))
77
- options.push(visibilityOption('discoverable', visibility))
78
77
  options.push(visibilityOption('public', visibility))
79
78
 
80
79
  options.push({ value: noop, role: 'section', label: 'Manage' })
@@ -83,6 +82,7 @@ export const SkillActionsScreen: React.FC<SkillActionsScreenProps> = ({
83
82
  label: 'Delete',
84
83
  hint: 'Remove this skill folder and its supporting files',
85
84
  })
85
+ options.push({ value: noop, role: 'section', label: 'Return' })
86
86
  options.push({
87
87
  value: { kind: 'back' },
88
88
  label: 'Back',
@@ -141,8 +141,7 @@ function visibilityOption(level: SkillVisibility, current?: SkillVisibility): Se
141
141
 
142
142
  function visibilityHint(level: SkillVisibility): string {
143
143
  if (level === 'private') return 'Local-only. Not in skills.json.'
144
- if (level === 'discoverable') return 'Default. Indexed with description.'
145
- return 'Indexed with description and Agent Card link.'
144
+ return 'Default. Indexed with description and Agent Card link.'
146
145
  }
147
146
 
148
147
  function capitalize(value: string): string {
@@ -73,7 +73,7 @@ export const SkillsTreeScreen: React.FC<SkillsTreeScreenProps> = ({
73
73
  }
74
74
  }, [identity, editorOpened])
75
75
 
76
- const subtitle = notice ?? 'Open a skill, create one, or remove one.'
76
+ const subtitle = notice ?? 'Select a skill to open, change visibility, or delete.'
77
77
  const isLoading = tree === null
78
78
  const skills = tree?.skills ?? []
79
79
  const supportingCounts = tree?.supportingCounts ?? {}
@@ -145,6 +145,7 @@ function buildOptions(
145
145
  const supportCount = supportingCounts[skill.name] ?? 0
146
146
  const meta = [capitalize(skill.visibility)]
147
147
  if (supportCount > 0) meta.push(`${supportCount + 1} files`)
148
+ meta.push('enter for actions')
148
149
  rows.push({
149
150
  value: { kind: 'skill', relativePath: skill.relativePath },
150
151
  label: `${branch}${skill.name}/SKILL.md`,
@@ -256,12 +256,68 @@ async function fetchServedModels(host: string = DEFAULT_LLAMA_HOST, timeoutMs =
256
256
  }
257
257
  }
258
258
 
259
/** Last context size learned for the llama.cpp server, or `null` when none is known yet. */
let cachedLlamaCppContextSize: number | null = null
/** Callbacks invoked whenever the cached context size changes. */
const llamaCppContextSizeListeners = new Set<(size: number) => void>()

/**
 * Reads the context size out of a `/props` payload.
 * Uses top-level `n_ctx` when it is a number, otherwise
 * `default_generation_settings.n_ctx`; only positive numbers are accepted.
 */
function contextSizeFromProps(data: unknown): number | null {
  if (typeof data !== 'object' || data === null) return null
  const props = data as { n_ctx?: unknown; default_generation_settings?: { n_ctx?: unknown } | null }
  const raw = typeof props.n_ctx === 'number' ? props.n_ctx : props.default_generation_settings?.n_ctx
  return typeof raw === 'number' && raw > 0 ? raw : null
}

/**
 * Asks the server at `host` for its context size via `GET <host>/props` and,
 * on success, stores it in the module cache (notifying listeners on change).
 *
 * Never rejects: transport errors, non-OK responses, invalid JSON and missing
 * or non-positive values all resolve to `null`. The request itself sits inside
 * the `try` so a rejection cannot surface as an unhandled rejection when the
 * call is fired without awaiting.
 *
 * @param host - Server base URL; trailing slashes are stripped.
 * @param timeoutMs - Timeout handed to `fetchWithTimeout`.
 * @returns The context size, or `null` when it could not be determined.
 */
export async function fetchLlamaCppContextSize(
  host: string = DEFAULT_LLAMA_HOST,
  timeoutMs = 1500,
): Promise<number | null> {
  try {
    const response = await fetchWithTimeout(`${host.replace(/\/+$/, '')}/props`, timeoutMs)
    if (!response || !response.ok) return null
    const data: unknown = await response.json()
    const size = contextSizeFromProps(data)
    if (size === null) return null
    // Single code path for caching and listener notification.
    setCachedLlamaCppContextSize(size)
    return size
  } catch {
    return null
  }
}

/** Returns the cached context size, or `null` when nothing has been cached. */
export function getCachedLlamaCppContextSize(): number | null {
  return cachedLlamaCppContextSize
}

/**
 * Stores `size` as the cached context size and notifies listeners when the
 * value actually changed. Values that are not greater than zero (including
 * `NaN`) are ignored.
 */
export function setCachedLlamaCppContextSize(size: number): void {
  if (!(size > 0)) return
  if (cachedLlamaCppContextSize === size) return
  cachedLlamaCppContextSize = size
  for (const listener of llamaCppContextSizeListeners) {
    // One failing listener must not prevent the others from being notified.
    try { listener(size) } catch { void 0 }
  }
}

/**
 * Registers a listener for cached context-size changes.
 * @returns A function that removes the listener again.
 */
export function onLlamaCppContextSizeChange(listener: (size: number) => void): () => void {
  llamaCppContextSizeListeners.add(listener)
  return () => { llamaCppContextSizeListeners.delete(listener) }
}
313
+
259
314
  export async function detectLlamaCpp(host: string = DEFAULT_LLAMA_HOST): Promise<LlamaCppStatus> {
260
315
  const [binary, serverUp] = await Promise.all([
261
316
  detectLlamaCppServerBinary(),
262
317
  isLlamaCppServerUp(host),
263
318
  ])
264
319
  const servedModels = serverUp ? await listServedModels(host) : []
320
+ if (serverUp) void fetchLlamaCppContextSize(host)
265
321
  return {
266
322
  binaryPresent: binary.path !== null,
267
323
  binaryPath: binary.path,
@@ -298,6 +354,7 @@ export async function startLlamaCppServer(args: {
298
354
  }
299
355
  }
300
356
  if (initialStatus.state === 'ready') {
357
+ void fetchLlamaCppContextSize(host)
301
358
  return { ok: true, alreadyRunning: true }
302
359
  }
303
360
  if (initialStatus.state === 'different') {
@@ -377,7 +434,10 @@ export async function startLlamaCppServer(args: {
377
434
  pollMs: args.pollMs ?? 500,
378
435
  childFailure: () => childFailure,
379
436
  })
380
- if (ready.ok) return { ok: true, alreadyRunning: false }
437
+ if (ready.ok) {
438
+ void fetchLlamaCppContextSize(host)
439
+ return { ok: true, alreadyRunning: false }
440
+ }
381
441
  if (ready.code === 'readiness-timeout') {
382
442
  return startFailure('readiness-timeout', { detail: capture() })
383
443
  }
@@ -1,4 +1,5 @@
1
1
  import {
2
+ fetchLlamaCppContextSize,
2
3
  startLlamaCppServer,
3
4
  stopLlamaCppServer,
4
5
  type LlamaCppStartFailureCode,
@@ -64,7 +65,10 @@ export async function ensureLlamaCppRunnerReady(
64
65
  servedModels: probe.models,
65
66
  }
66
67
  }
67
- if (!local.mmprojPath) return { ok: true, alreadyRunning: true }
68
+ if (!local.mmprojPath) {
69
+ void fetchLlamaCppContextSize(llamaCppServerHostFromBaseUrl(baseUrl))
70
+ return { ok: true, alreadyRunning: true }
71
+ }
68
72
  await (deps.stopServer ?? stopLlamaCppServer)().catch(() => null)
69
73
  }
70
74
 
@@ -1,6 +1,7 @@
1
1
  import type { Message, Provider } from '../providers/contracts.js'
2
2
  import { approximateTokens, messageTextContent } from '../utils/messages.js'
3
3
  import type { SessionMessage } from '../storage/sessions.js'
4
+ import { getCachedLlamaCppContextSize } from '../models/llamacpp.js'
4
5
 
5
6
  const COMPACT_SYSTEM = `Create a continuation handoff for this coding-agent conversation.
6
7
  Keep it concise but complete. Preserve the current goal, user constraints, key decisions, relevant files, tool results, pending tasks, and known failures. Do not claim unverified work was completed. No preamble.`
@@ -17,8 +18,7 @@ const CLOUD_MESSAGE_CHAR_LIMIT = 2_000
17
18
  export type CompactionStage =
18
19
  | 'preparing transcript'
19
20
  | 'compressing long context'
20
- | 'summarizing with local model'
21
- | 'summarizing with provider'
21
+ | 'summarizing transcript'
22
22
 
23
23
  export type CompactTranscriptOptions = {
24
24
  signal?: AbortSignal
@@ -60,6 +60,12 @@ export function contextWindow(model: string): number {
60
60
  export function contextWindowInfo(provider: string, model: string): ContextWindowInfo {
61
61
  const lower = model.toLowerCase()
62
62
  const providerLower = provider.toLowerCase()
63
+ if (providerLower === 'llamacpp') {
64
+ const cached = getCachedLlamaCppContextSize()
65
+ if (cached) {
66
+ return { tokens: cached, confidence: 'exact', source: 'llama.cpp /props' }
67
+ }
68
+ }
63
69
  if (lower.startsWith('qwen3:4b') || lower.startsWith('qwen3:30b') || lower.startsWith('qwen3:235b')) {
64
70
  return { tokens: 256_000, confidence: 'inferred', source: 'qwen3 long-context tag' }
65
71
  }
@@ -138,7 +144,7 @@ export async function compactTranscript(
138
144
  const signal = options.signal ?? controller!.signal
139
145
  let summary = ''
140
146
  const local = isLocalProviderId(provider.id)
141
- options.onStage?.(local ? 'summarizing with local model' : 'summarizing with provider')
147
+ options.onStage?.('summarizing transcript')
142
148
  try {
143
149
  for await (const ev of provider.complete(prompt, signal, {
144
150
  maxTokens: options.maxOutputTokens ?? (local ? LOCAL_COMPACTION_OUTPUT_TOKENS : CLOUD_COMPACTION_OUTPUT_TOKENS),
@@ -168,7 +174,7 @@ export function buildCompactionSource(
168
174
  const nonSystem = transcript.filter(m => m.role !== 'system')
169
175
  const local = isLocalProviderId(providerId)
170
176
  const tokenBudget = options.maxInputTokens ?? (local ? LOCAL_COMPACTION_INPUT_TOKENS : CLOUD_COMPACTION_INPUT_TOKENS)
171
- const charBudget = Math.max(1_000, tokenBudget * 4)
177
+ const charBudget = Math.max(1_000, tokenBudget * 3)
172
178
  const recentMessageCount = local ? LOCAL_RECENT_MESSAGE_COUNT : CLOUD_RECENT_MESSAGE_COUNT
173
179
  const messageCharLimit = local ? LOCAL_MESSAGE_CHAR_LIMIT : CLOUD_MESSAGE_CHAR_LIMIT
174
180
  const rawTokenEstimate = approximateTokens(nonSystem)
@@ -385,5 +391,5 @@ function limitCompactionText(text: string, charBudget: number): string {
385
391
  }
386
392
 
387
393
  function approximateTextTokens(text: string): number {
388
- return Math.ceil(text.length / 4)
394
+ return Math.ceil(text.length / 3)
389
395
  }
@@ -96,7 +96,7 @@ async function tryReadNative(): Promise<ReadResult> {
96
96
  return readFrom('pbpaste', [], 'pbpaste')
97
97
  }
98
98
  if (process.platform === 'win32') {
99
- return readFrom('powershell', ['-NoProfile', '-Command', 'Get-Clipboard -Raw'], 'powershell Get-Clipboard')
99
+ return readFrom('powershell', ['-NoProfile', '-Command', '[Console]::OutputEncoding=[Text.Encoding]::UTF8; Get-Clipboard -Raw'], 'powershell Get-Clipboard')
100
100
  }
101
101
  if (process.env['WAYLAND_DISPLAY']) {
102
102
  const wl = await probe('wl-paste', ['--version'])
@@ -33,5 +33,5 @@ export function blocksToText(blocks: MessageContentBlock[]): string {
33
33
  export function approximateTokens(messages: Message[]): number {
34
34
  let chars = 0
35
35
  for (const m of messages) chars += messageTextContent(m).length
36
- return Math.ceil(chars / 4)
36
+ return Math.ceil(chars / 3)
37
37
  }