@poncho-ai/harness 0.59.5 → 0.59.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.59.5 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.59.7 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
- ESM dist/index.js 557.00 KB
12
11
  ESM dist/isolate-F2PPSUL6.js 53.82 KB
13
- ESM ⚡️ Build success in 252ms
12
+ ESM dist/index.js 558.06 KB
13
+ ESM ⚡️ Build success in 257ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 7698ms
15
+ DTS ⚡️ Build success in 7680ms
16
16
  DTS dist/index.d.ts 101.66 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,33 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.59.7
4
+
5
+ ### Patch Changes
6
+
7
+ - [`c73cb19`](https://github.com/cesr/poncho-ai/commit/c73cb19ec8bf61fe0598262ae4d050fb84c939b5) Thanks [@cesr](https://github.com/cesr)! - Auto-compaction never fired on cached conversations: the per-step context
8
+ measure (`latestContextTokens`) used `usage.inputTokens`, which with
9
+ Anthropic prompt caching is only the NON-cached slice — a real 190k+
10
+ conversation reported ~12k of "context", so the trigger comparison never
11
+ tripped and transcripts grew past the model's window. Context now counts
12
+ input + cache-read + cache-write tokens (everything the model read). Also
13
+ pins claude-fable-5 / opus-4-8 / opus-4-7 in the context-window registry
14
+ (previously relying on the silent 200k default).
15
+
16
+ ## 0.59.6
17
+
18
+ ### Patch Changes
19
+
20
+ - [`e573f72`](https://github.com/cesr/poncho-ai/commit/e573f72ca31627e48dbdbf296946a72c59a488db) Thanks [@cesr](https://github.com/cesr)! - Preserve the LLM transcript when a turn dies. The errored branch of
21
+ runConversationTurn persisted only the display draft — `_harnessMessages`
22
+ was never updated, so the model's next turn had no memory of the entire
23
+ failed interaction (its user message included), even though the user could
24
+ see it on screen. Both the errored branch and the cancelled-without-
25
+ `run:cancelled.messages` fallback now append a faithful plain-text
26
+ reconstruction (user message + assistant text-so-far + tool activity + an
27
+ interruption note) to the transcript. Plain text on purpose: replaying real
28
+ tool_use blocks would need paired results or the next API call rejects the
29
+ dangling pair.
30
+
3
31
  ## 0.59.5
4
32
 
5
33
  ### Patch Changes
package/dist/index.js CHANGED
@@ -7490,6 +7490,12 @@ var completeOpenAICodexDeviceAuth = async (request) => {
7490
7490
 
7491
7491
  // src/model-factory.ts
7492
7492
  var MODEL_CONTEXT_WINDOWS = {
7493
+ // Pinned conservatively at 200k. The API has accepted >204k for fable-5
7494
+ // (its real window is larger), but compacting at trigger×200k keeps
7495
+ // long-conversation cost bounded; raise deliberately, not by omission.
7496
+ "claude-fable-5": 2e5,
7497
+ "claude-opus-4-8": 2e5,
7498
+ "claude-opus-4-7": 2e5,
7493
7499
  "claude-opus-4-6": 2e5,
7494
7500
  "claude-sonnet-4-6": 2e5,
7495
7501
  "claude-opus-4-5": 2e5,
@@ -11362,7 +11368,7 @@ ${textContent}` };
11362
11368
  totalOutputTokens += usage.outputTokens ?? 0;
11363
11369
  totalCachedTokens += stepCachedTokens;
11364
11370
  totalCacheWriteTokens += stepCacheWriteTokens;
11365
- latestContextTokens = stepInputTokens;
11371
+ latestContextTokens = stepInputTokens + stepCachedTokens + stepCacheWriteTokens;
11366
11372
  toolOutputEstimateSinceModel = 0;
11367
11373
  yield pushEvent({
11368
11374
  type: "model:response",
@@ -14498,6 +14504,20 @@ var runConversationTurn = async (opts) => {
14498
14504
  };
14499
14505
  } catch (error) {
14500
14506
  flushTurnDraft(draft);
14507
+ const reconstructTranscriptTail = (reason) => {
14508
+ const parts = [];
14509
+ if (draft.assistantResponse.length > 0) parts.push(draft.assistantResponse);
14510
+ if (draft.toolTimeline.length > 0) {
14511
+ parts.push(`Tool activity before interruption:
14512
+ ${draft.toolTimeline.join("\n")}`);
14513
+ }
14514
+ parts.push(`[This turn was interrupted: ${reason}. The work above may be incomplete.]`);
14515
+ return [
14516
+ ...conversation._harnessMessages ?? [],
14517
+ userMessage,
14518
+ { role: "assistant", content: parts.join("\n\n") }
14519
+ ];
14520
+ };
14501
14521
  const aborted = opts.abortSignal?.aborted === true;
14502
14522
  if (aborted || runCancelled) {
14503
14523
  if (draft.assistantResponse.length > 0 || draft.toolTimeline.length > 0 || draft.sections.length > 0) {
@@ -14508,7 +14528,7 @@ var runConversationTurn = async (opts) => {
14508
14528
  latestRunId,
14509
14529
  contextTokens: 0,
14510
14530
  contextWindow: 0,
14511
- harnessMessages: cancelHarnessMessages,
14531
+ harnessMessages: cancelHarnessMessages ?? reconstructTranscriptTail("cancelled"),
14512
14532
  toolResultArchive: opts.harness.getToolResultArchive(opts.conversationId)
14513
14533
  },
14514
14534
  { shouldRebuildCanonical: true }
@@ -14551,6 +14571,9 @@ var runConversationTurn = async (opts) => {
14551
14571
  }
14552
14572
  if (draft.assistantResponse.length > 0 || draft.toolTimeline.length > 0 || draft.sections.length > 0) {
14553
14573
  conversation.messages = buildMessages(false);
14574
+ conversation._harnessMessages = reconstructTranscriptTail(
14575
+ error instanceof Error ? `error \u2014 ${error.message}` : "error"
14576
+ );
14554
14577
  conversation.updatedAt = Date.now();
14555
14578
  await opts.conversationStore.update(conversation);
14556
14579
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.59.5",
3
+ "version": "0.59.7",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
package/src/harness.ts CHANGED
@@ -3208,7 +3208,14 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3208
3208
  totalOutputTokens += usage.outputTokens ?? 0;
3209
3209
  totalCachedTokens += stepCachedTokens;
3210
3210
  totalCacheWriteTokens += stepCacheWriteTokens;
3211
- latestContextTokens = stepInputTokens;
3211
+ // Context size = EVERYTHING the model read this step. With prompt
3212
+ // caching, Anthropic's `usage.input_tokens` is only the non-cached
3213
+ // slice — the bulk of a long conversation arrives as cache reads.
3214
+ // Counting input alone made the auto-compaction check see ~12k of
3215
+ // "context" on a real 190k+ conversation, so compaction never fired
3216
+ // and the transcript grew unbounded (observed 2026-06-12: 205k real
3217
+ // context, trigger at 190k, no compaction).
3218
+ latestContextTokens = stepInputTokens + stepCachedTokens + stepCacheWriteTokens;
3212
3219
  toolOutputEstimateSinceModel = 0;
3213
3220
 
3214
3221
  yield pushEvent({
@@ -9,6 +9,12 @@ import {
9
9
  export type ModelProviderFactory = (modelName: string) => LanguageModel;
10
10
 
11
11
  const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
12
+ // Pinned conservatively at 200k. The API has accepted >204k for fable-5
13
+ // (its real window is larger), but compacting at trigger×200k keeps
14
+ // long-conversation cost bounded; raise deliberately, not by omission.
15
+ "claude-fable-5": 200_000,
16
+ "claude-opus-4-8": 200_000,
17
+ "claude-opus-4-7": 200_000,
12
18
  "claude-opus-4-6": 200_000,
13
19
  "claude-sonnet-4-6": 200_000,
14
20
  "claude-opus-4-5": 200_000,
@@ -420,6 +420,31 @@ export const runConversationTurn = async (
420
420
  };
421
421
  } catch (error) {
422
422
  flushTurnDraft(draft);
423
+
424
+ // The LLM transcript (`_harnessMessages`) is normally only written at a
425
+ // clean finalize / a cancel that delivered `run:cancelled.messages`. A
426
+ // turn that dies any other way (in-process error, abort that never
427
+ // surfaced the cancel event) would leave the transcript WITHOUT this
428
+ // turn at all — the display shows the partial work but the model has
429
+ // amnesia about the whole interaction on the next turn. Reconstruct a
430
+ // faithful plain-text record from the draft instead: the user message
431
+ // plus an assistant message carrying the text-so-far + tool activity.
432
+ // Plain text on purpose — replaying real tool_use blocks would need
433
+ // paired results or the next API call 400s on the dangling pair.
434
+ const reconstructTranscriptTail = (reason: string): Message[] => {
435
+ const parts: string[] = [];
436
+ if (draft.assistantResponse.length > 0) parts.push(draft.assistantResponse);
437
+ if (draft.toolTimeline.length > 0) {
438
+ parts.push(`Tool activity before interruption:\n${draft.toolTimeline.join("\n")}`);
439
+ }
440
+ parts.push(`[This turn was interrupted: ${reason}. The work above may be incomplete.]`);
441
+ return [
442
+ ...(conversation._harnessMessages ?? []),
443
+ userMessage,
444
+ { role: "assistant" as const, content: parts.join("\n\n") },
445
+ ];
446
+ };
447
+
423
448
  const aborted = opts.abortSignal?.aborted === true;
424
449
  if (aborted || runCancelled) {
425
450
  if (
@@ -434,7 +459,8 @@ export const runConversationTurn = async (
434
459
  latestRunId,
435
460
  contextTokens: 0,
436
461
  contextWindow: 0,
437
- harnessMessages: cancelHarnessMessages,
462
+ harnessMessages:
463
+ cancelHarnessMessages ?? reconstructTranscriptTail("cancelled"),
438
464
  toolResultArchive: opts.harness.getToolResultArchive(opts.conversationId),
439
465
  },
440
466
  { shouldRebuildCanonical: true },
@@ -484,6 +510,12 @@ export const runConversationTurn = async (
484
510
  draft.sections.length > 0
485
511
  ) {
486
512
  conversation.messages = buildMessages(false); // terminal: errored
513
+ // Keep the LLM transcript faithful too (see reconstructTranscriptTail
514
+ // above) — without this, the next turn's model context skipped the
515
+ // whole errored interaction.
516
+ conversation._harnessMessages = reconstructTranscriptTail(
517
+ error instanceof Error ? `error — ${error.message}` : "error",
518
+ );
487
519
  conversation.updatedAt = Date.now();
488
520
  await opts.conversationStore.update(conversation);
489
521
  }