@poncho-ai/harness 0.59.5 → 0.59.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -4
- package/CHANGELOG.md +28 -0
- package/dist/index.js +25 -2
- package/package.json +1 -1
- package/src/harness.ts +8 -1
- package/src/model-factory.ts +6 -0
- package/src/orchestrator/run-conversation-turn.ts +33 -1
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @poncho-ai/harness@0.59.5 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
|
|
2
|
+
> @poncho-ai/harness@0.59.7 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
|
|
3
3
|
> node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
|
|
4
4
|
|
|
5
5
|
[embed-docs] Generated poncho-docs.ts with 4 topics
|
|
@@ -8,9 +8,9 @@
|
|
|
8
8
|
[34mCLI[39m tsup v8.5.1
|
|
9
9
|
[34mCLI[39m Target: es2022
|
|
10
10
|
[34mESM[39m Build start
|
|
11
|
-
[32mESM[39m [1mdist/index.js [22m[32m557.00 KB[39m
|
|
12
11
|
[32mESM[39m [1mdist/isolate-F2PPSUL6.js [22m[32m53.82 KB[39m
|
|
13
|
-
[32mESM[39m
|
|
12
|
+
[32mESM[39m [1mdist/index.js [22m[32m558.06 KB[39m
|
|
13
|
+
[32mESM[39m ⚡️ Build success in 257ms
|
|
14
14
|
[34mDTS[39m Build start
|
|
15
|
-
[32mDTS[39m ⚡️ Build success in
|
|
15
|
+
[32mDTS[39m ⚡️ Build success in 7680ms
|
|
16
16
|
[32mDTS[39m [1mdist/index.d.ts [22m[32m101.66 KB[39m
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
# @poncho-ai/harness
|
|
2
2
|
|
|
3
|
+
## 0.59.7
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- [`c73cb19`](https://github.com/cesr/poncho-ai/commit/c73cb19ec8bf61fe0598262ae4d050fb84c939b5) Thanks [@cesr](https://github.com/cesr)! - Auto-compaction never fired on cached conversations: the per-step context
|
|
8
|
+
measure (`latestContextTokens`) used `usage.inputTokens`, which with
|
|
9
|
+
Anthropic prompt caching is only the NON-cached slice — a real 190k+
|
|
10
|
+
conversation reported ~12k of "context", so the trigger comparison never
|
|
11
|
+
tripped and transcripts grew past the model's window. Context now counts
|
|
12
|
+
input + cache-read + cache-write tokens (everything the model read). Also
|
|
13
|
+
pins claude-fable-5 / opus-4-8 / opus-4-7 in the context-window registry
|
|
14
|
+
(previously relying on the silent 200k default).
|
|
15
|
+
|
|
16
|
+
## 0.59.6
|
|
17
|
+
|
|
18
|
+
### Patch Changes
|
|
19
|
+
|
|
20
|
+
- [`e573f72`](https://github.com/cesr/poncho-ai/commit/e573f72ca31627e48dbdbf296946a72c59a488db) Thanks [@cesr](https://github.com/cesr)! - Preserve the LLM transcript when a turn dies. The errored branch of
|
|
21
|
+
runConversationTurn persisted only the display draft — `_harnessMessages`
|
|
22
|
+
was never updated, so the model's next turn had no memory of the entire
|
|
23
|
+
failed interaction (its user message included), even though the user could
|
|
24
|
+
see it on screen. Both the errored branch and the cancelled-without-
|
|
25
|
+
`run:cancelled.messages` fallback now append a faithful plain-text
|
|
26
|
+
reconstruction (user message + assistant text-so-far + tool activity + an
|
|
27
|
+
interruption note) to the transcript. Plain text on purpose: replaying real
|
|
28
|
+
tool_use blocks would need paired results or the next API call rejects the
|
|
29
|
+
dangling pair.
|
|
30
|
+
|
|
3
31
|
## 0.59.5
|
|
4
32
|
|
|
5
33
|
### Patch Changes
|
package/dist/index.js
CHANGED
|
@@ -7490,6 +7490,12 @@ var completeOpenAICodexDeviceAuth = async (request) => {
|
|
|
7490
7490
|
|
|
7491
7491
|
// src/model-factory.ts
|
|
7492
7492
|
var MODEL_CONTEXT_WINDOWS = {
|
|
7493
|
+
// Pinned conservatively at 200k. The API has accepted >204k for fable-5
|
|
7494
|
+
// (its real window is larger), but compacting at trigger×200k keeps
|
|
7495
|
+
// long-conversation cost bounded; raise deliberately, not by omission.
|
|
7496
|
+
"claude-fable-5": 2e5,
|
|
7497
|
+
"claude-opus-4-8": 2e5,
|
|
7498
|
+
"claude-opus-4-7": 2e5,
|
|
7493
7499
|
"claude-opus-4-6": 2e5,
|
|
7494
7500
|
"claude-sonnet-4-6": 2e5,
|
|
7495
7501
|
"claude-opus-4-5": 2e5,
|
|
@@ -11362,7 +11368,7 @@ ${textContent}` };
|
|
|
11362
11368
|
totalOutputTokens += usage.outputTokens ?? 0;
|
|
11363
11369
|
totalCachedTokens += stepCachedTokens;
|
|
11364
11370
|
totalCacheWriteTokens += stepCacheWriteTokens;
|
|
11365
|
-
latestContextTokens = stepInputTokens;
|
|
11371
|
+
latestContextTokens = stepInputTokens + stepCachedTokens + stepCacheWriteTokens;
|
|
11366
11372
|
toolOutputEstimateSinceModel = 0;
|
|
11367
11373
|
yield pushEvent({
|
|
11368
11374
|
type: "model:response",
|
|
@@ -14498,6 +14504,20 @@ var runConversationTurn = async (opts) => {
|
|
|
14498
14504
|
};
|
|
14499
14505
|
} catch (error) {
|
|
14500
14506
|
flushTurnDraft(draft);
|
|
14507
|
+
const reconstructTranscriptTail = (reason) => {
|
|
14508
|
+
const parts = [];
|
|
14509
|
+
if (draft.assistantResponse.length > 0) parts.push(draft.assistantResponse);
|
|
14510
|
+
if (draft.toolTimeline.length > 0) {
|
|
14511
|
+
parts.push(`Tool activity before interruption:
|
|
14512
|
+
${draft.toolTimeline.join("\n")}`);
|
|
14513
|
+
}
|
|
14514
|
+
parts.push(`[This turn was interrupted: ${reason}. The work above may be incomplete.]`);
|
|
14515
|
+
return [
|
|
14516
|
+
...conversation._harnessMessages ?? [],
|
|
14517
|
+
userMessage,
|
|
14518
|
+
{ role: "assistant", content: parts.join("\n\n") }
|
|
14519
|
+
];
|
|
14520
|
+
};
|
|
14501
14521
|
const aborted = opts.abortSignal?.aborted === true;
|
|
14502
14522
|
if (aborted || runCancelled) {
|
|
14503
14523
|
if (draft.assistantResponse.length > 0 || draft.toolTimeline.length > 0 || draft.sections.length > 0) {
|
|
@@ -14508,7 +14528,7 @@ var runConversationTurn = async (opts) => {
|
|
|
14508
14528
|
latestRunId,
|
|
14509
14529
|
contextTokens: 0,
|
|
14510
14530
|
contextWindow: 0,
|
|
14511
|
-
harnessMessages: cancelHarnessMessages,
|
|
14531
|
+
harnessMessages: cancelHarnessMessages ?? reconstructTranscriptTail("cancelled"),
|
|
14512
14532
|
toolResultArchive: opts.harness.getToolResultArchive(opts.conversationId)
|
|
14513
14533
|
},
|
|
14514
14534
|
{ shouldRebuildCanonical: true }
|
|
@@ -14551,6 +14571,9 @@ var runConversationTurn = async (opts) => {
|
|
|
14551
14571
|
}
|
|
14552
14572
|
if (draft.assistantResponse.length > 0 || draft.toolTimeline.length > 0 || draft.sections.length > 0) {
|
|
14553
14573
|
conversation.messages = buildMessages(false);
|
|
14574
|
+
conversation._harnessMessages = reconstructTranscriptTail(
|
|
14575
|
+
error instanceof Error ? `error \u2014 ${error.message}` : "error"
|
|
14576
|
+
);
|
|
14554
14577
|
conversation.updatedAt = Date.now();
|
|
14555
14578
|
await opts.conversationStore.update(conversation);
|
|
14556
14579
|
}
|
package/package.json
CHANGED
package/src/harness.ts
CHANGED
|
@@ -3208,7 +3208,14 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
3208
3208
|
totalOutputTokens += usage.outputTokens ?? 0;
|
|
3209
3209
|
totalCachedTokens += stepCachedTokens;
|
|
3210
3210
|
totalCacheWriteTokens += stepCacheWriteTokens;
|
|
3211
|
-
|
|
3211
|
+
// Context size = EVERYTHING the model read this step. With prompt
|
|
3212
|
+
// caching, Anthropic's `usage.input_tokens` is only the non-cached
|
|
3213
|
+
// slice — the bulk of a long conversation arrives as cache reads.
|
|
3214
|
+
// Counting input alone made the auto-compaction check see ~12k of
|
|
3215
|
+
// "context" on a real 190k+ conversation, so compaction never fired
|
|
3216
|
+
// and the transcript grew unbounded (observed 2026-06-12: 205k real
|
|
3217
|
+
// context, trigger at 190k, no compaction).
|
|
3218
|
+
latestContextTokens = stepInputTokens + stepCachedTokens + stepCacheWriteTokens;
|
|
3212
3219
|
toolOutputEstimateSinceModel = 0;
|
|
3213
3220
|
|
|
3214
3221
|
yield pushEvent({
|
package/src/model-factory.ts
CHANGED
|
@@ -9,6 +9,12 @@ import {
|
|
|
9
9
|
export type ModelProviderFactory = (modelName: string) => LanguageModel;
|
|
10
10
|
|
|
11
11
|
const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
|
|
12
|
+
// Pinned conservatively at 200k. The API has accepted >204k for fable-5
|
|
13
|
+
// (its real window is larger), but compacting at trigger×200k keeps
|
|
14
|
+
// long-conversation cost bounded; raise deliberately, not by omission.
|
|
15
|
+
"claude-fable-5": 200_000,
|
|
16
|
+
"claude-opus-4-8": 200_000,
|
|
17
|
+
"claude-opus-4-7": 200_000,
|
|
12
18
|
"claude-opus-4-6": 200_000,
|
|
13
19
|
"claude-sonnet-4-6": 200_000,
|
|
14
20
|
"claude-opus-4-5": 200_000,
|
package/src/orchestrator/run-conversation-turn.ts
CHANGED
|
@@ -420,6 +420,31 @@ export const runConversationTurn = async (
|
|
|
420
420
|
};
|
|
421
421
|
} catch (error) {
|
|
422
422
|
flushTurnDraft(draft);
|
|
423
|
+
|
|
424
|
+
// The LLM transcript (`_harnessMessages`) is normally only written at a
|
|
425
|
+
// clean finalize / a cancel that delivered `run:cancelled.messages`. A
|
|
426
|
+
// turn that dies any other way (in-process error, abort that never
|
|
427
|
+
// surfaced the cancel event) would leave the transcript WITHOUT this
|
|
428
|
+
// turn at all — the display shows the partial work but the model has
|
|
429
|
+
// amnesia about the whole interaction on the next turn. Reconstruct a
|
|
430
|
+
// faithful plain-text record from the draft instead: the user message
|
|
431
|
+
// plus an assistant message carrying the text-so-far + tool activity.
|
|
432
|
+
// Plain text on purpose — replaying real tool_use blocks would need
|
|
433
|
+
// paired results or the next API call 400s on the dangling pair.
|
|
434
|
+
const reconstructTranscriptTail = (reason: string): Message[] => {
|
|
435
|
+
const parts: string[] = [];
|
|
436
|
+
if (draft.assistantResponse.length > 0) parts.push(draft.assistantResponse);
|
|
437
|
+
if (draft.toolTimeline.length > 0) {
|
|
438
|
+
parts.push(`Tool activity before interruption:\n${draft.toolTimeline.join("\n")}`);
|
|
439
|
+
}
|
|
440
|
+
parts.push(`[This turn was interrupted: ${reason}. The work above may be incomplete.]`);
|
|
441
|
+
return [
|
|
442
|
+
...(conversation._harnessMessages ?? []),
|
|
443
|
+
userMessage,
|
|
444
|
+
{ role: "assistant" as const, content: parts.join("\n\n") },
|
|
445
|
+
];
|
|
446
|
+
};
|
|
447
|
+
|
|
423
448
|
const aborted = opts.abortSignal?.aborted === true;
|
|
424
449
|
if (aborted || runCancelled) {
|
|
425
450
|
if (
|
|
@@ -434,7 +459,8 @@ export const runConversationTurn = async (
|
|
|
434
459
|
latestRunId,
|
|
435
460
|
contextTokens: 0,
|
|
436
461
|
contextWindow: 0,
|
|
437
|
-
harnessMessages:
|
|
462
|
+
harnessMessages:
|
|
463
|
+
cancelHarnessMessages ?? reconstructTranscriptTail("cancelled"),
|
|
438
464
|
toolResultArchive: opts.harness.getToolResultArchive(opts.conversationId),
|
|
439
465
|
},
|
|
440
466
|
{ shouldRebuildCanonical: true },
|
|
@@ -484,6 +510,12 @@ export const runConversationTurn = async (
|
|
|
484
510
|
draft.sections.length > 0
|
|
485
511
|
) {
|
|
486
512
|
conversation.messages = buildMessages(false); // terminal: errored
|
|
513
|
+
// Keep the LLM transcript faithful too (see reconstructTranscriptTail
|
|
514
|
+
// above) — without this, the next turn's model context skipped the
|
|
515
|
+
// whole errored interaction.
|
|
516
|
+
conversation._harnessMessages = reconstructTranscriptTail(
|
|
517
|
+
error instanceof Error ? `error — ${error.message}` : "error",
|
|
518
|
+
);
|
|
487
519
|
conversation.updatedAt = Date.now();
|
|
488
520
|
await opts.conversationStore.update(conversation);
|
|
489
521
|
}
|