@poncho-ai/harness 0.59.6 → 0.59.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.59.6 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.59.8 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -9,8 +9,8 @@
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
11
  ESM dist/isolate-F2PPSUL6.js 53.82 KB
12
- ESM dist/index.js 557.73 KB
13
- ESM ⚡️ Build success in 256ms
12
+ ESM dist/index.js 558.23 KB
13
+ ESM ⚡️ Build success in 235ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 8276ms
15
+ DTS ⚡️ Build success in 8463ms
16
16
  DTS dist/index.d.ts 101.66 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,29 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.59.8
4
+
5
+ ### Patch Changes
6
+
7
+ - [`fb07954`](https://github.com/cesr/poncho-ai/commit/fb07954ee7edfa614bdd5ed27474f4d3be7c8f1f) Thanks [@cesr](https://github.com/cesr)! - Fix conversations.rename on Postgres: the JSONB `data` column usually
8
+ holds a JSON-encoded string scalar (update() binds JSON.stringify output),
9
+ so the 0.59.3 in-blob title update threw `cannot set path in scalar` and
10
+ every rename failed with a 500 error. The UPDATE now branches on jsonb_typeof(data) and
11
+ preserves each row's encoding (objects via jsonb_set; string scalars
12
+ unwrapped, set, and re-serialized).
13
+
14
+ ## 0.59.7
15
+
16
+ ### Patch Changes
17
+
18
+ - [`c73cb19`](https://github.com/cesr/poncho-ai/commit/c73cb19ec8bf61fe0598262ae4d050fb84c939b5) Thanks [@cesr](https://github.com/cesr)! - Auto-compaction never fired on cached conversations: the per-step context
19
+ measure (`latestContextTokens`) used `usage.inputTokens`, which with
20
+ Anthropic prompt caching is only the NON-cached slice — a real 190k+
21
+ conversation reported ~12k of "context", so the trigger comparison never
22
+ tripped and transcripts grew past the model's window. Context now counts
23
+ input + cache-read + cache-write tokens (everything the model read). Also
24
+ pins claude-fable-5 / claude-opus-4-8 / claude-opus-4-7 in the context-window registry
25
+ (previously relying on the silent 200k default).
26
+
3
27
  ## 0.59.6
4
28
 
5
29
  ### Patch Changes
package/dist/index.js CHANGED
@@ -3918,7 +3918,10 @@ var SqlStorageEngine = class {
3918
3918
  },
3919
3919
  rename: async (conversationId, title) => {
3920
3920
  const normalized = normalizeTitle2(title);
3921
- const dataExpr = this.dialect.tag === "sqlite" ? `json_set(data, '$.title', $2)` : `jsonb_set(data, '{title}', to_jsonb($2::text))`;
3921
+ const dataExpr = this.dialect.tag === "sqlite" ? `json_set(data, '$.title', $2)` : `CASE WHEN jsonb_typeof(data) = 'object'
3922
+ THEN jsonb_set(data, '{title}', to_jsonb($2::text))
3923
+ ELSE to_jsonb(jsonb_set((data #>> '{}')::jsonb, '{title}', to_jsonb($2::text))::text)
3924
+ END`;
3922
3925
  await this.executor.run(
3923
3926
  rewrite(
3924
3927
  `UPDATE conversations SET title = $1, data = ${dataExpr}, updated_at = $3 WHERE id = $4`,
@@ -7490,6 +7493,12 @@ var completeOpenAICodexDeviceAuth = async (request) => {
7490
7493
 
7491
7494
  // src/model-factory.ts
7492
7495
  var MODEL_CONTEXT_WINDOWS = {
7496
+ // Pinned conservatively at 200k. The API has accepted >204k for fable-5
7497
+ // (its real window is larger), but compacting at trigger×200k keeps
7498
+ // long-conversation cost bounded; raise deliberately, not by omission.
7499
+ "claude-fable-5": 2e5,
7500
+ "claude-opus-4-8": 2e5,
7501
+ "claude-opus-4-7": 2e5,
7493
7502
  "claude-opus-4-6": 2e5,
7494
7503
  "claude-sonnet-4-6": 2e5,
7495
7504
  "claude-opus-4-5": 2e5,
@@ -11362,7 +11371,7 @@ ${textContent}` };
11362
11371
  totalOutputTokens += usage.outputTokens ?? 0;
11363
11372
  totalCachedTokens += stepCachedTokens;
11364
11373
  totalCacheWriteTokens += stepCacheWriteTokens;
11365
- latestContextTokens = stepInputTokens;
11374
+ latestContextTokens = stepInputTokens + stepCachedTokens + stepCacheWriteTokens;
11366
11375
  toolOutputEstimateSinceModel = 0;
11367
11376
  yield pushEvent({
11368
11377
  type: "model:response",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.59.6",
3
+ "version": "0.59.8",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
package/src/harness.ts CHANGED
@@ -3208,7 +3208,14 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3208
3208
  totalOutputTokens += usage.outputTokens ?? 0;
3209
3209
  totalCachedTokens += stepCachedTokens;
3210
3210
  totalCacheWriteTokens += stepCacheWriteTokens;
3211
- latestContextTokens = stepInputTokens;
3211
+ // Context size = EVERYTHING the model read this step. With prompt
3212
+ // caching, Anthropic's `usage.input_tokens` is only the non-cached
3213
+ // slice — the bulk of a long conversation arrives as cache reads.
3214
+ // Counting input alone made the auto-compaction check see ~12k of
3215
+ // "context" on a real 190k+ conversation, so compaction never fired
3216
+ // and the transcript grew unbounded (observed 2026-06-12: 205k real
3217
+ // context, trigger at 190k, no compaction).
3218
+ latestContextTokens = stepInputTokens + stepCachedTokens + stepCacheWriteTokens;
3212
3219
  toolOutputEstimateSinceModel = 0;
3213
3220
 
3214
3221
  yield pushEvent({
@@ -9,6 +9,12 @@ import {
9
9
  export type ModelProviderFactory = (modelName: string) => LanguageModel;
10
10
 
11
11
  const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
12
+ // Pinned conservatively at 200k. The API has accepted >204k for fable-5
13
+ // (its real window is larger), but compacting at trigger×200k keeps
14
+ // long-conversation cost bounded; raise deliberately, not by omission.
15
+ "claude-fable-5": 200_000,
16
+ "claude-opus-4-8": 200_000,
17
+ "claude-opus-4-7": 200_000,
12
18
  "claude-opus-4-6": 200_000,
13
19
  "claude-sonnet-4-6": 200_000,
14
20
  "claude-opus-4-5": 200_000,
@@ -571,9 +571,21 @@ export abstract class SqlStorageEngine implements StorageEngine {
571
571
  // Distinct placeholders for the two title occurrences: rewrite()
572
572
  // converts $N → ? positionally for sqlite, so reusing $1 would
573
573
  // desync the param array.
574
+ //
575
+ // Postgres: the JSONB column usually holds a JSON-encoded STRING
576
+ // scalar, not an object — update() binds `JSON.stringify(conv)` and
577
+ // the driver serializes that JS string as a JSON string. A bare
578
+ // jsonb_set on those rows throws `cannot set path in scalar`
579
+ // (observed in prod 2026-06-12: every rename failed with a 500 error). Branch on the
580
+ // stored shape and preserve each row's encoding: objects get a plain
581
+ // jsonb_set; string scalars get unwrapped (#>> '{}'), parsed, set,
582
+ // and re-serialized back to a string scalar.
574
583
  const dataExpr = this.dialect.tag === "sqlite"
575
584
  ? `json_set(data, '$.title', $2)`
576
- : `jsonb_set(data, '{title}', to_jsonb($2::text))`;
585
+ : `CASE WHEN jsonb_typeof(data) = 'object'
586
+ THEN jsonb_set(data, '{title}', to_jsonb($2::text))
587
+ ELSE to_jsonb(jsonb_set((data #>> '{}')::jsonb, '{title}', to_jsonb($2::text))::text)
588
+ END`;
577
589
  await this.executor.run(
578
590
  rewrite(
579
591
  `UPDATE conversations SET title = $1, data = ${dataExpr}, updated_at = $3 WHERE id = $4`,