@poncho-ai/harness 0.58.0 → 0.59.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.58.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.59.1 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
- ESM dist/index.js 567.32 KB
12
11
  ESM dist/isolate-F2PPSUL6.js 53.82 KB
13
- ESM ⚡️ Build success in 177ms
12
+ ESM dist/index.js 567.42 KB
13
+ ESM ⚡️ Build success in 233ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 7320ms
15
+ DTS ⚡️ Build success in 7196ms
16
16
  DTS dist/index.d.ts 104.68 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,43 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.59.1
4
+
5
+ ### Patch Changes
6
+
7
+ - [`299f574`](https://github.com/cesr/poncho-ai/commit/299f574a2f2f0d4873f42bbcffdf604e9cc4c29c) Thanks [@cesr](https://github.com/cesr)! - Mark in-flight assistant drafts with `metadata.incomplete = true`.
8
+
9
+ The orchestrator's per-step draft persist (`persistDraft`) and the
10
+ approval/device checkpoint and continuation writes now stamp the trailing
11
+ assistant message with `metadata.incomplete = true`; the three terminal writes
12
+ (normal finalize, cancelled, errored) clear it. This lets a consumer that
13
+ reconciles a persisted snapshot against a live event stream (e.g. a
14
+ WebSocket layer) strip the in-flight draft from the authoritative snapshot
15
+ and rebuild that turn from the event log instead — so the snapshot and the
16
+ replayed events never both carry the in-flight turn, eliminating
17
+ reconnect-time duplication. Additive + backwards-compatible: consumers that
18
+ ignore the flag are unaffected.
19
+
20
+ - Updated dependencies [[`299f574`](https://github.com/cesr/poncho-ai/commit/299f574a2f2f0d4873f42bbcffdf604e9cc4c29c)]:
21
+ - @poncho-ai/sdk@1.15.1
22
+
23
+ ## 0.59.0
24
+
25
+ ### Minor Changes
26
+
27
+ - [#157](https://github.com/cesr/poncho-ai/pull/157) [`3f65382`](https://github.com/cesr/poncho-ai/commit/3f653820c9e0c66a12b544842c1ad3ddefdfd4a6) Thanks [@cesr](https://github.com/cesr)! - storage: scope the entry read-cutover to pendingSubagentResults only
28
+
29
+ The append-only read rebuild now overrides ONLY `pendingSubagentResults`
30
+ from the entry log — the single conversation field with a write race (a
31
+ subagent finishing mid-turn vs. the parent turn's whole-blob write). Each
32
+ result is a race-free INSERT (subagent_result entry) and consumption is a
33
+ callback_started entry, so reading it from entries means the parent
34
+ clobbering the blob copy is harmless — that is what eliminates the clobber race.
35
+
36
+ Message history (`messages` / `_harnessMessages`) is written solely by the
37
+ serialized turn finalize and is never raced, so it stays on the blob
38
+ (known-good; far simpler than faithfully rebuilding the LLM transcript
39
+ from entries, which the callback path did not capture correctly).
40
+
3
41
  ## 0.58.0
4
42
 
5
43
  ### Minor Changes
package/dist/index.js CHANGED
@@ -2838,19 +2838,16 @@ function getPendingSubagentResults(entries) {
2838
2838
  }
2839
2839
  return entries.filter((e) => e.type === "subagent_result").filter((e) => !consumed.has(e.seq)).map((e) => e.result);
2840
2840
  }
2841
- var FULL_TRANSCRIPT_TAIL = 1e5;
2842
2841
  async function rebuildConversationFromEntries(conversation, readEntries) {
2843
2842
  if (process.env.PONCHO_READ_ENTRIES === "0") return conversation;
2844
2843
  try {
2845
2844
  const entries = await readEntries(conversation.conversationId);
2846
2845
  if (entries.length === 0) return conversation;
2847
- conversation._harnessMessages = buildLlmContext(entries);
2848
- conversation.messages = buildDisplaySnapshot(entries, FULL_TRANSCRIPT_TAIL).messages;
2849
2846
  conversation.pendingSubagentResults = getPendingSubagentResults(entries);
2850
2847
  return conversation;
2851
2848
  } catch (err) {
2852
2849
  entriesReadLog.warn(
2853
- `[entries-read] ${conversation.conversationId} rebuild failed, using blob: ${err instanceof Error ? err.message : String(err)}`
2850
+ `[entries-read] ${conversation.conversationId} pendingSubagentResults rebuild failed, using blob: ${err instanceof Error ? err.message : String(err)}`
2854
2851
  );
2855
2852
  return conversation;
2856
2853
  }
@@ -14480,7 +14477,7 @@ var runConversationTurn = async (opts) => {
14480
14477
  let runContinuationMessages;
14481
14478
  let cancelHarnessMessages;
14482
14479
  let checkpointedRun = false;
14483
- const buildMessages = () => {
14480
+ const buildMessages = (incomplete = true) => {
14484
14481
  const draftSections = cloneSections(draft.sections);
14485
14482
  if (draft.currentTools.length > 0) {
14486
14483
  draftSections.push({ type: "tools", content: [...draft.currentTools] });
@@ -14499,10 +14496,15 @@ var runConversationTurn = async (opts) => {
14499
14496
  {
14500
14497
  role: "assistant",
14501
14498
  content: draft.assistantResponse,
14502
- metadata: buildAssistantMetadata(draft, draftSections, {
14503
- id: assistantId,
14504
- timestamp: turnTimestamp
14505
- })
14499
+ metadata: {
14500
+ ...buildAssistantMetadata(draft, draftSections, {
14501
+ id: assistantId,
14502
+ timestamp: turnTimestamp
14503
+ }),
14504
+ // Only stamp the flag when true; finalize omits it so completed
14505
+ // assistants stay clean (no `incomplete: false` noise on the row).
14506
+ ...incomplete ? { incomplete: true } : {}
14507
+ }
14506
14508
  }
14507
14509
  ];
14508
14510
  };
@@ -14724,7 +14726,7 @@ var runConversationTurn = async (opts) => {
14724
14726
  flushTurnDraft(draft);
14725
14727
  latestRunId = execution.latestRunId || latestRunId;
14726
14728
  if (!checkpointedRun && !runContinuationMessages) {
14727
- conversation.messages = buildMessages();
14729
+ conversation.messages = buildMessages(false);
14728
14730
  applyTurnMetadata(
14729
14731
  conversation,
14730
14732
  {
@@ -14782,7 +14784,7 @@ var runConversationTurn = async (opts) => {
14782
14784
  const aborted = opts.abortSignal?.aborted === true;
14783
14785
  if (aborted || runCancelled) {
14784
14786
  if (draft.assistantResponse.length > 0 || draft.toolTimeline.length > 0 || draft.sections.length > 0) {
14785
- conversation.messages = buildMessages();
14787
+ conversation.messages = buildMessages(false);
14786
14788
  applyTurnMetadata(
14787
14789
  conversation,
14788
14790
  {
@@ -14831,7 +14833,7 @@ var runConversationTurn = async (opts) => {
14831
14833
  }
14832
14834
  }
14833
14835
  if (draft.assistantResponse.length > 0 || draft.toolTimeline.length > 0 || draft.sections.length > 0) {
14834
- conversation.messages = buildMessages();
14836
+ conversation.messages = buildMessages(false);
14835
14837
  conversation.updatedAt = Date.now();
14836
14838
  await opts.conversationStore.update(conversation);
14837
14839
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.58.0",
3
+ "version": "0.59.1",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  "mustache": "^4.2.0",
35
35
  "yaml": "^2.4.0",
36
36
  "zod": "^3.22.0",
37
- "@poncho-ai/sdk": "1.15.0"
37
+ "@poncho-ai/sdk": "1.15.1"
38
38
  },
39
39
  "peerDependencies": {
40
40
  "esbuild": ">=0.17.0",
@@ -157,7 +157,15 @@ export const runConversationTurn = async (
157
157
  let cancelHarnessMessages: Message[] | undefined;
158
158
  let checkpointedRun = false;
159
159
 
160
- const buildMessages = (): Message[] => {
160
+ // `incomplete: true` (the default) marks the trailing assistant message as
161
+ // an in-flight DRAFT — content for a turn that hasn't finished. A consumer
162
+ // (e.g. PonchOS's WS snapshot) uses this to strip the draft from the
163
+ // authoritative snapshot: the in-flight turn is delivered by the event
164
+ // stream instead, so the snapshot and the event log never both carry it
165
+ // (no reconnect duplication). The three TERMINAL writes (normal finalize,
166
+ // cancelled, errored) pass `incomplete: false` — at that point the turn is
167
+ // done and the assistant message is authoritative.
168
+ const buildMessages = (incomplete = true): Message[] => {
161
169
  const draftSections = cloneSections(draft.sections);
162
170
  if (draft.currentTools.length > 0) {
163
171
  draftSections.push({ type: "tools", content: [...draft.currentTools] });
@@ -179,10 +187,15 @@ export const runConversationTurn = async (
179
187
  {
180
188
  role: "assistant" as const,
181
189
  content: draft.assistantResponse,
182
- metadata: buildAssistantMetadata(draft, draftSections, {
183
- id: assistantId,
184
- timestamp: turnTimestamp,
185
- }),
190
+ metadata: {
191
+ ...buildAssistantMetadata(draft, draftSections, {
192
+ id: assistantId,
193
+ timestamp: turnTimestamp,
194
+ }),
195
+ // Only stamp the flag when true; finalize omits it so completed
196
+ // assistants stay clean (no `incomplete: false` noise on the row).
197
+ ...(incomplete ? { incomplete: true } : {}),
198
+ },
186
199
  },
187
200
  ];
188
201
  };
@@ -442,7 +455,7 @@ export const runConversationTurn = async (
442
455
  latestRunId = execution.latestRunId || latestRunId;
443
456
 
444
457
  if (!checkpointedRun && !runContinuationMessages) {
445
- conversation.messages = buildMessages();
458
+ conversation.messages = buildMessages(false); // terminal: turn complete
446
459
  applyTurnMetadata(
447
460
  conversation,
448
461
  {
@@ -515,7 +528,7 @@ export const runConversationTurn = async (
515
528
  draft.toolTimeline.length > 0 ||
516
529
  draft.sections.length > 0
517
530
  ) {
518
- conversation.messages = buildMessages();
531
+ conversation.messages = buildMessages(false); // terminal: cancelled
519
532
  applyTurnMetadata(
520
533
  conversation,
521
534
  {
@@ -571,7 +584,7 @@ export const runConversationTurn = async (
571
584
  draft.toolTimeline.length > 0 ||
572
585
  draft.sections.length > 0
573
586
  ) {
574
- conversation.messages = buildMessages();
587
+ conversation.messages = buildMessages(false); // terminal: errored
575
588
  conversation.updatedAt = Date.now();
576
589
  await opts.conversationStore.update(conversation);
577
590
  }
@@ -218,10 +218,6 @@ export function getPendingSubagentResults(
218
218
  .map((e) => e.result);
219
219
  }
220
220
 
221
- // A very large tail so the rebuilt display snapshot is the full transcript.
222
- // Display callers slice to whatever window they actually render.
223
- const FULL_TRANSCRIPT_TAIL = 100_000;
224
-
225
221
  /**
226
222
  * Phase 3c read cutover: rebuild a conversation's reader-facing fields from
227
223
  * the append-only entry log, with a blob fallback for conversations that
@@ -251,19 +247,25 @@ export async function rebuildConversationFromEntries(
251
247
  conversation: Conversation,
252
248
  readEntries: (conversationId: string) => Promise<ConversationEntry[]>,
253
249
  ): Promise<Conversation> {
254
- // Kill-switch: ON by default; PONCHO_READ_ENTRIES="0" reverts to blob reads.
250
+ // Targeted append-only: only `pendingSubagentResults` is read from the
251
+ // entry log, because it's the ONLY conversation field with a write race
252
+ // (a subagent finishing mid-turn vs. the parent turn's whole-blob write).
253
+ // The message history (`messages` / `_harnessMessages`) is written solely
254
+ // by the turn finalize, which the orchestrator serializes per
255
+ // conversation — never raced — so it stays on the blob (known-good, and
256
+ // far simpler than faithfully rebuilding the LLM transcript from entries).
257
+ //
258
+ // Kill-switch: ON by default; PONCHO_READ_ENTRIES="0" reverts to the blob.
255
259
  if (process.env.PONCHO_READ_ENTRIES === "0") return conversation;
256
260
 
257
261
  try {
258
262
  const entries = await readEntries(conversation.conversationId);
259
263
  if (entries.length === 0) return conversation; // fallback: pre-dual-write
260
- conversation._harnessMessages = buildLlmContext(entries);
261
- conversation.messages = buildDisplaySnapshot(entries, FULL_TRANSCRIPT_TAIL).messages;
262
264
  conversation.pendingSubagentResults = getPendingSubagentResults(entries);
263
265
  return conversation;
264
266
  } catch (err) {
265
267
  entriesReadLog.warn(
266
- `[entries-read] ${conversation.conversationId} rebuild failed, using blob: ${
268
+ `[entries-read] ${conversation.conversationId} pendingSubagentResults rebuild failed, using blob: ${
267
269
  err instanceof Error ? err.message : String(err)
268
270
  }`,
269
271
  );
@@ -1,96 +1,80 @@
1
1
  import { describe, it, expect, afterEach } from "vitest";
2
2
  import { InMemoryConversationStore } from "../src/state.js";
3
- import {
4
- buildLlmContext,
5
- buildDisplaySnapshot,
6
- type NewConversationEntry,
7
- } from "../src/storage/entries.js";
3
+ import type { NewConversationEntry } from "../src/storage/entries.js";
8
4
  import type { Message } from "@poncho-ai/sdk";
9
5
 
10
6
  const msg = (role: Message["role"], content: string): Message => ({ role, content });
11
7
 
12
- // A turn's worth of entries: a user display message, the harness (LLM
13
- // transcript) messages for that turn, and the final assistant bubble.
14
- function turnEntries(): NewConversationEntry[] {
8
+ // Targeted cutover: ONLY pendingSubagentResults is read from entries. Two
9
+ // subagent results; one later consumed by a callback_started entry.
10
+ function subagentEntries(): NewConversationEntry[] {
15
11
  return [
16
- { type: "user_message", id: "u1", message: msg("user", "hello"), turnId: "t1" },
17
- { type: "harness_message", id: "h1", message: msg("user", "hello"), turnId: "t1" },
18
- { type: "harness_message", id: "h2", message: msg("assistant", "hi there"), turnId: "t1" },
19
- {
20
- type: "assistant_message",
21
- id: "a1",
22
- message: msg("assistant", "hi there"),
23
- turnId: "t1",
24
- runId: "r1",
25
- },
12
+ { type: "subagent_result", id: "sr1", result: { subagentId: "s1", task: "a", status: "completed", timestamp: 1 } },
13
+ { type: "subagent_result", id: "sr2", result: { subagentId: "s2", task: "b", status: "completed", timestamp: 2 } },
26
14
  ];
27
15
  }
28
16
 
29
- describe("Phase 3c read cutover", () => {
17
+ describe("Phase 3 targeted read cutover (pendingSubagentResults only)", () => {
30
18
  const prevFlag = process.env.PONCHO_READ_ENTRIES;
31
-
32
19
  afterEach(() => {
33
20
  if (prevFlag === undefined) delete process.env.PONCHO_READ_ENTRIES;
34
21
  else process.env.PONCHO_READ_ENTRIES = prevFlag;
35
22
  });
36
23
 
37
- it("get() rebuilds _harnessMessages/messages from entries when present", async () => {
24
+ it("rebuilds pendingSubagentResults from entries, leaving message history on the blob", async () => {
38
25
  delete process.env.PONCHO_READ_ENTRIES; // ON by default
39
26
  const store = new InMemoryConversationStore();
40
27
  const conv = await store.create("owner", "title", null);
41
-
42
- // Seed the blob with stale messages so we can prove the override happened.
43
- conv.messages = [msg("assistant", "STALE BLOB")];
44
- conv._harnessMessages = [msg("assistant", "STALE BLOB HARNESS")];
28
+ // Blob message history must be preserved (never raced, stays authoritative).
29
+ conv.messages = [msg("user", "hi"), msg("assistant", "hello")];
30
+ conv._harnessMessages = [msg("user", "hi"), msg("assistant", "hello")];
31
+ conv.pendingSubagentResults = []; // stale blob value
45
32
  await store.update(conv);
46
33
 
47
- const entries = await store.appendEntries(conv.conversationId, "agent", null, turnEntries());
34
+ await store.appendEntries(conv.conversationId, "agent", null, subagentEntries());
48
35
 
49
36
  const loaded = await store.get(conv.conversationId);
50
37
  expect(loaded).toBeDefined();
51
- expect(loaded!._harnessMessages).toEqual(buildLlmContext(entries));
52
- expect(loaded!.messages).toEqual(buildDisplaySnapshot(entries, 100000).messages);
53
- // Display transcript drops the harness-only messages; keeps user + assistant bubble.
54
- expect(loaded!.messages.map((m) => m.content)).toEqual(["hello", "hi there"]);
38
+ // pendingSubagentResults comes from entries
39
+ expect(loaded!.pendingSubagentResults?.map((r) => r.subagentId)).toEqual(["s1", "s2"]);
40
+ // message history is UNTOUCHED (still the blob)
41
+ expect(loaded!.messages.map((m) => m.content)).toEqual(["hi", "hello"]);
42
+ expect(loaded!._harnessMessages?.map((m) => m.content)).toEqual(["hi", "hello"]);
55
43
  });
56
44
 
57
- it("get() falls back to the blob when there are no entries", async () => {
45
+ it("excludes results consumed by a callback_started entry", async () => {
58
46
  delete process.env.PONCHO_READ_ENTRIES;
59
47
  const store = new InMemoryConversationStore();
60
48
  const conv = await store.create("owner", "title", null);
61
- conv.messages = [msg("user", "blob only")];
62
- conv._harnessMessages = [msg("user", "blob only harness")];
63
- await store.update(conv);
49
+ const stored = await store.appendEntries(conv.conversationId, "agent", null, subagentEntries());
50
+ await store.appendEntries(conv.conversationId, "agent", null, [
51
+ { type: "callback_started", id: "cb1", consumedSeqs: [stored[0]!.seq] },
52
+ ]);
64
53
 
65
54
  const loaded = await store.get(conv.conversationId);
66
- expect(loaded!.messages).toEqual([msg("user", "blob only")]);
67
- expect(loaded!._harnessMessages).toEqual([msg("user", "blob only harness")]);
55
+ expect(loaded!.pendingSubagentResults?.map((r) => r.subagentId)).toEqual(["s2"]);
68
56
  });
69
57
 
70
- it("kill-switch PONCHO_READ_ENTRIES=0 reverts to blob reads even with entries", async () => {
71
- process.env.PONCHO_READ_ENTRIES = "0";
58
+ it("falls back to the blob pendingSubagentResults when there are no entries", async () => {
59
+ delete process.env.PONCHO_READ_ENTRIES;
72
60
  const store = new InMemoryConversationStore();
73
61
  const conv = await store.create("owner", "title", null);
74
- conv.messages = [msg("user", "blob wins")];
62
+ conv.pendingSubagentResults = [{ subagentId: "blob", task: "x", status: "completed", timestamp: 0 }];
75
63
  await store.update(conv);
76
- await store.appendEntries(conv.conversationId, "agent", null, turnEntries());
77
64
 
78
65
  const loaded = await store.get(conv.conversationId);
79
- expect(loaded!.messages).toEqual([msg("user", "blob wins")]);
66
+ expect(loaded!.pendingSubagentResults?.map((r) => r.subagentId)).toEqual(["blob"]);
80
67
  });
81
68
 
82
- it("get() does not mutate the stored blob conversation (clone)", async () => {
83
- delete process.env.PONCHO_READ_ENTRIES;
69
+ it("kill-switch PONCHO_READ_ENTRIES=0 reverts to blob even with entries", async () => {
70
+ process.env.PONCHO_READ_ENTRIES = "0";
84
71
  const store = new InMemoryConversationStore();
85
72
  const conv = await store.create("owner", "title", null);
86
- conv.messages = [msg("assistant", "STALE BLOB")];
73
+ conv.pendingSubagentResults = [{ subagentId: "blobwins", task: "x", status: "completed", timestamp: 0 }];
87
74
  await store.update(conv);
88
- await store.appendEntries(conv.conversationId, "agent", null, turnEntries());
75
+ await store.appendEntries(conv.conversationId, "agent", null, subagentEntries());
89
76
 
90
- await store.get(conv.conversationId);
91
- // Re-read with the kill-switch on: should still see the untouched blob.
92
- process.env.PONCHO_READ_ENTRIES = "0";
93
- const blob = await store.get(conv.conversationId);
94
- expect(blob!.messages).toEqual([msg("assistant", "STALE BLOB")]);
77
+ const loaded = await store.get(conv.conversationId);
78
+ expect(loaded!.pendingSubagentResults?.map((r) => r.subagentId)).toEqual(["blobwins"]);
95
79
  });
96
80
  });