@poncho-ai/harness 0.59.9 → 0.59.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.59.9 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.59.11 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
- ESM dist/index.js 558.75 KB
12
11
  ESM dist/isolate-F2PPSUL6.js 53.82 KB
13
- ESM ⚡️ Build success in 233ms
12
+ ESM dist/index.js 560.62 KB
13
+ ESM ⚡️ Build success in 239ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 7974ms
15
+ DTS ⚡️ Build success in 7569ms
16
16
  DTS dist/index.d.ts 102.06 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.59.11
4
+
5
+ ### Patch Changes
6
+
7
+ - [`7464ad0`](https://github.com/cesr/poncho-ai/commit/7464ad04448095b34c4b1cbd52da559cf6bd6024) Thanks [@cesr](https://github.com/cesr)! - The user's memory file gets its own 1-hour Anthropic cache breakpoint.
8
+ It previously rode the uncached dynamic system tail (with todos + time),
9
+ which re-wrote the memory block — typically the bulk of a new
10
+ conversation's one-time cache cost — on every cold prefix, despite memory
11
+ only changing on explicit writes. The system prompt now has three tiers:
12
+ static (1h), memory (1h), volatile todos+time (uncached).
13
+
14
+ ## 0.59.10
15
+
16
+ ### Patch Changes
17
+
18
+ - [`fad3918`](https://github.com/cesr/poncho-ai/commit/fad3918302114f76a29080cf28e9c003c61ef0d9) Thanks [@cesr](https://github.com/cesr)! - Stamp `session.id` / `user.id` on EVERY span, not just the invoke_agent
19
+ root. Observability backends resolve a span's identity from its own
20
+ attributes — Latitude's console session/conversation views key on the LLM
21
+ generation spans, so root-only attributes grouped the API-level trace but
22
+ left the console showing one session per turn and no user. The identity now
23
+ rides the OTel Context and an IdentityAttributeSpanProcessor injects it
24
+ into every descendant span (LLM steps, tool executions) at start.
25
+
3
26
  ## 0.59.9
4
27
 
5
28
  ### Patch Changes
package/dist/index.js CHANGED
@@ -8745,7 +8745,7 @@ var createSubagentTools = (manager) => [
8745
8745
  ];
8746
8746
 
8747
8747
  // src/harness.ts
8748
- import { trace, context as otelContext, SpanStatusCode, SpanKind, diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
8748
+ import { trace, context as otelContext, createContextKey, SpanStatusCode, SpanKind, diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
8749
8749
  import { NodeTracerProvider, BatchSpanProcessor } from "@opentelemetry/sdk-trace-node";
8750
8750
  import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
8751
8751
 
@@ -8928,6 +8928,28 @@ var telemetryLog2 = createLogger7("telemetry");
8928
8928
  var costLog = createLogger7("cost");
8929
8929
  var mcpLog2 = createLogger7("mcp");
8930
8930
  var modelLog = createLogger7("model");
8931
+ var TELEMETRY_SESSION_ID_KEY = createContextKey("poncho.telemetry.session_id");
8932
+ var TELEMETRY_USER_ID_KEY = createContextKey("poncho.telemetry.user_id");
8933
+ var IdentityAttributeSpanProcessor = class {
8934
+ onStart(span, parentContext) {
8935
+ const sessionId = parentContext.getValue(TELEMETRY_SESSION_ID_KEY);
8936
+ if (typeof sessionId === "string" && sessionId) {
8937
+ span.setAttribute("session.id", sessionId);
8938
+ }
8939
+ const userId = parentContext.getValue(TELEMETRY_USER_ID_KEY);
8940
+ if (typeof userId === "string" && userId) {
8941
+ span.setAttribute("user.id", userId);
8942
+ }
8943
+ }
8944
+ onEnd() {
8945
+ }
8946
+ forceFlush() {
8947
+ return Promise.resolve();
8948
+ }
8949
+ shutdown() {
8950
+ return Promise.resolve();
8951
+ }
8952
+ };
8931
8953
  function formatOtlpError(err) {
8932
8954
  if (!(err instanceof Error)) return String(err);
8933
8955
  const parts = [];
@@ -10339,7 +10361,9 @@ var AgentHarness = class _AgentHarness {
10339
10361
  const processor = new BatchSpanProcessor(exporter);
10340
10362
  this.otlpSpanProcessor = processor;
10341
10363
  const provider2 = new NodeTracerProvider({
10342
- spanProcessors: [processor]
10364
+ // Identity injector FIRST so every span (root, LLM steps, tool
10365
+ // executions) carries session.id/user.id before batching/export.
10366
+ spanProcessors: [new IdentityAttributeSpanProcessor(), processor]
10343
10367
  });
10344
10368
  provider2.register();
10345
10369
  this.otlpTracerProvider = provider2;
@@ -10506,7 +10530,13 @@ var AgentHarness = class _AgentHarness {
10506
10530
  ...input.tenantId ? { "tenant.id": input.tenantId } : {}
10507
10531
  }
10508
10532
  });
10509
- const spanContext = trace.setSpan(otelContext.active(), rootSpan);
10533
+ let spanContext = trace.setSpan(otelContext.active(), rootSpan);
10534
+ if (input.conversationId) {
10535
+ spanContext = spanContext.setValue(TELEMETRY_SESSION_ID_KEY, input.conversationId);
10536
+ }
10537
+ if (this.telemetryUserId) {
10538
+ spanContext = spanContext.setValue(TELEMETRY_USER_ID_KEY, this.telemetryUserId);
10539
+ }
10510
10540
  try {
10511
10541
  const gen = this.run(input);
10512
10542
  let next;
@@ -10690,11 +10720,11 @@ ${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentP
10690
10720
  const timeContext = `
10691
10721
 
10692
10722
  Current UTC time (hour precision): ${hourlyTime}`;
10693
- const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
10694
- return { staticPart, dynamicPart };
10723
+ const dynamicPart = `${todoContext}${timeContext}`;
10724
+ return { staticPart, memoryPart: memoryContext, dynamicPart };
10695
10725
  };
10696
- let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts();
10697
- let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
10726
+ let { staticPart: staticSystemPart, memoryPart: memorySystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts();
10727
+ let systemPrompt = `${staticSystemPart}${memorySystemPart}${dynamicSystemPart}`;
10698
10728
  let lastPromptFingerprint = `${this.agentFileFingerprint}
10699
10729
  ${this.skillFingerprint}`;
10700
10730
  const pushEvent = (event) => {
@@ -11173,6 +11203,18 @@ ${textContent}` };
11173
11203
  anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } }
11174
11204
  }
11175
11205
  },
11206
+ // Memory: per-user, conversation-independent, changes only on
11207
+ // an explicit memory write — its own 1h breakpoint means a
11208
+ // memory edit busts THIS block forward but a normal turn reads
11209
+ // it (plus everything before it) from cache. Breakpoint budget:
11210
+ // Anthropic allows 4; this is #2 of 3 (static, memory, tail).
11211
+ ...memorySystemPart.length > 0 ? [{
11212
+ role: "system",
11213
+ content: memorySystemPart,
11214
+ providerOptions: {
11215
+ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } }
11216
+ }
11217
+ }] : [],
11176
11218
  ...dynamicSystemPart.length > 0 ? [{ role: "system", content: dynamicSystemPart }] : [],
11177
11219
  ...cachedMessages
11178
11220
  ] : cachedMessages;
@@ -11814,8 +11856,8 @@ ${textContent}` };
11814
11856
  const currentFingerprint = `${this.agentFileFingerprint}
11815
11857
  ${this.skillFingerprint}`;
11816
11858
  if (currentFingerprint !== lastPromptFingerprint) {
11817
- ({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts());
11818
- systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
11859
+ ({ staticPart: staticSystemPart, memoryPart: memorySystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts());
11860
+ systemPrompt = `${staticSystemPart}${memorySystemPart}${dynamicSystemPart}`;
11819
11861
  lastPromptFingerprint = currentFingerprint;
11820
11862
  }
11821
11863
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.59.9",
3
+ "version": "0.59.11",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
package/src/harness.ts CHANGED
@@ -67,9 +67,41 @@ import { createSkillTools, normalizeScriptPolicyPath } from "./skill-tools.js";
67
67
  import { createSearchTools } from "./search-tools.js";
68
68
  import { createSubagentTools } from "./subagent-tools.js";
69
69
  import type { SubagentManager } from "./subagent-manager.js";
70
- import { trace, context as otelContext, SpanStatusCode, SpanKind, diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
70
+ import { trace, context as otelContext, createContextKey, type Context as OtelContextType, SpanStatusCode, SpanKind, diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
71
+ import type { Span as OtelSdkSpan, SpanProcessor } from "@opentelemetry/sdk-trace-node";
71
72
  import { NodeTracerProvider, BatchSpanProcessor } from "@opentelemetry/sdk-trace-node";
72
73
  import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
74
+
75
+ // ── Telemetry identity propagation ──────────────────────────────────────────
76
+ // Observability backends (Latitude) resolve a span's session/user from the
77
+ // span's OWN attributes — the console's session & conversation views key on
78
+ // the LLM generation spans, not the root span. So stamping `session.id` /
79
+ // `user.id` only on the invoke_agent root groups the API-level trace but
80
+ // leaves the console treating every turn as its own session. The fix is the
81
+ // same one vendor SDKs use: carry the identity in the OTel Context and have
82
+ // a SpanProcessor stamp it onto EVERY span at start.
83
+ const TELEMETRY_SESSION_ID_KEY = createContextKey("poncho.telemetry.session_id");
84
+ const TELEMETRY_USER_ID_KEY = createContextKey("poncho.telemetry.user_id");
85
+
86
+ class IdentityAttributeSpanProcessor implements SpanProcessor {
87
+ onStart(span: OtelSdkSpan, parentContext: OtelContextType): void {
88
+ const sessionId = parentContext.getValue(TELEMETRY_SESSION_ID_KEY);
89
+ if (typeof sessionId === "string" && sessionId) {
90
+ span.setAttribute("session.id", sessionId);
91
+ }
92
+ const userId = parentContext.getValue(TELEMETRY_USER_ID_KEY);
93
+ if (typeof userId === "string" && userId) {
94
+ span.setAttribute("user.id", userId);
95
+ }
96
+ }
97
+ onEnd(): void {}
98
+ forceFlush(): Promise<void> {
99
+ return Promise.resolve();
100
+ }
101
+ shutdown(): Promise<void> {
102
+ return Promise.resolve();
103
+ }
104
+ }
73
105
  import { normalizeOtlp } from "./telemetry.js";
74
106
 
75
107
  /** Extract useful details from OTLPExporterError (has .code + .data) or plain Error. */
@@ -1883,7 +1915,9 @@ export class AgentHarness {
1883
1915
  const processor = new BatchSpanProcessor(exporter);
1884
1916
  this.otlpSpanProcessor = processor;
1885
1917
  const provider = new NodeTracerProvider({
1886
- spanProcessors: [processor],
1918
+ // Identity injector FIRST so every span (root, LLM steps, tool
1919
+ // executions) carries session.id/user.id before batching/export.
1920
+ spanProcessors: [new IdentityAttributeSpanProcessor(), processor],
1887
1921
  });
1888
1922
  provider.register();
1889
1923
  this.otlpTracerProvider = provider;
@@ -2074,7 +2108,15 @@ export class AgentHarness {
2074
2108
  },
2075
2109
  });
2076
2110
 
2077
- const spanContext = trace.setSpan(otelContext.active(), rootSpan);
2111
+ let spanContext = trace.setSpan(otelContext.active(), rootSpan);
2112
+ // Identity rides the context so IdentityAttributeSpanProcessor stamps
2113
+ // session.id/user.id on every descendant span (see processor docs).
2114
+ if (input.conversationId) {
2115
+ spanContext = spanContext.setValue(TELEMETRY_SESSION_ID_KEY, input.conversationId);
2116
+ }
2117
+ if (this.telemetryUserId) {
2118
+ spanContext = spanContext.setValue(TELEMETRY_USER_ID_KEY, this.telemetryUserId);
2119
+ }
2078
2120
 
2079
2121
  try {
2080
2122
  const gen = this.run(input);
@@ -2289,13 +2331,17 @@ ${typeStubs}
2289
2331
  Code is wrapped in an async IIFE — use \`return\` to return a value to the tool result.`;
2290
2332
  }
2291
2333
 
2292
- // Split the system prompt into a static portion (stable across turns
2293
- // and jobs within an hour, modulo MCP connect/skill author/memory edit)
2294
- // and a dynamic tail (memory, todos, time). The static portion gets a
2295
- // 1-hour Anthropic cache breakpoint downstream; the tail rides the
2296
- // existing 5-min message-level breakpoint. See the streamText site for
2297
- // the breakpoint wiring.
2298
- const buildSystemPromptParts = async (): Promise<{ staticPart: string; dynamicPart: string }> => {
2334
+ // Split the system prompt into THREE cacheability tiers (see the
2335
+ // streamText site for the breakpoint wiring):
2336
+ // 1. staticPart — agent body + skills + runtime context. Stable across
2337
+ // turns, conversations, and jobs within an hour. 1h breakpoint.
2338
+ // 2. memoryPart — the user's memory file. Per-user, shared by every
2339
+ // conversation, and only changes on an explicit memory write — so
2340
+ // it gets its own 1h breakpoint instead of riding the volatile
2341
+ // tail (where it busted the message-history cache for no reason).
2342
+ // 3. dynamicPart — todos + hour-quantized time. Genuinely volatile
2343
+ // within a conversation; uncached, kept as small as possible.
2344
+ const buildSystemPromptParts = async (): Promise<{ staticPart: string; memoryPart: string; dynamicPart: string }> => {
2299
2345
  const agentPrompt = renderCurrentAgentPrompt();
2300
2346
  const tenantSkills = await this.getSkillsForTenant(input.tenantId);
2301
2347
  const skillContextWindow = buildSkillContextWindow(tenantSkills);
@@ -2316,13 +2362,13 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2316
2362
  return `${weekday} ${d.toISOString().slice(0, 13)}Z`;
2317
2363
  })();
2318
2364
  const timeContext = `\n\nCurrent UTC time (hour precision): ${hourlyTime}`;
2319
- const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
2320
- return { staticPart, dynamicPart };
2365
+ const dynamicPart = `${todoContext}${timeContext}`;
2366
+ return { staticPart, memoryPart: memoryContext, dynamicPart };
2321
2367
  };
2322
- let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
2368
+ let { staticPart: staticSystemPart, memoryPart: memorySystemPart, dynamicPart: dynamicSystemPart } =
2323
2369
  await buildSystemPromptParts();
2324
2370
  // Concatenated form for legacy consumers (token estimation, telemetry).
2325
- let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
2371
+ let systemPrompt = `${staticSystemPart}${memorySystemPart}${dynamicSystemPart}`;
2326
2372
  let lastPromptFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
2327
2373
 
2328
2374
  const pushEvent = (event: AgentEvent): AgentEvent => {
@@ -2966,6 +3012,20 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2966
3012
  anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } },
2967
3013
  },
2968
3014
  },
3015
+ // Memory: per-user, conversation-independent, changes only on
3016
+ // an explicit memory write — its own 1h breakpoint means a
3017
+ // memory edit busts THIS block forward but a normal turn reads
3018
+ // it (plus everything before it) from cache. Breakpoint budget:
3019
+ // Anthropic allows 4; this is #2 of 3 (static, memory, tail).
3020
+ ...(memorySystemPart.length > 0
3021
+ ? [{
3022
+ role: "system" as const,
3023
+ content: memorySystemPart,
3024
+ providerOptions: {
3025
+ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } },
3026
+ },
3027
+ }]
3028
+ : []),
2969
3029
  ...(dynamicSystemPart.length > 0
2970
3030
  ? [{ role: "system" as const, content: dynamicSystemPart }]
2971
3031
  : []),
@@ -3752,9 +3812,9 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3752
3812
  agent = this.parsedAgent as ParsedAgent;
3753
3813
  const currentFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
3754
3814
  if (currentFingerprint !== lastPromptFingerprint) {
3755
- ({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
3815
+ ({ staticPart: staticSystemPart, memoryPart: memorySystemPart, dynamicPart: dynamicSystemPart } =
3756
3816
  await buildSystemPromptParts());
3757
- systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
3817
+ systemPrompt = `${staticSystemPart}${memorySystemPart}${dynamicSystemPart}`;
3758
3818
  lastPromptFingerprint = currentFingerprint;
3759
3819
  }
3760
3820
  }