@poncho-ai/harness 0.59.9 → 0.59.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -4
- package/CHANGELOG.md +23 -0
- package/dist/index.js +51 -9
- package/package.json +1 -1
- package/src/harness.ts +76 -16
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @poncho-ai/harness@0.59.9 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
|
|
2
|
+
> @poncho-ai/harness@0.59.11 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
|
|
3
3
|
> node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
|
|
4
4
|
|
|
5
5
|
[embed-docs] Generated poncho-docs.ts with 4 topics
|
|
@@ -8,9 +8,9 @@
|
|
|
8
8
|
[34mCLI[39m tsup v8.5.1
|
|
9
9
|
[34mCLI[39m Target: es2022
|
|
10
10
|
[34mESM[39m Build start
|
|
11
|
-
[32mESM[39m [1mdist/index.js [22m[32m558.75 KB[39m
|
|
12
11
|
[32mESM[39m [1mdist/isolate-F2PPSUL6.js [22m[32m53.82 KB[39m
|
|
13
|
-
[32mESM[39m
|
|
12
|
+
[32mESM[39m [1mdist/index.js [22m[32m560.62 KB[39m
|
|
13
|
+
[32mESM[39m ⚡️ Build success in 239ms
|
|
14
14
|
[34mDTS[39m Build start
|
|
15
|
-
[32mDTS[39m ⚡️ Build success in
|
|
15
|
+
[32mDTS[39m ⚡️ Build success in 7569ms
|
|
16
16
|
[32mDTS[39m [1mdist/index.d.ts [22m[32m102.06 KB[39m
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,28 @@
|
|
|
1
1
|
# @poncho-ai/harness
|
|
2
2
|
|
|
3
|
+
## 0.59.11
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- [`7464ad0`](https://github.com/cesr/poncho-ai/commit/7464ad04448095b34c4b1cbd52da559cf6bd6024) Thanks [@cesr](https://github.com/cesr)! - The user's memory file gets its own 1-hour Anthropic cache breakpoint.
|
|
8
|
+
It previously rode the uncached dynamic system tail (with todos + time),
|
|
9
|
+
which re-wrote the memory block — typically the bulk of a new
|
|
10
|
+
conversation's one-time cache cost — on every cold prefix, despite memory
|
|
11
|
+
only changing on explicit writes. System prompt is now three tiers:
|
|
12
|
+
static (1h), memory (1h), volatile todos+time (uncached).
|
|
13
|
+
|
|
14
|
+
## 0.59.10
|
|
15
|
+
|
|
16
|
+
### Patch Changes
|
|
17
|
+
|
|
18
|
+
- [`fad3918`](https://github.com/cesr/poncho-ai/commit/fad3918302114f76a29080cf28e9c003c61ef0d9) Thanks [@cesr](https://github.com/cesr)! - Stamp `session.id` / `user.id` on EVERY span, not just the invoke_agent
|
|
19
|
+
root. Observability backends resolve a span's identity from its own
|
|
20
|
+
attributes — Latitude's console session/conversation views key on the LLM
|
|
21
|
+
generation spans, so root-only attributes grouped the API-level trace but
|
|
22
|
+
left the console showing one session per turn and no user. The identity now
|
|
23
|
+
rides the OTel Context and an IdentityAttributeSpanProcessor injects it
|
|
24
|
+
into every descendant span (LLM steps, tool executions) at start.
|
|
25
|
+
|
|
3
26
|
## 0.59.9
|
|
4
27
|
|
|
5
28
|
### Patch Changes
|
package/dist/index.js
CHANGED
|
@@ -8745,7 +8745,7 @@ var createSubagentTools = (manager) => [
|
|
|
8745
8745
|
];
|
|
8746
8746
|
|
|
8747
8747
|
// src/harness.ts
|
|
8748
|
-
import { trace, context as otelContext, SpanStatusCode, SpanKind, diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
|
|
8748
|
+
import { trace, context as otelContext, createContextKey, SpanStatusCode, SpanKind, diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
|
|
8749
8749
|
import { NodeTracerProvider, BatchSpanProcessor } from "@opentelemetry/sdk-trace-node";
|
|
8750
8750
|
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
|
8751
8751
|
|
|
@@ -8928,6 +8928,28 @@ var telemetryLog2 = createLogger7("telemetry");
|
|
|
8928
8928
|
var costLog = createLogger7("cost");
|
|
8929
8929
|
var mcpLog2 = createLogger7("mcp");
|
|
8930
8930
|
var modelLog = createLogger7("model");
|
|
8931
|
+
var TELEMETRY_SESSION_ID_KEY = createContextKey("poncho.telemetry.session_id");
|
|
8932
|
+
var TELEMETRY_USER_ID_KEY = createContextKey("poncho.telemetry.user_id");
|
|
8933
|
+
var IdentityAttributeSpanProcessor = class {
|
|
8934
|
+
onStart(span, parentContext) {
|
|
8935
|
+
const sessionId = parentContext.getValue(TELEMETRY_SESSION_ID_KEY);
|
|
8936
|
+
if (typeof sessionId === "string" && sessionId) {
|
|
8937
|
+
span.setAttribute("session.id", sessionId);
|
|
8938
|
+
}
|
|
8939
|
+
const userId = parentContext.getValue(TELEMETRY_USER_ID_KEY);
|
|
8940
|
+
if (typeof userId === "string" && userId) {
|
|
8941
|
+
span.setAttribute("user.id", userId);
|
|
8942
|
+
}
|
|
8943
|
+
}
|
|
8944
|
+
onEnd() {
|
|
8945
|
+
}
|
|
8946
|
+
forceFlush() {
|
|
8947
|
+
return Promise.resolve();
|
|
8948
|
+
}
|
|
8949
|
+
shutdown() {
|
|
8950
|
+
return Promise.resolve();
|
|
8951
|
+
}
|
|
8952
|
+
};
|
|
8931
8953
|
function formatOtlpError(err) {
|
|
8932
8954
|
if (!(err instanceof Error)) return String(err);
|
|
8933
8955
|
const parts = [];
|
|
@@ -10339,7 +10361,9 @@ var AgentHarness = class _AgentHarness {
|
|
|
10339
10361
|
const processor = new BatchSpanProcessor(exporter);
|
|
10340
10362
|
this.otlpSpanProcessor = processor;
|
|
10341
10363
|
const provider2 = new NodeTracerProvider({
|
|
10342
|
-
|
|
10364
|
+
// Identity injector FIRST so every span (root, LLM steps, tool
|
|
10365
|
+
// executions) carries session.id/user.id before batching/export.
|
|
10366
|
+
spanProcessors: [new IdentityAttributeSpanProcessor(), processor]
|
|
10343
10367
|
});
|
|
10344
10368
|
provider2.register();
|
|
10345
10369
|
this.otlpTracerProvider = provider2;
|
|
@@ -10506,7 +10530,13 @@ var AgentHarness = class _AgentHarness {
|
|
|
10506
10530
|
...input.tenantId ? { "tenant.id": input.tenantId } : {}
|
|
10507
10531
|
}
|
|
10508
10532
|
});
|
|
10509
|
-
const spanContext = trace.setSpan(otelContext.active(), rootSpan);
|
|
10533
|
+
let spanContext = trace.setSpan(otelContext.active(), rootSpan);
|
|
10534
|
+
if (input.conversationId) {
|
|
10535
|
+
spanContext = spanContext.setValue(TELEMETRY_SESSION_ID_KEY, input.conversationId);
|
|
10536
|
+
}
|
|
10537
|
+
if (this.telemetryUserId) {
|
|
10538
|
+
spanContext = spanContext.setValue(TELEMETRY_USER_ID_KEY, this.telemetryUserId);
|
|
10539
|
+
}
|
|
10510
10540
|
try {
|
|
10511
10541
|
const gen = this.run(input);
|
|
10512
10542
|
let next;
|
|
@@ -10690,11 +10720,11 @@ ${skillContextWindow}${browserContext}${fsContext}${isolateContext}` : `${agentP
|
|
|
10690
10720
|
const timeContext = `
|
|
10691
10721
|
|
|
10692
10722
|
Current UTC time (hour precision): ${hourlyTime}`;
|
|
10693
|
-
const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
|
|
10694
|
-
return { staticPart, dynamicPart };
|
|
10723
|
+
const dynamicPart = `${todoContext}${timeContext}`;
|
|
10724
|
+
return { staticPart, memoryPart: memoryContext, dynamicPart };
|
|
10695
10725
|
};
|
|
10696
|
-
let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts();
|
|
10697
|
-
let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
|
|
10726
|
+
let { staticPart: staticSystemPart, memoryPart: memorySystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts();
|
|
10727
|
+
let systemPrompt = `${staticSystemPart}${memorySystemPart}${dynamicSystemPart}`;
|
|
10698
10728
|
let lastPromptFingerprint = `${this.agentFileFingerprint}
|
|
10699
10729
|
${this.skillFingerprint}`;
|
|
10700
10730
|
const pushEvent = (event) => {
|
|
@@ -11173,6 +11203,18 @@ ${textContent}` };
|
|
|
11173
11203
|
anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } }
|
|
11174
11204
|
}
|
|
11175
11205
|
},
|
|
11206
|
+
// Memory: per-user, conversation-independent, changes only on
|
|
11207
|
+
// an explicit memory write — its own 1h breakpoint means a
|
|
11208
|
+
// memory edit busts THIS block forward but a normal turn reads
|
|
11209
|
+
// it (plus everything before it) from cache. Breakpoint budget:
|
|
11210
|
+
// Anthropic allows 4; this is #2 of 3 (static, memory, tail).
|
|
11211
|
+
...memorySystemPart.length > 0 ? [{
|
|
11212
|
+
role: "system",
|
|
11213
|
+
content: memorySystemPart,
|
|
11214
|
+
providerOptions: {
|
|
11215
|
+
anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } }
|
|
11216
|
+
}
|
|
11217
|
+
}] : [],
|
|
11176
11218
|
...dynamicSystemPart.length > 0 ? [{ role: "system", content: dynamicSystemPart }] : [],
|
|
11177
11219
|
...cachedMessages
|
|
11178
11220
|
] : cachedMessages;
|
|
@@ -11814,8 +11856,8 @@ ${textContent}` };
|
|
|
11814
11856
|
const currentFingerprint = `${this.agentFileFingerprint}
|
|
11815
11857
|
${this.skillFingerprint}`;
|
|
11816
11858
|
if (currentFingerprint !== lastPromptFingerprint) {
|
|
11817
|
-
({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts());
|
|
11818
|
-
systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
|
|
11859
|
+
({ staticPart: staticSystemPart, memoryPart: memorySystemPart, dynamicPart: dynamicSystemPart } = await buildSystemPromptParts());
|
|
11860
|
+
systemPrompt = `${staticSystemPart}${memorySystemPart}${dynamicSystemPart}`;
|
|
11819
11861
|
lastPromptFingerprint = currentFingerprint;
|
|
11820
11862
|
}
|
|
11821
11863
|
}
|
package/package.json
CHANGED
package/src/harness.ts
CHANGED
|
@@ -67,9 +67,41 @@ import { createSkillTools, normalizeScriptPolicyPath } from "./skill-tools.js";
|
|
|
67
67
|
import { createSearchTools } from "./search-tools.js";
|
|
68
68
|
import { createSubagentTools } from "./subagent-tools.js";
|
|
69
69
|
import type { SubagentManager } from "./subagent-manager.js";
|
|
70
|
-
import { trace, context as otelContext, SpanStatusCode, SpanKind, diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
|
|
70
|
+
import { trace, context as otelContext, createContextKey, type Context as OtelContextType, SpanStatusCode, SpanKind, diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
|
|
71
|
+
import type { Span as OtelSdkSpan, SpanProcessor } from "@opentelemetry/sdk-trace-node";
|
|
71
72
|
import { NodeTracerProvider, BatchSpanProcessor } from "@opentelemetry/sdk-trace-node";
|
|
72
73
|
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
|
74
|
+
|
|
75
|
+
// ── Telemetry identity propagation ──────────────────────────────────────────
|
|
76
|
+
// Observability backends (Latitude) resolve a span's session/user from the
|
|
77
|
+
// span's OWN attributes — the console's session & conversation views key on
|
|
78
|
+
// the LLM generation spans, not the root span. So stamping `session.id` /
|
|
79
|
+
// `user.id` only on the invoke_agent root groups the API-level trace but
|
|
80
|
+
// leaves the console treating every turn as its own session. The fix is the
|
|
81
|
+
// same one vendor SDKs use: carry the identity in the OTel Context and have
|
|
82
|
+
// a SpanProcessor stamp it onto EVERY span at start.
|
|
83
|
+
const TELEMETRY_SESSION_ID_KEY = createContextKey("poncho.telemetry.session_id");
|
|
84
|
+
const TELEMETRY_USER_ID_KEY = createContextKey("poncho.telemetry.user_id");
|
|
85
|
+
|
|
86
|
+
class IdentityAttributeSpanProcessor implements SpanProcessor {
|
|
87
|
+
onStart(span: OtelSdkSpan, parentContext: OtelContextType): void {
|
|
88
|
+
const sessionId = parentContext.getValue(TELEMETRY_SESSION_ID_KEY);
|
|
89
|
+
if (typeof sessionId === "string" && sessionId) {
|
|
90
|
+
span.setAttribute("session.id", sessionId);
|
|
91
|
+
}
|
|
92
|
+
const userId = parentContext.getValue(TELEMETRY_USER_ID_KEY);
|
|
93
|
+
if (typeof userId === "string" && userId) {
|
|
94
|
+
span.setAttribute("user.id", userId);
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
onEnd(): void {}
|
|
98
|
+
forceFlush(): Promise<void> {
|
|
99
|
+
return Promise.resolve();
|
|
100
|
+
}
|
|
101
|
+
shutdown(): Promise<void> {
|
|
102
|
+
return Promise.resolve();
|
|
103
|
+
}
|
|
104
|
+
}
|
|
73
105
|
import { normalizeOtlp } from "./telemetry.js";
|
|
74
106
|
|
|
75
107
|
/** Extract useful details from OTLPExporterError (has .code + .data) or plain Error. */
|
|
@@ -1883,7 +1915,9 @@ export class AgentHarness {
|
|
|
1883
1915
|
const processor = new BatchSpanProcessor(exporter);
|
|
1884
1916
|
this.otlpSpanProcessor = processor;
|
|
1885
1917
|
const provider = new NodeTracerProvider({
|
|
1886
|
-
|
|
1918
|
+
// Identity injector FIRST so every span (root, LLM steps, tool
|
|
1919
|
+
// executions) carries session.id/user.id before batching/export.
|
|
1920
|
+
spanProcessors: [new IdentityAttributeSpanProcessor(), processor],
|
|
1887
1921
|
});
|
|
1888
1922
|
provider.register();
|
|
1889
1923
|
this.otlpTracerProvider = provider;
|
|
@@ -2074,7 +2108,15 @@ export class AgentHarness {
|
|
|
2074
2108
|
},
|
|
2075
2109
|
});
|
|
2076
2110
|
|
|
2077
|
-
const spanContext = trace.setSpan(otelContext.active(), rootSpan);
|
|
2111
|
+
let spanContext = trace.setSpan(otelContext.active(), rootSpan);
|
|
2112
|
+
// Identity rides the context so IdentityAttributeSpanProcessor stamps
|
|
2113
|
+
// session.id/user.id on every descendant span (see processor docs).
|
|
2114
|
+
if (input.conversationId) {
|
|
2115
|
+
spanContext = spanContext.setValue(TELEMETRY_SESSION_ID_KEY, input.conversationId);
|
|
2116
|
+
}
|
|
2117
|
+
if (this.telemetryUserId) {
|
|
2118
|
+
spanContext = spanContext.setValue(TELEMETRY_USER_ID_KEY, this.telemetryUserId);
|
|
2119
|
+
}
|
|
2078
2120
|
|
|
2079
2121
|
try {
|
|
2080
2122
|
const gen = this.run(input);
|
|
@@ -2289,13 +2331,17 @@ ${typeStubs}
|
|
|
2289
2331
|
Code is wrapped in an async IIFE — use \`return\` to return a value to the tool result.`;
|
|
2290
2332
|
}
|
|
2291
2333
|
|
|
2292
|
-
// Split the system prompt into
|
|
2293
|
-
//
|
|
2294
|
-
//
|
|
2295
|
-
//
|
|
2296
|
-
//
|
|
2297
|
-
//
|
|
2298
|
-
|
|
2334
|
+
// Split the system prompt into THREE cacheability tiers (see the
|
|
2335
|
+
// streamText site for the breakpoint wiring):
|
|
2336
|
+
// 1. staticPart — agent body + skills + runtime context. Stable across
|
|
2337
|
+
// turns, conversations, and jobs within an hour. 1h breakpoint.
|
|
2338
|
+
// 2. memoryPart — the user's memory file. Per-user, shared by every
|
|
2339
|
+
// conversation, and only changes on an explicit memory write — so
|
|
2340
|
+
// it gets its own 1h breakpoint instead of riding the volatile
|
|
2341
|
+
// tail (where it busted the message-history cache for no reason).
|
|
2342
|
+
// 3. dynamicPart — todos + hour-quantized time. Genuinely volatile
|
|
2343
|
+
// within a conversation; uncached, kept as small as possible.
|
|
2344
|
+
const buildSystemPromptParts = async (): Promise<{ staticPart: string; memoryPart: string; dynamicPart: string }> => {
|
|
2299
2345
|
const agentPrompt = renderCurrentAgentPrompt();
|
|
2300
2346
|
const tenantSkills = await this.getSkillsForTenant(input.tenantId);
|
|
2301
2347
|
const skillContextWindow = buildSkillContextWindow(tenantSkills);
|
|
@@ -2316,13 +2362,13 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
2316
2362
|
return `${weekday} ${d.toISOString().slice(0, 13)}Z`;
|
|
2317
2363
|
})();
|
|
2318
2364
|
const timeContext = `\n\nCurrent UTC time (hour precision): ${hourlyTime}`;
|
|
2319
|
-
const dynamicPart = `${memoryContext}${todoContext}${timeContext}`;
|
|
2320
|
-
return { staticPart, dynamicPart };
|
|
2365
|
+
const dynamicPart = `${todoContext}${timeContext}`;
|
|
2366
|
+
return { staticPart, memoryPart: memoryContext, dynamicPart };
|
|
2321
2367
|
};
|
|
2322
|
-
let { staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
|
|
2368
|
+
let { staticPart: staticSystemPart, memoryPart: memorySystemPart, dynamicPart: dynamicSystemPart } =
|
|
2323
2369
|
await buildSystemPromptParts();
|
|
2324
2370
|
// Concatenated form for legacy consumers (token estimation, telemetry).
|
|
2325
|
-
let systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
|
|
2371
|
+
let systemPrompt = `${staticSystemPart}${memorySystemPart}${dynamicSystemPart}`;
|
|
2326
2372
|
let lastPromptFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
|
|
2327
2373
|
|
|
2328
2374
|
const pushEvent = (event: AgentEvent): AgentEvent => {
|
|
@@ -2966,6 +3012,20 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
2966
3012
|
anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } },
|
|
2967
3013
|
},
|
|
2968
3014
|
},
|
|
3015
|
+
// Memory: per-user, conversation-independent, changes only on
|
|
3016
|
+
// an explicit memory write — its own 1h breakpoint means a
|
|
3017
|
+
// memory edit busts THIS block forward but a normal turn reads
|
|
3018
|
+
// it (plus everything before it) from cache. Breakpoint budget:
|
|
3019
|
+
// Anthropic allows 4; this is #2 of 3 (static, memory, tail).
|
|
3020
|
+
...(memorySystemPart.length > 0
|
|
3021
|
+
? [{
|
|
3022
|
+
role: "system" as const,
|
|
3023
|
+
content: memorySystemPart,
|
|
3024
|
+
providerOptions: {
|
|
3025
|
+
anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } },
|
|
3026
|
+
},
|
|
3027
|
+
}]
|
|
3028
|
+
: []),
|
|
2969
3029
|
...(dynamicSystemPart.length > 0
|
|
2970
3030
|
? [{ role: "system" as const, content: dynamicSystemPart }]
|
|
2971
3031
|
: []),
|
|
@@ -3752,9 +3812,9 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
3752
3812
|
agent = this.parsedAgent as ParsedAgent;
|
|
3753
3813
|
const currentFingerprint = `${this.agentFileFingerprint}\n${this.skillFingerprint}`;
|
|
3754
3814
|
if (currentFingerprint !== lastPromptFingerprint) {
|
|
3755
|
-
({ staticPart: staticSystemPart, dynamicPart: dynamicSystemPart } =
|
|
3815
|
+
({ staticPart: staticSystemPart, memoryPart: memorySystemPart, dynamicPart: dynamicSystemPart } =
|
|
3756
3816
|
await buildSystemPromptParts());
|
|
3757
|
-
systemPrompt = `${staticSystemPart}${dynamicSystemPart}`;
|
|
3817
|
+
systemPrompt = `${staticSystemPart}${memorySystemPart}${dynamicSystemPart}`;
|
|
3758
3818
|
lastPromptFingerprint = currentFingerprint;
|
|
3759
3819
|
}
|
|
3760
3820
|
}
|