@agent-native/core 0.52.0 → 0.53.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -95
- package/blueprints/action/crud.md +98 -0
- package/blueprints/channel/discord.md +74 -0
- package/blueprints/provider/stripe.md +87 -0
- package/blueprints/sandbox/docker.md +78 -0
- package/dist/action.d.ts +24 -0
- package/dist/action.d.ts.map +1 -1
- package/dist/action.js +4 -0
- package/dist/action.js.map +1 -1
- package/dist/agent/observational-memory/compactor.d.ts +43 -0
- package/dist/agent/observational-memory/compactor.d.ts.map +1 -0
- package/dist/agent/observational-memory/compactor.js +50 -0
- package/dist/agent/observational-memory/compactor.js.map +1 -0
- package/dist/agent/observational-memory/config.d.ts +37 -0
- package/dist/agent/observational-memory/config.d.ts.map +1 -0
- package/dist/agent/observational-memory/config.js +48 -0
- package/dist/agent/observational-memory/config.js.map +1 -0
- package/dist/agent/observational-memory/index.d.ts +26 -0
- package/dist/agent/observational-memory/index.d.ts.map +1 -0
- package/dist/agent/observational-memory/index.js +25 -0
- package/dist/agent/observational-memory/index.js.map +1 -0
- package/dist/agent/observational-memory/internal-run.d.ts +37 -0
- package/dist/agent/observational-memory/internal-run.d.ts.map +1 -0
- package/dist/agent/observational-memory/internal-run.js +59 -0
- package/dist/agent/observational-memory/internal-run.js.map +1 -0
- package/dist/agent/observational-memory/message-text.d.ts +13 -0
- package/dist/agent/observational-memory/message-text.d.ts.map +1 -0
- package/dist/agent/observational-memory/message-text.js +46 -0
- package/dist/agent/observational-memory/message-text.js.map +1 -0
- package/dist/agent/observational-memory/migrations.d.ts +13 -0
- package/dist/agent/observational-memory/migrations.d.ts.map +1 -0
- package/dist/agent/observational-memory/migrations.js +43 -0
- package/dist/agent/observational-memory/migrations.js.map +1 -0
- package/dist/agent/observational-memory/observer.d.ts +37 -0
- package/dist/agent/observational-memory/observer.d.ts.map +1 -0
- package/dist/agent/observational-memory/observer.js +82 -0
- package/dist/agent/observational-memory/observer.js.map +1 -0
- package/dist/agent/observational-memory/plugin.d.ts +16 -0
- package/dist/agent/observational-memory/plugin.d.ts.map +1 -0
- package/dist/agent/observational-memory/plugin.js +26 -0
- package/dist/agent/observational-memory/plugin.js.map +1 -0
- package/dist/agent/observational-memory/prompts.d.ts +27 -0
- package/dist/agent/observational-memory/prompts.d.ts.map +1 -0
- package/dist/agent/observational-memory/prompts.js +42 -0
- package/dist/agent/observational-memory/prompts.js.map +1 -0
- package/dist/agent/observational-memory/read.d.ts +47 -0
- package/dist/agent/observational-memory/read.d.ts.map +1 -0
- package/dist/agent/observational-memory/read.js +99 -0
- package/dist/agent/observational-memory/read.js.map +1 -0
- package/dist/agent/observational-memory/reflector.d.ts +31 -0
- package/dist/agent/observational-memory/reflector.d.ts.map +1 -0
- package/dist/agent/observational-memory/reflector.js +76 -0
- package/dist/agent/observational-memory/reflector.js.map +1 -0
- package/dist/agent/observational-memory/schema.d.ts +267 -0
- package/dist/agent/observational-memory/schema.d.ts.map +1 -0
- package/dist/agent/observational-memory/schema.js +48 -0
- package/dist/agent/observational-memory/schema.js.map +1 -0
- package/dist/agent/observational-memory/store.d.ts +52 -0
- package/dist/agent/observational-memory/store.d.ts.map +1 -0
- package/dist/agent/observational-memory/store.js +197 -0
- package/dist/agent/observational-memory/store.js.map +1 -0
- package/dist/agent/observational-memory/types.d.ts +61 -0
- package/dist/agent/observational-memory/types.d.ts.map +1 -0
- package/dist/agent/observational-memory/types.js +9 -0
- package/dist/agent/observational-memory/types.js.map +1 -0
- package/dist/agent/production-agent.d.ts +15 -0
- package/dist/agent/production-agent.d.ts.map +1 -1
- package/dist/agent/production-agent.js +240 -1
- package/dist/agent/production-agent.js.map +1 -1
- package/dist/agent/run-loop-with-resume.d.ts.map +1 -1
- package/dist/agent/run-loop-with-resume.js +49 -0
- package/dist/agent/run-loop-with-resume.js.map +1 -1
- package/dist/agent/run-store.d.ts +17 -0
- package/dist/agent/run-store.d.ts.map +1 -1
- package/dist/agent/run-store.js +55 -0
- package/dist/agent/run-store.js.map +1 -1
- package/dist/agent/runtime-context.d.ts +30 -0
- package/dist/agent/runtime-context.d.ts.map +1 -1
- package/dist/agent/runtime-context.js +54 -1
- package/dist/agent/runtime-context.js.map +1 -1
- package/dist/agent/tool-call-journal.d.ts +101 -0
- package/dist/agent/tool-call-journal.d.ts.map +1 -0
- package/dist/agent/tool-call-journal.js +214 -0
- package/dist/agent/tool-call-journal.js.map +1 -0
- package/dist/agent/types.d.ts +24 -0
- package/dist/agent/types.d.ts.map +1 -1
- package/dist/agent/types.js.map +1 -1
- package/dist/cli/add.d.ts +109 -0
- package/dist/cli/add.d.ts.map +1 -0
- package/dist/cli/add.js +352 -0
- package/dist/cli/add.js.map +1 -0
- package/dist/cli/connect.d.ts +2 -2
- package/dist/cli/connect.d.ts.map +1 -1
- package/dist/cli/connect.js +92 -24
- package/dist/cli/connect.js.map +1 -1
- package/dist/cli/eval.d.ts +17 -0
- package/dist/cli/eval.d.ts.map +1 -0
- package/dist/cli/eval.js +121 -0
- package/dist/cli/eval.js.map +1 -0
- package/dist/cli/index.js +44 -3
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/mcp.d.ts.map +1 -1
- package/dist/cli/mcp.js +11 -5
- package/dist/cli/mcp.js.map +1 -1
- package/dist/cli/plan-local.d.ts +66 -5
- package/dist/cli/plan-local.d.ts.map +1 -1
- package/dist/cli/plan-local.js +495 -19
- package/dist/cli/plan-local.js.map +1 -1
- package/dist/cli/skills.d.ts +2 -2
- package/dist/cli/skills.d.ts.map +1 -1
- package/dist/cli/skills.js +70 -59
- package/dist/cli/skills.js.map +1 -1
- package/dist/client/AssistantChat.d.ts.map +1 -1
- package/dist/client/AssistantChat.js +118 -92
- package/dist/client/AssistantChat.js.map +1 -1
- package/dist/client/agent-chat-adapter.d.ts.map +1 -1
- package/dist/client/agent-chat-adapter.js +16 -0
- package/dist/client/agent-chat-adapter.js.map +1 -1
- package/dist/client/chat/tool-call-display.d.ts +20 -1
- package/dist/client/chat/tool-call-display.d.ts.map +1 -1
- package/dist/client/chat/tool-call-display.js +32 -7
- package/dist/client/chat/tool-call-display.js.map +1 -1
- package/dist/client/sse-event-processor.d.ts +13 -0
- package/dist/client/sse-event-processor.d.ts.map +1 -1
- package/dist/client/sse-event-processor.js +21 -0
- package/dist/client/sse-event-processor.js.map +1 -1
- package/dist/db/client.d.ts +4 -2
- package/dist/db/client.d.ts.map +1 -1
- package/dist/db/client.js +6 -4
- package/dist/db/client.js.map +1 -1
- package/dist/deploy/route-discovery.d.ts.map +1 -1
- package/dist/deploy/route-discovery.js +1 -0
- package/dist/deploy/route-discovery.js.map +1 -1
- package/dist/eval/agent-runner.d.ts +63 -0
- package/dist/eval/agent-runner.d.ts.map +1 -0
- package/dist/eval/agent-runner.js +142 -0
- package/dist/eval/agent-runner.js.map +1 -0
- package/dist/eval/define-eval.d.ts +29 -0
- package/dist/eval/define-eval.d.ts.map +1 -0
- package/dist/eval/define-eval.js +43 -0
- package/dist/eval/define-eval.js.map +1 -0
- package/dist/eval/index.d.ts +18 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/index.js +17 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/report.d.ts +8 -0
- package/dist/eval/report.d.ts.map +1 -0
- package/dist/eval/report.js +44 -0
- package/dist/eval/report.js.map +1 -0
- package/dist/eval/runner.d.ts +67 -0
- package/dist/eval/runner.d.ts.map +1 -0
- package/dist/eval/runner.js +256 -0
- package/dist/eval/runner.js.map +1 -0
- package/dist/eval/scorer.d.ts +83 -0
- package/dist/eval/scorer.d.ts.map +1 -0
- package/dist/eval/scorer.js +195 -0
- package/dist/eval/scorer.js.map +1 -0
- package/dist/eval/types.d.ts +162 -0
- package/dist/eval/types.d.ts.map +1 -0
- package/dist/eval/types.js +20 -0
- package/dist/eval/types.js.map +1 -0
- package/dist/observability/traces.d.ts.map +1 -1
- package/dist/observability/traces.js +100 -1
- package/dist/observability/traces.js.map +1 -1
- package/dist/observability/tracing.d.ts +73 -0
- package/dist/observability/tracing.d.ts.map +1 -0
- package/dist/observability/tracing.js +126 -0
- package/dist/observability/tracing.js.map +1 -0
- package/dist/onboarding/default-steps.d.ts.map +1 -1
- package/dist/onboarding/default-steps.js +4 -1
- package/dist/onboarding/default-steps.js.map +1 -1
- package/dist/provider-api/actions/query-staged-dataset.d.ts +1 -1
- package/dist/scripts/agent-engines/list-agent-engines.d.ts.map +1 -1
- package/dist/scripts/agent-engines/list-agent-engines.js +10 -3
- package/dist/scripts/agent-engines/list-agent-engines.js.map +1 -1
- package/dist/server/action-discovery.d.ts.map +1 -1
- package/dist/server/action-discovery.js +4 -0
- package/dist/server/action-discovery.js.map +1 -1
- package/dist/server/agent-chat-plugin.d.ts +9 -0
- package/dist/server/agent-chat-plugin.d.ts.map +1 -1
- package/dist/server/agent-chat-plugin.js +118 -110
- package/dist/server/agent-chat-plugin.js.map +1 -1
- package/dist/server/agent-teams.d.ts +62 -0
- package/dist/server/agent-teams.d.ts.map +1 -1
- package/dist/server/agent-teams.js +99 -2
- package/dist/server/agent-teams.js.map +1 -1
- package/dist/server/core-routes-plugin.d.ts.map +1 -1
- package/dist/server/core-routes-plugin.js +7 -4
- package/dist/server/core-routes-plugin.js.map +1 -1
- package/dist/server/credential-provider.d.ts.map +1 -1
- package/dist/server/credential-provider.js +2 -0
- package/dist/server/credential-provider.js.map +1 -1
- package/dist/server/framework-request-handler.d.ts.map +1 -1
- package/dist/server/framework-request-handler.js +33 -1
- package/dist/server/framework-request-handler.js.map +1 -1
- package/dist/server/index.d.ts +1 -0
- package/dist/server/index.d.ts.map +1 -1
- package/dist/server/index.js +1 -0
- package/dist/server/index.js.map +1 -1
- package/dist/templates/workspace-core/.agents/skills/external-agents/SKILL.md +10 -0
- package/dist/templates/workspace-core/.agents/skills/harness-agents/SKILL.md +20 -0
- package/dist/templates/workspace-core/.agents/skills/observability/SKILL.md +20 -0
- package/docs/content/agent-teams.md +32 -0
- package/docs/content/blueprint-installer.md +73 -0
- package/docs/content/evals.md +141 -0
- package/docs/content/pr-visual-recap.md +7 -4
- package/docs/content/sandbox-adapters.md +134 -0
- package/docs/content/template-plan.md +20 -8
- package/package.json +5 -1
- package/src/templates/workspace-core/.agents/skills/external-agents/SKILL.md +10 -0
- package/src/templates/workspace-core/.agents/skills/harness-agents/SKILL.md +20 -0
- package/src/templates/workspace-core/.agents/skills/observability/SKILL.md +20 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types for the first-class evals primitive.
|
|
3
|
+
*
|
|
4
|
+
* This is a *test-case* eval system (define a prompt + expected behavior,
|
|
5
|
+
* actually run the agent, score the output) — distinct from the post-hoc
|
|
6
|
+
* run-scoring engine in `../observability/evals.ts`, which scores already-
|
|
7
|
+
* completed production runs. The two are complementary:
|
|
8
|
+
*
|
|
9
|
+
* - `observability/evals.ts` — "how did this real run do?" (passive,
|
|
10
|
+
* sampled, lives next to traces).
|
|
11
|
+
* - `eval/*` (this module) — "does the agent do the right thing on this
|
|
12
|
+
* fixed input?" (active, deterministic CI gate, run via the CLI).
|
|
13
|
+
*
|
|
14
|
+
* The pipeline shape (preprocess → analyze → generateScore → generateReason)
|
|
15
|
+
* is borrowed from Mastra's scorer design: each scorer is a small, composable
|
|
16
|
+
* 4-step pipeline so a single scorer can mix plain-JS checks with an LLM
|
|
17
|
+
* judge while still producing one normalized 0..1 number plus a reason.
|
|
18
|
+
*/
|
|
19
|
+
import type { AgentEngine } from "../agent/engine/types.js";
|
|
20
|
+
/**
|
|
21
|
+
* The result of actually running the agent loop for one eval input. Scorers
|
|
22
|
+
* receive this as the thing under test. It is intentionally small and
|
|
23
|
+
* transport-agnostic so a scorer never reaches into framework internals.
|
|
24
|
+
*/
|
|
25
|
+
export interface AgentRunOutput {
|
|
26
|
+
/** Concatenated assistant text emitted across the run. */
|
|
27
|
+
readonly text: string;
|
|
28
|
+
/** Names of tools/actions the agent invoked, in call order. */
|
|
29
|
+
readonly toolCalls: readonly string[];
|
|
30
|
+
/** Whether the run completed without a terminal error event. */
|
|
31
|
+
readonly ok: boolean;
|
|
32
|
+
/** Terminal error message, if the run errored. */
|
|
33
|
+
readonly error?: string;
|
|
34
|
+
/** Synthetic run id, useful for writing eval rows to the observability store. */
|
|
35
|
+
readonly runId: string;
|
|
36
|
+
/** Wall-clock duration of the run in milliseconds. */
|
|
37
|
+
readonly durationMs: number;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Context handed to a scorer's analyze step when it needs an LLM judge. The
|
|
41
|
+
* engine/model are resolved by the runner from the existing engine registry —
|
|
42
|
+
* a scorer NEVER hardcodes a provider or model, keeping evals provider-
|
|
43
|
+
* agnostic. `judge()` is a convenience that streams a single judging turn and
|
|
44
|
+
* returns the raw model text.
|
|
45
|
+
*/
|
|
46
|
+
export interface ScorerAnalyzeContext {
|
|
47
|
+
/** The resolved, provider-agnostic engine for LLM-judge scorers. */
|
|
48
|
+
readonly engine: AgentEngine;
|
|
49
|
+
/** The resolved model string for the engine. */
|
|
50
|
+
readonly model: string;
|
|
51
|
+
/**
|
|
52
|
+
* Run a single LLM judging turn. Returns the model's raw text output. Used
|
|
53
|
+
* by `llmJudge` and any custom LLM-backed analyze step. Provider-agnostic —
|
|
54
|
+
* the engine is whatever the app/CLI resolved.
|
|
55
|
+
*/
|
|
56
|
+
judge(opts: {
|
|
57
|
+
systemPrompt?: string;
|
|
58
|
+
prompt: string;
|
|
59
|
+
maxOutputTokens?: number;
|
|
60
|
+
signal?: AbortSignal;
|
|
61
|
+
}): Promise<string>;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* A 4-step scoring pipeline (Mastra-style):
|
|
65
|
+
*
|
|
66
|
+
* preprocess(run) → x (transform the run/output; optional)
|
|
67
|
+
* analyze(x, ctx) → analysis (plain JS OR an LLM judge; optional)
|
|
68
|
+
* generateScore(a) → 0..1 (REQUIRED, normalized)
|
|
69
|
+
* generateReason(...) → string (human-readable why; optional)
|
|
70
|
+
*
|
|
71
|
+
* Generics flow `Pre` (preprocess output) → `Ana` (analyze output) so a
|
|
72
|
+
* single scorer is fully typed end-to-end.
|
|
73
|
+
*/
|
|
74
|
+
export interface Scorer<Pre = AgentRunOutput, Ana = Pre> {
|
|
75
|
+
readonly name: string;
|
|
76
|
+
preprocess?(run: AgentRunOutput): Pre | Promise<Pre>;
|
|
77
|
+
analyze?(input: Pre, ctx: ScorerAnalyzeContext): Ana | Promise<Ana>;
|
|
78
|
+
/** REQUIRED. Returns a normalized score in [0, 1]. */
|
|
79
|
+
generateScore(analysis: Ana): number | Promise<number>;
|
|
80
|
+
generateReason?(args: {
|
|
81
|
+
run: AgentRunOutput;
|
|
82
|
+
analysis: Ana;
|
|
83
|
+
score: number;
|
|
84
|
+
}): string | Promise<string>;
|
|
85
|
+
}
|
|
86
|
+
/** Definition object passed to `createScorer`. */
|
|
87
|
+
export interface ScorerDefinition<Pre = AgentRunOutput, Ana = Pre> {
|
|
88
|
+
name: string;
|
|
89
|
+
preprocess?(run: AgentRunOutput): Pre | Promise<Pre>;
|
|
90
|
+
analyze?(input: Pre, ctx: ScorerAnalyzeContext): Ana | Promise<Ana>;
|
|
91
|
+
generateScore(analysis: Ana): number | Promise<number>;
|
|
92
|
+
generateReason?(args: {
|
|
93
|
+
run: AgentRunOutput;
|
|
94
|
+
analysis: Ana;
|
|
95
|
+
score: number;
|
|
96
|
+
}): string | Promise<string>;
|
|
97
|
+
}
|
|
98
|
+
/** The prompt + optional setup that drives one eval case. */
|
|
99
|
+
export interface EvalInput {
|
|
100
|
+
/** The user prompt / message sent to the agent. */
|
|
101
|
+
prompt: string;
|
|
102
|
+
/**
|
|
103
|
+
* Optional prior conversation turns to seed before `prompt`. Each is a
|
|
104
|
+
* plain { role, text } pair; the runner converts them to engine messages.
|
|
105
|
+
*/
|
|
106
|
+
history?: Array<{
|
|
107
|
+
role: "user" | "assistant";
|
|
108
|
+
text: string;
|
|
109
|
+
}>;
|
|
110
|
+
}
|
|
111
|
+
/** Context passed to an eval's optional `run` override. */
|
|
112
|
+
export interface EvalRunContext {
|
|
113
|
+
readonly input: EvalInput;
|
|
114
|
+
/** The default agent runner — invoke it to run the agent loop as a caller. */
|
|
115
|
+
runAgent(input: EvalInput): Promise<AgentRunOutput>;
|
|
116
|
+
}
|
|
117
|
+
/**
|
|
118
|
+
* A named eval = one test case. `scorers` produce per-scorer rows; the case
|
|
119
|
+
* passes when EVERY scorer meets `threshold` (default 0.5, overridable per
|
|
120
|
+
* eval and globally from the CLI).
|
|
121
|
+
*/
|
|
122
|
+
export interface Eval {
|
|
123
|
+
name: string;
|
|
124
|
+
input: EvalInput;
|
|
125
|
+
/**
|
|
126
|
+
* Optional override for how the agent is run for this case. Defaults to the
|
|
127
|
+
* runner's headless `runAgent`. Use this to do custom setup (seed data,
|
|
128
|
+
* multi-turn) before/after the agent call.
|
|
129
|
+
*/
|
|
130
|
+
run?(ctx: EvalRunContext): AgentRunOutput | Promise<AgentRunOutput>;
|
|
131
|
+
scorers: Scorer<any, any>[];
|
|
132
|
+
/** Minimum acceptable score (per scorer) in [0, 1]. Default 0.5. */
|
|
133
|
+
threshold?: number;
|
|
134
|
+
}
|
|
135
|
+
/** One result row per (eval × scorer). Stores both the number AND the reason. */
|
|
136
|
+
export interface ScorerResult {
|
|
137
|
+
scorer: string;
|
|
138
|
+
score: number;
|
|
139
|
+
reason?: string;
|
|
140
|
+
passed: boolean;
|
|
141
|
+
}
|
|
142
|
+
/** Aggregated result for a single eval (all of its scorers). */
|
|
143
|
+
export interface EvalResultRow {
|
|
144
|
+
eval: string;
|
|
145
|
+
threshold: number;
|
|
146
|
+
scores: ScorerResult[];
|
|
147
|
+
/** True only when every scorer passed. */
|
|
148
|
+
passed: boolean;
|
|
149
|
+
/** Mean of the scorer scores, for at-a-glance ranking. */
|
|
150
|
+
avgScore: number;
|
|
151
|
+
durationMs: number;
|
|
152
|
+
/** Terminal error if the agent run itself failed. */
|
|
153
|
+
error?: string;
|
|
154
|
+
}
|
|
155
|
+
/** The full runner report. */
|
|
156
|
+
export interface EvalRunReport {
|
|
157
|
+
total: number;
|
|
158
|
+
passed: number;
|
|
159
|
+
failed: number;
|
|
160
|
+
results: EvalResultRow[];
|
|
161
|
+
}
|
|
162
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/eval/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAI5D;;;;GAIG;AACH,MAAM,WAAW,cAAc;IAC7B,0DAA0D;IAC1D,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,+DAA+D;IAC/D,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IACtC,gEAAgE;IAChE,QAAQ,CAAC,EAAE,EAAE,OAAO,CAAC;IACrB,kDAAkD;IAClD,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,iFAAiF;IACjF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,sDAAsD;IACtD,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAID;;;;;;GAMG;AACH,MAAM,WAAW,oBAAoB;IACnC,oEAAoE;IACpE,QAAQ,CAAC,MAAM,EAAE,WAAW,CAAC;IAC7B,gDAAgD;IAChD,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB;;;;OAIG;IACH,KAAK,CAAC,IAAI,EAAE;QACV,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,MAAM,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,MAAM,CAAC,EAAE,WAAW,CAAC;KACtB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACrB;AAED;;;;;;;;;;GAUG;AACH,MAAM,WAAW,MAAM,CAAC,GAAG,GAAG,cAAc,EAAE,GAAG,GAAG,GAAG;IACrD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,CAAC,GAAG,EAAE,cAAc,GAAG,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;IACrD,OAAO,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,oBAAoB,GAAG,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;IACpE,sDAAsD;IACtD,aAAa,CAAC,QAAQ,EAAE,GAAG,GAAG,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACvD,cAAc,CAAC,CAAC,IAAI,EAAE;QACpB,GAAG,EAAE,cAAc,CAAC;QACpB,QAAQ,EAAE,GAAG,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACf,GAAG,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CAC9B;AAED,kDAAkD;AAClD,MAAM,WAAW,gBAAgB,CAAC,GAAG,GAAG,cAAc,EAAE,GAAG,GAAG,GAAG;IAC/D,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,CAAC,GAAG,EAAE,cAAc,GAAG,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;IACrD,OAAO,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,oBAAoB,GAAG,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;IACpE,aAAa,CAAC,QAAQ,EAAE,GAAG,GAAG,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACvD,cAAc,CAAC,CAAC,IAAI,EAAE;QACpB,GAAG,EAAE,cAAc,CAAC;QACpB,QAAQ,EAAE,GAAG,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACf,GAAG,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CAC9B;AAID,6DAA6D;AAC7D,MAAM,WAAW,SAAS;IACxB,mDAAmD;IACnD,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,OAAO,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAC/D;AAED,2DAA2D;AAC3D,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,KAAK,EAAE,SAAS,CAAC;IAC1B,8EAA8E;IAC9E,QAAQ,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;CACrD;AAED;;;;GAIG;AACH,MAAM,WAAW,IAAI;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,SAAS,CAAC;IACjB;;;;OAIG;IACH,GAAG,CAAC,CAAC,GAAG,EAAE,cAAc,GAAG,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;IACpE,OAAO,EAAE,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;IAC5B,oEAAoE;IACpE,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAID,iFAAiF;AACjF,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,OAAO,CAAC;CACjB;AAED,gEAAgE;AAChE,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,YAAY,EAAE,CAAC;IACvB,0CAA0C;IAC1C,MAAM,EAAE,OAAO,CAAC;IAChB,0DAA0D;IAC1D,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,8BAA8B;AAC9B,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,aAAa,EAAE,CAAC;CAC1B"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types for the first-class evals primitive.
|
|
3
|
+
*
|
|
4
|
+
* This is a *test-case* eval system (define a prompt + expected behavior,
|
|
5
|
+
* actually run the agent, score the output) — distinct from the post-hoc
|
|
6
|
+
* run-scoring engine in `../observability/evals.ts`, which scores already-
|
|
7
|
+
* completed production runs. The two are complementary:
|
|
8
|
+
*
|
|
9
|
+
* - `observability/evals.ts` — "how did this real run do?" (passive,
|
|
10
|
+
* sampled, lives next to traces).
|
|
11
|
+
* - `eval/*` (this module) — "does the agent do the right thing on this
|
|
12
|
+
* fixed input?" (active, deterministic CI gate, run via the CLI).
|
|
13
|
+
*
|
|
14
|
+
* The pipeline shape (preprocess → analyze → generateScore → generateReason)
|
|
15
|
+
* is borrowed from Mastra's scorer design: each scorer is a small, composable
|
|
16
|
+
* 4-step pipeline so a single scorer can mix plain-JS checks with an LLM
|
|
17
|
+
* judge while still producing one normalized 0..1 number plus a reason.
|
|
18
|
+
*/
|
|
19
|
+
export {};
|
|
20
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/eval/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG","sourcesContent":["/**\n * Types for the first-class evals primitive.\n *\n * This is a *test-case* eval system (define a prompt + expected behavior,\n * actually run the agent, score the output) — distinct from the post-hoc\n * run-scoring engine in `../observability/evals.ts`, which scores already-\n * completed production runs. The two are complementary:\n *\n * - `observability/evals.ts` — \"how did this real run do?\" (passive,\n * sampled, lives next to traces).\n * - `eval/*` (this module) — \"does the agent do the right thing on this\n * fixed input?\" (active, deterministic CI gate, run via the CLI).\n *\n * The pipeline shape (preprocess → analyze → generateScore → generateReason)\n * is borrowed from Mastra's scorer design: each scorer is a small, composable\n * 4-step pipeline so a single scorer can mix plain-JS checks with an LLM\n * judge while still producing one normalized 0..1 number plus a reason.\n */\n\nimport type { AgentEngine } from \"../agent/engine/types.js\";\n\n// ─── Agent run output ─────────────────────────────────────────────────\n\n/**\n * The result of actually running the agent loop for one eval input. Scorers\n * receive this as the thing under test. It is intentionally small and\n * transport-agnostic so a scorer never reaches into framework internals.\n */\nexport interface AgentRunOutput {\n /** Concatenated assistant text emitted across the run. */\n readonly text: string;\n /** Names of tools/actions the agent invoked, in call order. */\n readonly toolCalls: readonly string[];\n /** Whether the run completed without a terminal error event. */\n readonly ok: boolean;\n /** Terminal error message, if the run errored. */\n readonly error?: string;\n /** Synthetic run id, useful for writing eval rows to the observability store. */\n readonly runId: string;\n /** Wall-clock duration of the run in milliseconds. */\n readonly durationMs: number;\n}\n\n// ─── Scorer pipeline ──────────────────────────────────────────────────\n\n/**\n * Context handed to a scorer's analyze step when it needs an LLM judge. The\n * engine/model are resolved by the runner from the existing engine registry —\n * a scorer NEVER hardcodes a provider or model, keeping evals provider-\n * agnostic. `judge()` is a convenience that streams a single judging turn and\n * returns the raw model text.\n */\nexport interface ScorerAnalyzeContext {\n /** The resolved, provider-agnostic engine for LLM-judge scorers. */\n readonly engine: AgentEngine;\n /** The resolved model string for the engine. */\n readonly model: string;\n /**\n * Run a single LLM judging turn. Returns the model's raw text output. Used\n * by `llmJudge` and any custom LLM-backed analyze step. Provider-agnostic —\n * the engine is whatever the app/CLI resolved.\n */\n judge(opts: {\n systemPrompt?: string;\n prompt: string;\n maxOutputTokens?: number;\n signal?: AbortSignal;\n }): Promise<string>;\n}\n\n/**\n * A 4-step scoring pipeline (Mastra-style):\n *\n * preprocess(run) → x (transform the run/output; optional)\n * analyze(x, ctx) → analysis (plain JS OR an LLM judge; optional)\n * generateScore(a) → 0..1 (REQUIRED, normalized)\n * generateReason(...) → string (human-readable why; optional)\n *\n * Generics flow `Pre` (preprocess output) → `Ana` (analyze output) so a\n * single scorer is fully typed end-to-end.\n */\nexport interface Scorer<Pre = AgentRunOutput, Ana = Pre> {\n readonly name: string;\n preprocess?(run: AgentRunOutput): Pre | Promise<Pre>;\n analyze?(input: Pre, ctx: ScorerAnalyzeContext): Ana | Promise<Ana>;\n /** REQUIRED. Returns a normalized score in [0, 1]. */\n generateScore(analysis: Ana): number | Promise<number>;\n generateReason?(args: {\n run: AgentRunOutput;\n analysis: Ana;\n score: number;\n }): string | Promise<string>;\n}\n\n/** Definition object passed to `createScorer`. */\nexport interface ScorerDefinition<Pre = AgentRunOutput, Ana = Pre> {\n name: string;\n preprocess?(run: AgentRunOutput): Pre | Promise<Pre>;\n analyze?(input: Pre, ctx: ScorerAnalyzeContext): Ana | Promise<Ana>;\n generateScore(analysis: Ana): number | Promise<number>;\n generateReason?(args: {\n run: AgentRunOutput;\n analysis: Ana;\n score: number;\n }): string | Promise<string>;\n}\n\n// ─── Eval definition ──────────────────────────────────────────────────\n\n/** The prompt + optional setup that drives one eval case. */\nexport interface EvalInput {\n /** The user prompt / message sent to the agent. */\n prompt: string;\n /**\n * Optional prior conversation turns to seed before `prompt`. Each is a\n * plain { role, text } pair; the runner converts them to engine messages.\n */\n history?: Array<{ role: \"user\" | \"assistant\"; text: string }>;\n}\n\n/** Context passed to an eval's optional `run` override. */\nexport interface EvalRunContext {\n readonly input: EvalInput;\n /** The default agent runner — invoke it to run the agent loop as a caller. */\n runAgent(input: EvalInput): Promise<AgentRunOutput>;\n}\n\n/**\n * A named eval = one test case. `scorers` produce per-scorer rows; the case\n * passes when EVERY scorer meets `threshold` (default 0.5, overridable per\n * eval and globally from the CLI).\n */\nexport interface Eval {\n name: string;\n input: EvalInput;\n /**\n * Optional override for how the agent is run for this case. Defaults to the\n * runner's headless `runAgent`. Use this to do custom setup (seed data,\n * multi-turn) before/after the agent call.\n */\n run?(ctx: EvalRunContext): AgentRunOutput | Promise<AgentRunOutput>;\n scorers: Scorer<any, any>[];\n /** Minimum acceptable score (per scorer) in [0, 1]. Default 0.5. */\n threshold?: number;\n}\n\n// ─── Results ──────────────────────────────────────────────────────────\n\n/** One result row per (eval × scorer). Stores both the number AND the reason. */\nexport interface ScorerResult {\n scorer: string;\n score: number;\n reason?: string;\n passed: boolean;\n}\n\n/** Aggregated result for a single eval (all of its scorers). */\nexport interface EvalResultRow {\n eval: string;\n threshold: number;\n scores: ScorerResult[];\n /** True only when every scorer passed. */\n passed: boolean;\n /** Mean of the scorer scores, for at-a-glance ranking. */\n avgScore: number;\n durationMs: number;\n /** Terminal error if the agent run itself failed. */\n error?: string;\n}\n\n/** The full runner report. */\nexport interface EvalRunReport {\n total: number;\n passed: number;\n failed: number;\n results: EvalResultRow[];\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"traces.d.ts","sourceRoot":"","sources":["../../src/observability/traces.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AACnE,OAAO,KAAK,EAA2B,mBAAmB,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"traces.d.ts","sourceRoot":"","sources":["../../src/observability/traces.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AACnE,OAAO,KAAK,EAA2B,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAkB/E;;;wEAGwE;AACxE,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,OAAO,GAAG,OAAO,CAE7D;AAoBD,wBAAsB,sBAAsB,IAAI,OAAO,CAAC,mBAAmB,CAAC,CAY3E;AAED,wBAAsB,mBAAmB,CAAC,IAAI,EAAE;IAC9C,YAAY,EAAE,CAAC,QAAQ,EAAE;QACvB,MAAM,EAAE,GAAG,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,GAAG,EAAE,CAAC;QACb,QAAQ,EAAE,GAAG,EAAE,CAAC;QAChB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC7B,IAAI,EAAE,CAAC,KAAK,EAAE,cAAc,KAAK,IAAI,CAAC;QACtC,MAAM,EAAE,WAAW,CAAC;QACpB,eAAe,CAAC,EAAE,GAAG,CAAC;KACvB,KAAK,OAAO,CAAC,cAAc,CAAC,CAAC;IAC9B,QAAQ,EAAE;QACR,MAAM,EAAE,GAAG,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,GAAG,EAAE,CAAC;QACb,QAAQ,EAAE,GAAG,EAAE,CAAC;QAChB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC7B,IAAI,EAAE,CAAC,KAAK,EAAE,cAAc,KAAK,IAAI,CAAC;QACtC,MAAM,EAAE,WAAW,CAAC;QACpB,eAAe,CAAC,EAAE,GAAG,CAAC;KACvB,CAAC;IACF,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB;;;iBAGa;IACb,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,MAAM,EAAE,mBAAmB,CAAC;CAC7B,GAAG,OAAO,CAAC,cAAc,CAAC,CAiT1B"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { DEFAULT_OBSERVABILITY_CONFIG } from "./types.js";
|
|
2
|
+
import { endAgentSpan, startAgentSpan } from "./tracing.js";
|
|
2
3
|
function spanId() {
|
|
3
4
|
return `span-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
4
5
|
}
|
|
@@ -55,6 +56,17 @@ export async function instrumentAgentLoop(opts) {
|
|
|
55
56
|
const { runAgentLoop, loopOpts, runId, threadId, userId, config } = opts;
|
|
56
57
|
const runStart = Date.now();
|
|
57
58
|
const parentSpanId = spanId();
|
|
59
|
+
// Optional OpenTelemetry root span for this run. No-ops unless a host has
|
|
60
|
+
// installed `@opentelemetry/api` and registered a provider. The promise is
|
|
61
|
+
// resolved before the loop runs so child tool/model spans can parent under
|
|
62
|
+
// it conceptually (we keep them flat in the same tracer, which is enough
|
|
63
|
+
// for the dashboards an embedding app would build).
|
|
64
|
+
const otelRunSpanPromise = startAgentSpan("agent.run", {
|
|
65
|
+
"agent.run_id": runId,
|
|
66
|
+
"agent.thread_id": threadId ?? undefined,
|
|
67
|
+
"agent.user_id": userId ?? undefined,
|
|
68
|
+
"agent.model": loopOpts.model,
|
|
69
|
+
});
|
|
58
70
|
const spans = [];
|
|
59
71
|
let toolInvocationCounter = 0;
|
|
60
72
|
// Keyed by counter to handle concurrent calls to the same tool name
|
|
@@ -68,16 +80,43 @@ export async function instrumentAgentLoop(opts) {
|
|
|
68
80
|
let toolCallCount = 0;
|
|
69
81
|
let successfulTools = 0;
|
|
70
82
|
let failedTools = 0;
|
|
83
|
+
// Track in-flight OTel tool spans so they're all ended even if the loop
|
|
84
|
+
// throws before a matching `tool_done` arrives.
|
|
85
|
+
const openOtelToolSpans = new Set();
|
|
71
86
|
const instrumentedSend = (event) => {
|
|
72
87
|
try {
|
|
73
88
|
if (event.type === "tool_start") {
|
|
74
89
|
const counter = toolInvocationCounter++;
|
|
75
90
|
const sid = spanId();
|
|
76
|
-
|
|
91
|
+
// Start the OTel tool span synchronously-ish: kick off the async
|
|
92
|
+
// resolution and stash the span once it lands. Tool spans are short
|
|
93
|
+
// and the api tracer is synchronous in practice, but we tolerate the
|
|
94
|
+
// microtask gap by recording the span on the pending entry when ready.
|
|
95
|
+
const entry = {
|
|
77
96
|
spanId: sid,
|
|
78
97
|
startMs: Date.now(),
|
|
79
98
|
toolName: event.tool,
|
|
80
99
|
input: event.input,
|
|
100
|
+
otelSpan: null,
|
|
101
|
+
};
|
|
102
|
+
pendingTools.set(counter, entry);
|
|
103
|
+
void startAgentSpan("tool.call", {
|
|
104
|
+
"tool.name": event.tool,
|
|
105
|
+
}).then((span) => {
|
|
106
|
+
if (!span)
|
|
107
|
+
return;
|
|
108
|
+
// If `tool_done` already ran for this call, end the span now with the
|
|
109
|
+
// status it recorded; otherwise stash it for the done handler.
|
|
110
|
+
if (entry.endResult) {
|
|
111
|
+
endAgentSpan(span, {
|
|
112
|
+
status: entry.endResult.status,
|
|
113
|
+
errorMessage: entry.endResult.errorMessage,
|
|
114
|
+
});
|
|
115
|
+
}
|
|
116
|
+
else {
|
|
117
|
+
entry.otelSpan = span;
|
|
118
|
+
openOtelToolSpans.add(span);
|
|
119
|
+
}
|
|
81
120
|
});
|
|
82
121
|
const queue = toolNameToCounters.get(event.tool);
|
|
83
122
|
if (queue)
|
|
@@ -102,6 +141,23 @@ export async function instrumentAgentLoop(opts) {
|
|
|
102
141
|
failedTools++;
|
|
103
142
|
else
|
|
104
143
|
successfulTools++;
|
|
144
|
+
// Finalize the OTel tool span. If the span promise hasn't resolved yet
|
|
145
|
+
// we record the result on the entry so its `.then` handler ends it.
|
|
146
|
+
const otelEndResult = {
|
|
147
|
+
status: (isError ? "error" : "success"),
|
|
148
|
+
errorMessage: isError ? event.result : null,
|
|
149
|
+
};
|
|
150
|
+
if (pending?.otelSpan) {
|
|
151
|
+
openOtelToolSpans.delete(pending.otelSpan);
|
|
152
|
+
endAgentSpan(pending.otelSpan, {
|
|
153
|
+
status: otelEndResult.status,
|
|
154
|
+
errorMessage: otelEndResult.errorMessage,
|
|
155
|
+
attributes: { "tool.name": event.tool },
|
|
156
|
+
});
|
|
157
|
+
}
|
|
158
|
+
else if (pending) {
|
|
159
|
+
pending.endResult = otelEndResult;
|
|
160
|
+
}
|
|
105
161
|
const span = {
|
|
106
162
|
id: pending?.spanId ?? spanId(),
|
|
107
163
|
runId,
|
|
@@ -218,6 +274,49 @@ export async function instrumentAgentLoop(opts) {
|
|
|
218
274
|
createdAt: runStart,
|
|
219
275
|
};
|
|
220
276
|
writeTraceData(spans, summary, runId, config).catch(() => { });
|
|
277
|
+
// OpenTelemetry export (no-op unless a provider is registered). Emit a
|
|
278
|
+
// self-contained `llm.call` span carrying model + token usage, end any
|
|
279
|
+
// tool spans still open (loop threw mid-tool), and end the run span. Awaited
|
|
280
|
+
// so the spans are emitted before the function returns; cheap when no-op.
|
|
281
|
+
try {
|
|
282
|
+
if (usage) {
|
|
283
|
+
endAgentSpan(await startAgentSpan("llm.call", {}), {
|
|
284
|
+
status: runStatus,
|
|
285
|
+
errorMessage,
|
|
286
|
+
attributes: {
|
|
287
|
+
"llm.model": usage.model,
|
|
288
|
+
"llm.input_tokens": usage.inputTokens,
|
|
289
|
+
"llm.output_tokens": usage.outputTokens,
|
|
290
|
+
"llm.cache_read_tokens": usage.cacheReadTokens,
|
|
291
|
+
"llm.cache_write_tokens": usage.cacheWriteTokens,
|
|
292
|
+
"llm.cost_cents_x100": costCentsX100,
|
|
293
|
+
},
|
|
294
|
+
});
|
|
295
|
+
}
|
|
296
|
+
for (const toolSpan of openOtelToolSpans) {
|
|
297
|
+
endAgentSpan(toolSpan, {
|
|
298
|
+
status: "error",
|
|
299
|
+
errorMessage: "Agent run ended before tool_done.",
|
|
300
|
+
});
|
|
301
|
+
}
|
|
302
|
+
openOtelToolSpans.clear();
|
|
303
|
+
endAgentSpan(await otelRunSpanPromise, {
|
|
304
|
+
status: runStatus,
|
|
305
|
+
errorMessage,
|
|
306
|
+
attributes: {
|
|
307
|
+
"agent.tool_calls": toolCallCount,
|
|
308
|
+
"agent.successful_tools": successfulTools,
|
|
309
|
+
"agent.failed_tools": failedTools,
|
|
310
|
+
"agent.duration_ms": totalDurationMs,
|
|
311
|
+
"agent.input_tokens": usage?.inputTokens ?? 0,
|
|
312
|
+
"agent.output_tokens": usage?.outputTokens ?? 0,
|
|
313
|
+
"agent.cost_cents_x100": costCentsX100,
|
|
314
|
+
},
|
|
315
|
+
});
|
|
316
|
+
}
|
|
317
|
+
catch {
|
|
318
|
+
// OTel export must never break the run.
|
|
319
|
+
}
|
|
221
320
|
}
|
|
222
321
|
return usage;
|
|
223
322
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"traces.js","sourceRoot":"","sources":["../../src/observability/traces.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,4BAA4B,EAAE,MAAM,YAAY,CAAC;AAE1D,SAAS,MAAM;IACb,OAAO,QAAQ,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;AACxE,CAAC;AAED;;;;;;2BAM2B;AAC3B,MAAM,uBAAuB,GAC3B,uGAAuG,CAAC;AAE1G;;;wEAGwE;AACxE,MAAM,UAAU,qBAAqB,CAAC,KAAc;IAClD,OAAO,UAAU,CAAC,KAAK,EAAE,IAAI,OAAO,EAAU,CAAC,CAAC;AAClD,CAAC;AAED,SAAS,UAAU,CAAC,KAAc,EAAE,IAAqB;IACvD,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC9D,IAAI,IAAI,CAAC,GAAG,CAAC,KAAe,CAAC;QAAE,OAAO,YAAY,CAAC;IACnD,IAAI,CAAC,GAAG,CAAC,KAAe,CAAC,CAAC;IAC1B,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;IAC/C,CAAC;IACD,MAAM,GAAG,GAA4B,EAAE,CAAC;IACxC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAgC,CAAC,EAAE,CAAC;QACtE,IAAI,uBAAuB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;YACpC,GAAG,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC;QACxB,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,sBAAsB;IAC1C,IAAI,CAAC;QACH,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;QAC5D,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,sBAAsB,CAAC,CAAC;QACxD,IAAI,MAAM,EAAE,CAAC;YACX,OAAO;gBACL,GAAG,4BAA4B;gBAC/B,GAAG,MAAM;aACa,CAAC;QAC3B,CAAC;IACH,CAAC;IAAC,MAAM,CAAC,CAAA,CAAC;IACV,OAAO,4BAA4B,CAAC;AACtC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,IA+BzC;IACC,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IACzE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC5B,MAAM,YAAY,GAAG,MAAM,EAAE,CAAC;IAE9B,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,IAAI,qBAAqB,GAAG,CAAC,CAAC;IAC9B,oEAAoE;IACpE,MAAM,YAAY,GAAG,IAAI,GAAG,EAQzB,CAAC;IACJ,0EAA0E;IAC1E,2EAA2E;IAC3E,8EAA8E;IAC9E,+EAA+E;IAC/E,wEAAwE;IACxE,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAAoB,CAAC;IAEvD,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,MAAM,gBAAgB,GAAG,CAAC,KAAqB,EAAQ,EAAE;QACvD,IAAI,CAAC;YACH,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;gBAChC,MAAM,OAAO,GAAG,qBAAqB,EAAE,CAAC;gBACxC,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC;gBACrB,YAAY,CAAC,GAAG,CAAC,OAAO,EAAE;oBACxB,MAAM,EAAE,GAAG;oBACX,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE;oBACnB,QAAQ,EAAE,KAAK,CAAC,IAAI;oBACpB,KAAK,EAAE,KAAK,CAAC,KAAK;iBACnB,CAAC,CAAC;gBACH,MAAM,KAAK,GAAG,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACjD,IAAI,KAAK;oBAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;;oBAC1B,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;YACrD,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACtC,MAAM,KAAK,GAAG,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACjD,MAAM,OAAO,GAAG,KAAK,EAAE,KAAK,EAAE,CAAC;gBAC/B,MAAM,OAAO,GACX,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;gBAChE,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;oBAC1B,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;oBAC7B,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAC7B,kBAAkB,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC1C,CAAC;gBACD,aAAa,EAAE,CAAC;gBAEhB,MAAM,OAAO,GACX,OAAO,KAAK,CAAC,MAAM,KAAK,QAAQ;oBAChC,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC;wBAC/B,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC;gBAC/C,IAAI,OAAO;oBAAE,WAAW,EAAE,CAAC;;oBACtB,eAAe,EAAE,CAAC;gBAEvB,MAAM,IAAI,GAAc;oBACtB,EAAE,EAAE,OAAO,EAAE,MAAM,IAAI,MAAM,EAAE;oBAC/B,KAAK;oBACL,QAAQ;oBACR,MAAM;oBACN,YAAY;oBACZ,QAAQ,EAAE,WAAW;oBACrB,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,WAAW,EAAE,CAAC;oBACd,YAAY,EAAE,CAAC;oBACf,eAAe,EAAE,CAAC;oBAClB,gBAAgB,EAAE,CAAC;oBACnB,aAAa,EAAE,CAAC;oBAChB,UAAU,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;oBACtD,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;oBACrC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI;oBAC3C,QAAQ,EACN,MAAM,CAAC,eAAe,IAAI,OAAO;wBAC/B,CAAC,CAAC,yDAAyD;4BACzD,sDAAsD;4BACtD,uDAAuD;4BACvD,oCAAoC;4BACpC;gCACE,KAAK,EAAE,qBAAqB,CAAC,OAAO,CAAC,KAAK,CAGzC;6BACF;wBACH,CAAC,CAAC,IAAI;oBACV,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;iBACtB,CAAC;gBACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACnB,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;QAEV,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACvB,CAAC,CAAC;IAEF,IAAI,KAAiC,CAAC;IACtC,IAAI,SAAS,GAAwB,SAAS,CAAC;IAC/C,IAAI,YAAY,GAAkB,IAAI,CAAC;IACvC,IAAI,CAAC;QACH,KAAK,GAAG,MAAM,YAAY,CAAC,EAAE,GAAG,QAAQ,EAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC,CAAC;IACtE,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,SAAS,GAAG,OAAO,CAAC;QACpB,YAAY,GAAG,GAAG,EAAE,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC;QAC3C,MAAM,GAAG,CAAC;IACZ,CAAC;YAAS,CAAC;QACT,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC1B,MAAM,eAAe,GAAG,MAAM,GAAG,QAAQ,CAAC;QAE1C,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,CAAC;YAC5D,IAAI,KAAK,EAAE,CAAC;gBACV,aAAa,GAAG,aAAa,CAC3B,KAAK,CAAC,WAAW,EACjB,KAAK,CAAC,YAAY,EAClB,KAAK,CAAC,KAAK,EACX,KAAK,CAAC,eAAe,EACrB,KAAK,CAAC,gBAAgB,CACvB,CAAC;YACJ,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;QAEV,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,IAAI,KAAK,EAAE,CAAC;YACV,YAAY,GAAG,CAAC,CAAC;YACjB,MAAM,OAAO,GAAc;gBACzB,EAAE,EAAE,MAAM,EAAE;gBACZ,KAAK;gBACL,QAAQ;gBACR,MAAM;gBACN,YAAY;gBACZ,QAAQ,EAAE,UAAU;gBACpB,IAAI,EAAE,KAAK,CAAC,KAAK;gBACjB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,YAAY,EAAE,KAAK,CAAC,YAAY;gBAChC,eAAe,EAAE,KAAK,CAAC,eAAe;gBACtC,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;gBACxC,aAAa;gBACb,UAAU,EAAE,eAAe;gBAC3B,MAAM,EAAE,SAAS;gBACjB,YAAY;gBACZ,QAAQ,EAAE,IAAI;gBACd,SAAS,EAAE,QAAQ;aACpB,CAAC;YACF,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACtB,CAAC;QAED,MAAM,UAAU,GAAc;YAC5B,EAAE,EAAE,YAAY;YAChB,KAAK;YACL,QAAQ;YACR,MAAM;YACN,YAAY,EAAE,IAAI;YAClB,QAAQ,EAAE,WAAW;YACrB,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,KAAK,EAAE,WAAW,IAAI,CAAC;YACpC,YAAY,EAAE,KAAK,EAAE,YAAY,IAAI,CAAC;YACtC,eAAe,EAAE,KAAK,EAAE,eAAe,IAAI,CAAC;YAC5C,gBAAgB,EAAE,KAAK,EAAE,gBAAgB,IAAI,CAAC;YAC9C,aAAa;YACb,UAAU,EAAE,eAAe;YAC3B,MAAM,EAAE,SAAS;YACjB,YAAY;YACZ,QAAQ,EAAE,IAAI;YACd,SAAS,EAAE,QAAQ;SACpB,CAAC;QACF,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAEvB,MAAM,OAAO,GAAiB;YAC5B,KAAK;YACL,QAAQ;YACR,MAAM;YACN,UAAU,EAAE,KAAK,CAAC,MAAM;YACxB,QAAQ,EAAE,YAAY;YACtB,SAAS,EAAE,aAAa;YACxB,eAAe;YACf,WAAW;YACX,eAAe;YACf,kBAAkB,EAAE,aAAa;YACjC,gBAAgB,EAAE,KAAK,EAAE,WAAW,IAAI,CAAC;YACzC,iBAAiB,EAAE,KAAK,EAAE,YAAY,IAAI,CAAC;YAC3C,KAAK,EAAE,KAAK,EAAE,KAAK,IAAI,QAAQ,CAAC,KAAK;YACrC,SAAS,EAAE,QAAQ;SACpB,CAAC;QAEF,cAAc,CAAC,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IAChE,CAAC;IAED,OAAO,KAAM,CAAC;AAChB,CAAC;AAED,KAAK,UAAU,cAAc,CAC3B,KAAkB,EAClB,OAAqB,EACrB,KAAa,EACb,MAA2B;IAE3B,MAAM,EAAE,eAAe,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;IAC3E,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IACxE,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IAElD,qDAAqD;IACrD,IAAI,CAAC;QACH,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;QACnD,MAAM,WAAW,CAAC,KAAK,EAAE,EAAE,UAAU,EAAE,MAAM,CAAC,cAAc,EAAE,CAAC,CAAC;IAClE,CAAC;IAAC,MAAM,CAAC,CAAA,CAAC;AACZ,CAAC","sourcesContent":["import type { AgentChatEvent } from \"../agent/types.js\";\nimport type { AgentLoopUsage } from \"../agent/production-agent.js\";\nimport type { TraceSpan, TraceSummary, ObservabilityConfig } from \"./types.js\";\nimport { DEFAULT_OBSERVABILITY_CONFIG } from \"./types.js\";\n\nfunction spanId(): string {\n return `span-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;\n}\n\n/** Keys whose values are stripped from persisted tool inputs when\n * `captureToolArgs` is enabled. Matched case-insensitively and tolerant\n * of `_` / `-` separators. M14 in the MCP/A2A audit: tool calls\n * routinely receive credentials verbatim (db-exec INSERTs, fetchTool\n * Authorization headers, ad-hoc bearer tokens) — keeping those values\n * out of agent_trace_spans.metadata avoids long-term storage of\n * short-lived secrets. */\nconst SENSITIVE_FIELD_PATTERN =\n /^(authorization|cookie|api[_-]?key|password|secret|token|access[_-]?token|refresh[_-]?token|bearer)$/i;\n\n/** Recursively walk a structured value and replace sensitive field\n * values with the literal string \"[REDACTED]\". Pure (returns a copy);\n * the original input is never mutated. Cycles are tolerated via a\n * small WeakSet seen-tracker that returns \"[Circular]\" for repeats. */\nexport function redactSensitiveFields(value: unknown): unknown {\n return redactWalk(value, new WeakSet<object>());\n}\n\nfunction redactWalk(value: unknown, seen: WeakSet<object>): unknown {\n if (value === null || typeof value !== \"object\") return value;\n if (seen.has(value as object)) return \"[Circular]\";\n seen.add(value as object);\n if (Array.isArray(value)) {\n return value.map((v) => redactWalk(v, seen));\n }\n const out: Record<string, unknown> = {};\n for (const [k, v] of Object.entries(value as Record<string, unknown>)) {\n if (SENSITIVE_FIELD_PATTERN.test(k)) {\n out[k] = \"[REDACTED]\";\n } else {\n out[k] = redactWalk(v, seen);\n }\n }\n return out;\n}\n\nexport async function getObservabilityConfig(): Promise<ObservabilityConfig> {\n try {\n const { getSetting } = await import(\"../settings/store.js\");\n const stored = await getSetting(\"observability-config\");\n if (stored) {\n return {\n ...DEFAULT_OBSERVABILITY_CONFIG,\n ...stored,\n } as ObservabilityConfig;\n }\n } catch {}\n return DEFAULT_OBSERVABILITY_CONFIG;\n}\n\nexport async function instrumentAgentLoop(opts: {\n runAgentLoop: (loopOpts: {\n engine: any;\n model: string;\n systemPrompt: string;\n tools: any[];\n messages: any[];\n actions: Record<string, any>;\n send: (event: AgentChatEvent) => void;\n signal: AbortSignal;\n providerOptions?: any;\n }) => Promise<AgentLoopUsage>;\n loopOpts: {\n engine: any;\n model: string;\n systemPrompt: string;\n tools: any[];\n messages: any[];\n actions: Record<string, any>;\n send: (event: AgentChatEvent) => void;\n signal: AbortSignal;\n providerOptions?: any;\n };\n runId: string;\n threadId: string | null;\n /** Owner of this run; persisted on every span + summary so dashboard\n * reads can filter to a single user. Null for unauthenticated callers\n * (background tasks, etc.) — those rows aren't returned by per-user\n * reads. */\n userId: string | null;\n config: ObservabilityConfig;\n}): Promise<AgentLoopUsage> {\n const { runAgentLoop, loopOpts, runId, threadId, userId, config } = opts;\n const runStart = Date.now();\n const parentSpanId = spanId();\n\n const spans: TraceSpan[] = [];\n let toolInvocationCounter = 0;\n // Keyed by counter to handle concurrent calls to the same tool name\n const pendingTools = new Map<\n number,\n {\n spanId: string;\n startMs: number;\n toolName: string;\n input: Record<string, string>;\n }\n >();\n // Secondary index: tool name → FIFO queue of pending invocation counters.\n // tool_start/tool_done events carry only the tool name (no call id), so to\n // pair starts and dones correctly when the agent runs concurrent calls to the\n // same tool name (read-only / parallelSafe batches via Promise.all), we keep a\n // queue per name and match each done to the OLDEST still-pending start.\n const toolNameToCounters = new Map<string, number[]>();\n\n let toolCallCount = 0;\n let successfulTools = 0;\n let failedTools = 0;\n\n const instrumentedSend = (event: AgentChatEvent): void => {\n try {\n if (event.type === \"tool_start\") {\n const counter = toolInvocationCounter++;\n const sid = spanId();\n pendingTools.set(counter, {\n spanId: sid,\n startMs: Date.now(),\n toolName: event.tool,\n input: event.input,\n });\n const queue = toolNameToCounters.get(event.tool);\n if (queue) queue.push(counter);\n else toolNameToCounters.set(event.tool, [counter]);\n } else if (event.type === \"tool_done\") {\n const queue = toolNameToCounters.get(event.tool);\n const counter = queue?.shift();\n const pending =\n counter !== undefined ? pendingTools.get(counter) : undefined;\n if (counter !== undefined) {\n pendingTools.delete(counter);\n if (queue && queue.length === 0)\n toolNameToCounters.delete(event.tool);\n }\n toolCallCount++;\n\n const isError =\n typeof event.result === \"string\" &&\n (event.result.startsWith(\"Error\") ||\n event.result.startsWith(\"Error running \"));\n if (isError) failedTools++;\n else successfulTools++;\n\n const span: TraceSpan = {\n id: pending?.spanId ?? spanId(),\n runId,\n threadId,\n userId,\n parentSpanId,\n spanType: \"tool_call\",\n name: event.tool,\n inputTokens: 0,\n outputTokens: 0,\n cacheReadTokens: 0,\n cacheWriteTokens: 0,\n costCentsX100: 0,\n durationMs: pending ? Date.now() - pending.startMs : 0,\n status: isError ? \"error\" : \"success\",\n errorMessage: isError ? event.result : null,\n metadata:\n config.captureToolArgs && pending\n ? // Strip Authorization/api-key/token-shaped values before\n // persisting (M14 in the MCP/A2A audit). Tool-runtime\n // execution still sees the unredacted input — only the\n // long-lived span row is sanitized.\n {\n input: redactSensitiveFields(pending.input) as Record<\n string,\n string\n >,\n }\n : null,\n createdAt: Date.now(),\n };\n spans.push(span);\n }\n } catch {}\n\n loopOpts.send(event);\n };\n\n let usage: AgentLoopUsage | undefined;\n let runStatus: \"success\" | \"error\" = \"success\";\n let errorMessage: string | null = null;\n try {\n usage = await runAgentLoop({ ...loopOpts, send: instrumentedSend });\n } catch (err: any) {\n runStatus = \"error\";\n errorMessage = err?.message ?? String(err);\n throw err;\n } finally {\n const runEnd = Date.now();\n const totalDurationMs = runEnd - runStart;\n\n let costCentsX100 = 0;\n try {\n const { calculateCost } = await import(\"../usage/store.js\");\n if (usage) {\n costCentsX100 = calculateCost(\n usage.inputTokens,\n usage.outputTokens,\n usage.model,\n usage.cacheReadTokens,\n usage.cacheWriteTokens,\n );\n }\n } catch {}\n\n let llmCallCount = 0;\n if (usage) {\n llmCallCount = 1;\n const llmSpan: TraceSpan = {\n id: spanId(),\n runId,\n threadId,\n userId,\n parentSpanId,\n spanType: \"llm_call\",\n name: usage.model,\n inputTokens: usage.inputTokens,\n outputTokens: usage.outputTokens,\n cacheReadTokens: usage.cacheReadTokens,\n cacheWriteTokens: usage.cacheWriteTokens,\n costCentsX100,\n durationMs: totalDurationMs,\n status: runStatus,\n errorMessage,\n metadata: null,\n createdAt: runStart,\n };\n spans.push(llmSpan);\n }\n\n const parentSpan: TraceSpan = {\n id: parentSpanId,\n runId,\n threadId,\n userId,\n parentSpanId: null,\n spanType: \"agent_run\",\n name: \"agent_run\",\n inputTokens: usage?.inputTokens ?? 0,\n outputTokens: usage?.outputTokens ?? 0,\n cacheReadTokens: usage?.cacheReadTokens ?? 0,\n cacheWriteTokens: usage?.cacheWriteTokens ?? 0,\n costCentsX100,\n durationMs: totalDurationMs,\n status: runStatus,\n errorMessage,\n metadata: null,\n createdAt: runStart,\n };\n spans.push(parentSpan);\n\n const summary: TraceSummary = {\n runId,\n threadId,\n userId,\n totalSpans: spans.length,\n llmCalls: llmCallCount,\n toolCalls: toolCallCount,\n successfulTools,\n failedTools,\n totalDurationMs,\n totalCostCentsX100: costCentsX100,\n totalInputTokens: usage?.inputTokens ?? 0,\n totalOutputTokens: usage?.outputTokens ?? 0,\n model: usage?.model ?? loopOpts.model,\n createdAt: runStart,\n };\n\n writeTraceData(spans, summary, runId, config).catch(() => {});\n }\n\n return usage!;\n}\n\nasync function writeTraceData(\n spans: TraceSpan[],\n summary: TraceSummary,\n runId: string,\n config: ObservabilityConfig,\n): Promise<void> {\n const { insertTraceSpan, upsertTraceSummary } = await import(\"./store.js\");\n await Promise.all(spans.map((s) => insertTraceSpan(s).catch(() => {})));\n await upsertTraceSummary(summary).catch(() => {});\n\n // Fire automated evals after trace data is persisted\n try {\n const { evaluateRun } = await import(\"./evals.js\");\n await evaluateRun(runId, { sampleRate: config.evalSampleRate });\n } catch {}\n}\n"]}
|
|
1
|
+
{"version":3,"file":"traces.js","sourceRoot":"","sources":["../../src/observability/traces.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,4BAA4B,EAAE,MAAM,YAAY,CAAC;AAC1D,OAAO,EAAkB,YAAY,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE5E,SAAS,MAAM;IACb,OAAO,QAAQ,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;AACxE,CAAC;AAED;;;;;;2BAM2B;AAC3B,MAAM,uBAAuB,GAC3B,uGAAuG,CAAC;AAE1G;;;wEAGwE;AACxE,MAAM,UAAU,qBAAqB,CAAC,KAAc;IAClD,OAAO,UAAU,CAAC,KAAK,EAAE,IAAI,OAAO,EAAU,CAAC,CAAC;AAClD,CAAC;AAED,SAAS,UAAU,CAAC,KAAc,EAAE,IAAqB;IACvD,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC9D,IAAI,IAAI,CAAC,GAAG,CAAC,KAAe,CAAC;QAAE,OAAO,YAAY,CAAC;IACnD,IAAI,CAAC,GAAG,CAAC,KAAe,CAAC,CAAC;IAC1B,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;IAC/C,CAAC;IACD,MAAM,GAAG,GAA4B,EAAE,CAAC;IACxC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAgC,CAAC,EAAE,CAAC;QACtE,IAAI,uBAAuB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;YACpC,GAAG,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC;QACxB,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,sBAAsB;IAC1C,IAAI,CAAC;QACH,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;QAC5D,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,sBAAsB,CAAC,CAAC;QACxD,IAAI,MAAM,EAAE,CAAC;YACX,OAAO;gBACL,GAAG,4BAA4B;gBAC/B,GAAG,MAAM;aACa,CAAC;QAC3B,CAAC;IACH,CAAC;IAAC,MAAM,CAAC,CAAA,CAAC;IACV,OAAO,4BAA4B,CAAC;AACtC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,IA+BzC;IACC,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IACzE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC5B,MAAM,YAAY,GAAG,MAAM,EAAE,CAAC;IAE9B,0EAA0E;IAC1E,2EAA2E;IAC3E,2EAA2E;IAC3E,yEAAyE;IACzE,oDAAoD;IACpD,MAAM,kBAAkB,GAAG,cAAc,CAAC,WAAW,EAAE;QACrD,cAAc,EAAE,KAAK;QACrB,iBAAiB,EAAE,QAAQ,IAAI,SAAS;QACxC,eAAe,EAAE,MAAM,IAAI,SAAS;QACpC,aAAa,EAAE,QAAQ,CAAC,KAAK;KAC9B,CAAC,CAAC;IAEH,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,IAAI,qBAAqB,GAAG,CAAC,CAAC;IAC9B,oEAAoE;IACpE,MAAM,YAAY,GAAG,IAAI,GAAG,EAUzB,CAAC;IACJ,0EAA0E;IAC1E,2EAA2E;IAC3E,8EAA8E;IAC9E,+EAA+E;IAC/E,wEAAwE;IACxE,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAAoB,CAAC;IAEvD,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,wEAAwE;IACxE,gDAAgD;IAChD,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAa,CAAC;IAE/C,MAAM,gBAAgB,GAAG,CAAC,KAAqB,EAAQ,EAAE;QACvD,IAAI,CAAC;YACH,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;gBAChC,MAAM,OAAO,GAAG,qBAAqB,EAAE,CAAC;gBACxC,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC;gBACrB,iEAAiE;gBACjE,oEAAoE;gBACpE,qEAAqE;gBACrE,uEAAuE;gBACvE,MAAM,KAAK,GAYP;oBACF,MAAM,EAAE,GAAG;oBACX,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE;oBACnB,QAAQ,EAAE,KAAK,CAAC,IAAI;oBACpB,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,QAAQ,EAAE,IAAI;iBACf,CAAC;gBACF,YAAY,CAAC,GAAG,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;gBACjC,KAAK,cAAc,CAAC,WAAW,EAAE;oBAC/B,WAAW,EAAE,KAAK,CAAC,IAAI;iBACxB,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE;oBACf,IAAI,CAAC,IAAI;wBAAE,OAAO;oBAClB,sEAAsE;oBACtE,+DAA+D;oBAC/D,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;wBACpB,YAAY,CAAC,IAAI,EAAE;4BACjB,MAAM,EAAE,KAAK,CAAC,SAAS,CAAC,MAAM;4BAC9B,YAAY,EAAE,KAAK,CAAC,SAAS,CAAC,YAAY;yBAC3C,CAAC,CAAC;oBACL,CAAC;yBAAM,CAAC;wBACN,KAAK,CAAC,QAAQ,GAAG,IAAI,CAAC;wBACtB,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;oBAC9B,CAAC;gBACH,CAAC,CAAC,CAAC;gBACH,MAAM,KAAK,GAAG,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACjD,IAAI,KAAK;oBAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;;oBAC1B,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;YACrD,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACtC,MAAM,KAAK,GAAG,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACjD,MAAM,OAAO,GAAG,KAAK,EAAE,KAAK,EAAE,CAAC;gBAC/B,MAAM,OAAO,GACX,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;gBAChE,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;oBAC1B,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;oBAC7B,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAC7B,kBAAkB,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC1C,CAAC;gBACD,aAAa,EAAE,CAAC;gBAEhB,MAAM,OAAO,GACX,OAAO,KAAK,CAAC,MAAM,KAAK,QAAQ;oBAChC,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC;wBAC/B,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC;gBAC/C,IAAI,OAAO;oBAAE,WAAW,EAAE,CAAC;;oBACtB,eAAe,EAAE,CAAC;gBAEvB,uEAAuE;gBACvE,oEAAoE;gBACpE,MAAM,aAAa,GAAG;oBACpB,MAAM,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAwB;oBAC9D,YAAY,EAAE,OAAO,CAAC,CAAC,CAAE,KAAK,CAAC,MAAiB,CAAC,CAAC,CAAC,IAAI;iBACxD,CAAC;gBACF,IAAI,OAAO,EAAE,QAAQ,EAAE,CAAC;oBACtB,iBAAiB,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;oBAC3C,YAAY,CAAC,OAAO,CAAC,QAAQ,EAAE;wBAC7B,MAAM,EAAE,aAAa,CAAC,MAAM;wBAC5B,YAAY,EAAE,aAAa,CAAC,YAAY;wBACxC,UAAU,EAAE,EAAE,WAAW,EAAE,KAAK,CAAC,IAAI,EAAE;qBACxC,CAAC,CAAC;gBACL,CAAC;qBAAM,IAAI,OAAO,EAAE,CAAC;oBACnB,OAAO,CAAC,SAAS,GAAG,aAAa,CAAC;gBACpC,CAAC;gBAED,MAAM,IAAI,GAAc;oBACtB,EAAE,EAAE,OAAO,EAAE,MAAM,IAAI,MAAM,EAAE;oBAC/B,KAAK;oBACL,QAAQ;oBACR,MAAM;oBACN,YAAY;oBACZ,QAAQ,EAAE,WAAW;oBACrB,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,WAAW,EAAE,CAAC;oBACd,YAAY,EAAE,CAAC;oBACf,eAAe,EAAE,CAAC;oBAClB,gBAAgB,EAAE,CAAC;oBACnB,aAAa,EAAE,CAAC;oBAChB,UAAU,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;oBACtD,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;oBACrC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI;oBAC3C,QAAQ,EACN,MAAM,CAAC,eAAe,IAAI,OAAO;wBAC/B,CAAC,CAAC,yDAAyD;4BACzD,sDAAsD;4BACtD,uDAAuD;4BACvD,oCAAoC;4BACpC;gCACE,KAAK,EAAE,qBAAqB,CAAC,OAAO,CAAC,KAAK,CAGzC;6BACF;wBACH,CAAC,CAAC,IAAI;oBACV,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;iBACtB,CAAC;gBACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACnB,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;QAEV,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACvB,CAAC,CAAC;IAEF,IAAI,KAAiC,CAAC;IACtC,IAAI,SAAS,GAAwB,SAAS,CAAC;IAC/C,IAAI,YAAY,GAAkB,IAAI,CAAC;IACvC,IAAI,CAAC;QACH,KAAK,GAAG,MAAM,YAAY,CAAC,EAAE,GAAG,QAAQ,EAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC,CAAC;IACtE,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,SAAS,GAAG,OAAO,CAAC;QACpB,YAAY,GAAG,GAAG,EAAE,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC;QAC3C,MAAM,GAAG,CAAC;IACZ,CAAC;YAAS,CAAC;QACT,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC1B,MAAM,eAAe,GAAG,MAAM,GAAG,QAAQ,CAAC;QAE1C,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,CAAC;YAC5D,IAAI,KAAK,EAAE,CAAC;gBACV,aAAa,GAAG,aAAa,CAC3B,KAAK,CAAC,WAAW,EACjB,KAAK,CAAC,YAAY,EAClB,KAAK,CAAC,KAAK,EACX,KAAK,CAAC,eAAe,EACrB,KAAK,CAAC,gBAAgB,CACvB,CAAC;YACJ,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;QAEV,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,IAAI,KAAK,EAAE,CAAC;YACV,YAAY,GAAG,CAAC,CAAC;YACjB,MAAM,OAAO,GAAc;gBACzB,EAAE,EAAE,MAAM,EAAE;gBACZ,KAAK;gBACL,QAAQ;gBACR,MAAM;gBACN,YAAY;gBACZ,QAAQ,EAAE,UAAU;gBACpB,IAAI,EAAE,KAAK,CAAC,KAAK;gBACjB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,YAAY,EAAE,KAAK,CAAC,YAAY;gBAChC,eAAe,EAAE,KAAK,CAAC,eAAe;gBACtC,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;gBACxC,aAAa;gBACb,UAAU,EAAE,eAAe;gBAC3B,MAAM,EAAE,SAAS;gBACjB,YAAY;gBACZ,QAAQ,EAAE,IAAI;gBACd,SAAS,EAAE,QAAQ;aACpB,CAAC;YACF,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACtB,CAAC;QAED,MAAM,UAAU,GAAc;YAC5B,EAAE,EAAE,YAAY;YAChB,KAAK;YACL,QAAQ;YACR,MAAM;YACN,YAAY,EAAE,IAAI;YAClB,QAAQ,EAAE,WAAW;YACrB,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,KAAK,EAAE,WAAW,IAAI,CAAC;YACpC,YAAY,EAAE,KAAK,EAAE,YAAY,IAAI,CAAC;YACtC,eAAe,EAAE,KAAK,EAAE,eAAe,IAAI,CAAC;YAC5C,gBAAgB,EAAE,KAAK,EAAE,gBAAgB,IAAI,CAAC;YAC9C,aAAa;YACb,UAAU,EAAE,eAAe;YAC3B,MAAM,EAAE,SAAS;YACjB,YAAY;YACZ,QAAQ,EAAE,IAAI;YACd,SAAS,EAAE,QAAQ;SACpB,CAAC;QACF,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAEvB,MAAM,OAAO,GAAiB;YAC5B,KAAK;YACL,QAAQ;YACR,MAAM;YACN,UAAU,EAAE,KAAK,CAAC,MAAM;YACxB,QAAQ,EAAE,YAAY;YACtB,SAAS,EAAE,aAAa;YACxB,eAAe;YACf,WAAW;YACX,eAAe;YACf,kBAAkB,EAAE,aAAa;YACjC,gBAAgB,EAAE,KAAK,EAAE,WAAW,IAAI,CAAC;YACzC,iBAAiB,EAAE,KAAK,EAAE,YAAY,IAAI,CAAC;YAC3C,KAAK,EAAE,KAAK,EAAE,KAAK,IAAI,QAAQ,CAAC,KAAK;YACrC,SAAS,EAAE,QAAQ;SACpB,CAAC;QAEF,cAAc,CAAC,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAE9D,uEAAuE;QACvE,uEAAuE;QACvE,6EAA6E;QAC7E,0EAA0E;QAC1E,IAAI,CAAC;YACH,IAAI,KAAK,EAAE,CAAC;gBACV,YAAY,CAAC,MAAM,cAAc,CAAC,UAAU,EAAE,EAAE,CAAC,EAAE;oBACjD,MAAM,EAAE,SAAS;oBACjB,YAAY;oBACZ,UAAU,EAAE;wBACV,WAAW,EAAE,KAAK,CAAC,KAAK;wBACxB,kBAAkB,EAAE,KAAK,CAAC,WAAW;wBACrC,mBAAmB,EAAE,KAAK,CAAC,YAAY;wBACvC,uBAAuB,EAAE,KAAK,CAAC,eAAe;wBAC9C,wBAAwB,EAAE,KAAK,CAAC,gBAAgB;wBAChD,qBAAqB,EAAE,aAAa;qBACrC;iBACF,CAAC,CAAC;YACL,CAAC;YACD,KAAK,MAAM,QAAQ,IAAI,iBAAiB,EAAE,CAAC;gBACzC,YAAY,CAAC,QAAQ,EAAE;oBACrB,MAAM,EAAE,OAAO;oBACf,YAAY,EAAE,mCAAmC;iBAClD,CAAC,CAAC;YACL,CAAC;YACD,iBAAiB,CAAC,KAAK,EAAE,CAAC;YAC1B,YAAY,CAAC,MAAM,kBAAkB,EAAE;gBACrC,MAAM,EAAE,SAAS;gBACjB,YAAY;gBACZ,UAAU,EAAE;oBACV,kBAAkB,EAAE,aAAa;oBACjC,wBAAwB,EAAE,eAAe;oBACzC,oBAAoB,EAAE,WAAW;oBACjC,mBAAmB,EAAE,eAAe;oBACpC,oBAAoB,EAAE,KAAK,EAAE,WAAW,IAAI,CAAC;oBAC7C,qBAAqB,EAAE,KAAK,EAAE,YAAY,IAAI,CAAC;oBAC/C,uBAAuB,EAAE,aAAa;iBACvC;aACF,CAAC,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACP,wCAAwC;QAC1C,CAAC;IACH,CAAC;IAED,OAAO,KAAM,CAAC;AAChB,CAAC;AAED,KAAK,UAAU,cAAc,CAC3B,KAAkB,EAClB,OAAqB,EACrB,KAAa,EACb,MAA2B;IAE3B,MAAM,EAAE,eAAe,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;IAC3E,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IACxE,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IAElD,qDAAqD;IACrD,IAAI,CAAC;QACH,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;QACnD,MAAM,WAAW,CAAC,KAAK,EAAE,EAAE,UAAU,EAAE,MAAM,CAAC,cAAc,EAAE,CAAC,CAAC;IAClE,CAAC;IAAC,MAAM,CAAC,CAAA,CAAC;AACZ,CAAC","sourcesContent":["import type { AgentChatEvent } from \"../agent/types.js\";\nimport type { AgentLoopUsage } from \"../agent/production-agent.js\";\nimport type { TraceSpan, TraceSummary, ObservabilityConfig } from \"./types.js\";\nimport { DEFAULT_OBSERVABILITY_CONFIG } from \"./types.js\";\nimport { type AgentSpan, endAgentSpan, startAgentSpan } from \"./tracing.js\";\n\nfunction spanId(): string {\n return `span-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;\n}\n\n/** Keys whose values are stripped from persisted tool inputs when\n * `captureToolArgs` is enabled. Matched case-insensitively and tolerant\n * of `_` / `-` separators. M14 in the MCP/A2A audit: tool calls\n * routinely receive credentials verbatim (db-exec INSERTs, fetchTool\n * Authorization headers, ad-hoc bearer tokens) — keeping those values\n * out of agent_trace_spans.metadata avoids long-term storage of\n * short-lived secrets. */\nconst SENSITIVE_FIELD_PATTERN =\n /^(authorization|cookie|api[_-]?key|password|secret|token|access[_-]?token|refresh[_-]?token|bearer)$/i;\n\n/** Recursively walk a structured value and replace sensitive field\n * values with the literal string \"[REDACTED]\". Pure (returns a copy);\n * the original input is never mutated. Cycles are tolerated via a\n * small WeakSet seen-tracker that returns \"[Circular]\" for repeats. */\nexport function redactSensitiveFields(value: unknown): unknown {\n return redactWalk(value, new WeakSet<object>());\n}\n\nfunction redactWalk(value: unknown, seen: WeakSet<object>): unknown {\n if (value === null || typeof value !== \"object\") return value;\n if (seen.has(value as object)) return \"[Circular]\";\n seen.add(value as object);\n if (Array.isArray(value)) {\n return value.map((v) => redactWalk(v, seen));\n }\n const out: Record<string, unknown> = {};\n for (const [k, v] of Object.entries(value as Record<string, unknown>)) {\n if (SENSITIVE_FIELD_PATTERN.test(k)) {\n out[k] = \"[REDACTED]\";\n } else {\n out[k] = redactWalk(v, seen);\n }\n }\n return out;\n}\n\nexport async function getObservabilityConfig(): Promise<ObservabilityConfig> {\n try {\n const { getSetting } = await import(\"../settings/store.js\");\n const stored = await getSetting(\"observability-config\");\n if (stored) {\n return {\n ...DEFAULT_OBSERVABILITY_CONFIG,\n ...stored,\n } as ObservabilityConfig;\n }\n } catch {}\n return DEFAULT_OBSERVABILITY_CONFIG;\n}\n\nexport async function instrumentAgentLoop(opts: {\n runAgentLoop: (loopOpts: {\n engine: any;\n model: string;\n systemPrompt: string;\n tools: any[];\n messages: any[];\n actions: Record<string, any>;\n send: (event: AgentChatEvent) => void;\n signal: AbortSignal;\n providerOptions?: any;\n }) => Promise<AgentLoopUsage>;\n loopOpts: {\n engine: any;\n model: string;\n systemPrompt: string;\n tools: any[];\n messages: any[];\n actions: Record<string, any>;\n send: (event: AgentChatEvent) => void;\n signal: AbortSignal;\n providerOptions?: any;\n };\n runId: string;\n threadId: string | null;\n /** Owner of this run; persisted on every span + summary so dashboard\n * reads can filter to a single user. Null for unauthenticated callers\n * (background tasks, etc.) — those rows aren't returned by per-user\n * reads. */\n userId: string | null;\n config: ObservabilityConfig;\n}): Promise<AgentLoopUsage> {\n const { runAgentLoop, loopOpts, runId, threadId, userId, config } = opts;\n const runStart = Date.now();\n const parentSpanId = spanId();\n\n // Optional OpenTelemetry root span for this run. No-ops unless a host has\n // installed `@opentelemetry/api` and registered a provider. The promise is\n // resolved before the loop runs so child tool/model spans can parent under\n // it conceptually (we keep them flat in the same tracer, which is enough\n // for the dashboards an embedding app would build).\n const otelRunSpanPromise = startAgentSpan(\"agent.run\", {\n \"agent.run_id\": runId,\n \"agent.thread_id\": threadId ?? undefined,\n \"agent.user_id\": userId ?? undefined,\n \"agent.model\": loopOpts.model,\n });\n\n const spans: TraceSpan[] = [];\n let toolInvocationCounter = 0;\n // Keyed by counter to handle concurrent calls to the same tool name\n const pendingTools = new Map<\n number,\n {\n spanId: string;\n startMs: number;\n toolName: string;\n input: Record<string, string>;\n otelSpan: AgentSpan | null;\n endResult?: { status: \"success\" | \"error\"; errorMessage: string | null };\n }\n >();\n // Secondary index: tool name → FIFO queue of pending invocation counters.\n // tool_start/tool_done events carry only the tool name (no call id), so to\n // pair starts and dones correctly when the agent runs concurrent calls to the\n // same tool name (read-only / parallelSafe batches via Promise.all), we keep a\n // queue per name and match each done to the OLDEST still-pending start.\n const toolNameToCounters = new Map<string, number[]>();\n\n let toolCallCount = 0;\n let successfulTools = 0;\n let failedTools = 0;\n\n // Track in-flight OTel tool spans so they're all ended even if the loop\n // throws before a matching `tool_done` arrives.\n const openOtelToolSpans = new Set<AgentSpan>();\n\n const instrumentedSend = (event: AgentChatEvent): void => {\n try {\n if (event.type === \"tool_start\") {\n const counter = toolInvocationCounter++;\n const sid = spanId();\n // Start the OTel tool span synchronously-ish: kick off the async\n // resolution and stash the span once it lands. Tool spans are short\n // and the api tracer is synchronous in practice, but we tolerate the\n // microtask gap by recording the span on the pending entry when ready.\n const entry: {\n spanId: string;\n startMs: number;\n toolName: string;\n input: Record<string, string>;\n otelSpan: AgentSpan | null;\n // Set by the done handler if it fires before the span promise\n // resolves, so the resolved span is ended with the correct status.\n endResult?: {\n status: \"success\" | \"error\";\n errorMessage: string | null;\n };\n } = {\n spanId: sid,\n startMs: Date.now(),\n toolName: event.tool,\n input: event.input,\n otelSpan: null,\n };\n pendingTools.set(counter, entry);\n void startAgentSpan(\"tool.call\", {\n \"tool.name\": event.tool,\n }).then((span) => {\n if (!span) return;\n // If `tool_done` already ran for this call, end the span now with the\n // status it recorded; otherwise stash it for the done handler.\n if (entry.endResult) {\n endAgentSpan(span, {\n status: entry.endResult.status,\n errorMessage: entry.endResult.errorMessage,\n });\n } else {\n entry.otelSpan = span;\n openOtelToolSpans.add(span);\n }\n });\n const queue = toolNameToCounters.get(event.tool);\n if (queue) queue.push(counter);\n else toolNameToCounters.set(event.tool, [counter]);\n } else if (event.type === \"tool_done\") {\n const queue = toolNameToCounters.get(event.tool);\n const counter = queue?.shift();\n const pending =\n counter !== undefined ? pendingTools.get(counter) : undefined;\n if (counter !== undefined) {\n pendingTools.delete(counter);\n if (queue && queue.length === 0)\n toolNameToCounters.delete(event.tool);\n }\n toolCallCount++;\n\n const isError =\n typeof event.result === \"string\" &&\n (event.result.startsWith(\"Error\") ||\n event.result.startsWith(\"Error running \"));\n if (isError) failedTools++;\n else successfulTools++;\n\n // Finalize the OTel tool span. If the span promise hasn't resolved yet\n // we record the result on the entry so its `.then` handler ends it.\n const otelEndResult = {\n status: (isError ? \"error\" : \"success\") as \"success\" | \"error\",\n errorMessage: isError ? (event.result as string) : null,\n };\n if (pending?.otelSpan) {\n openOtelToolSpans.delete(pending.otelSpan);\n endAgentSpan(pending.otelSpan, {\n status: otelEndResult.status,\n errorMessage: otelEndResult.errorMessage,\n attributes: { \"tool.name\": event.tool },\n });\n } else if (pending) {\n pending.endResult = otelEndResult;\n }\n\n const span: TraceSpan = {\n id: pending?.spanId ?? spanId(),\n runId,\n threadId,\n userId,\n parentSpanId,\n spanType: \"tool_call\",\n name: event.tool,\n inputTokens: 0,\n outputTokens: 0,\n cacheReadTokens: 0,\n cacheWriteTokens: 0,\n costCentsX100: 0,\n durationMs: pending ? Date.now() - pending.startMs : 0,\n status: isError ? \"error\" : \"success\",\n errorMessage: isError ? event.result : null,\n metadata:\n config.captureToolArgs && pending\n ? // Strip Authorization/api-key/token-shaped values before\n // persisting (M14 in the MCP/A2A audit). Tool-runtime\n // execution still sees the unredacted input — only the\n // long-lived span row is sanitized.\n {\n input: redactSensitiveFields(pending.input) as Record<\n string,\n string\n >,\n }\n : null,\n createdAt: Date.now(),\n };\n spans.push(span);\n }\n } catch {}\n\n loopOpts.send(event);\n };\n\n let usage: AgentLoopUsage | undefined;\n let runStatus: \"success\" | \"error\" = \"success\";\n let errorMessage: string | null = null;\n try {\n usage = await runAgentLoop({ ...loopOpts, send: instrumentedSend });\n } catch (err: any) {\n runStatus = \"error\";\n errorMessage = err?.message ?? String(err);\n throw err;\n } finally {\n const runEnd = Date.now();\n const totalDurationMs = runEnd - runStart;\n\n let costCentsX100 = 0;\n try {\n const { calculateCost } = await import(\"../usage/store.js\");\n if (usage) {\n costCentsX100 = calculateCost(\n usage.inputTokens,\n usage.outputTokens,\n usage.model,\n usage.cacheReadTokens,\n usage.cacheWriteTokens,\n );\n }\n } catch {}\n\n let llmCallCount = 0;\n if (usage) {\n llmCallCount = 1;\n const llmSpan: TraceSpan = {\n id: spanId(),\n runId,\n threadId,\n userId,\n parentSpanId,\n spanType: \"llm_call\",\n name: usage.model,\n inputTokens: usage.inputTokens,\n outputTokens: usage.outputTokens,\n cacheReadTokens: usage.cacheReadTokens,\n cacheWriteTokens: usage.cacheWriteTokens,\n costCentsX100,\n durationMs: totalDurationMs,\n status: runStatus,\n errorMessage,\n metadata: null,\n createdAt: runStart,\n };\n spans.push(llmSpan);\n }\n\n const parentSpan: TraceSpan = {\n id: parentSpanId,\n runId,\n threadId,\n userId,\n parentSpanId: null,\n spanType: \"agent_run\",\n name: \"agent_run\",\n inputTokens: usage?.inputTokens ?? 0,\n outputTokens: usage?.outputTokens ?? 0,\n cacheReadTokens: usage?.cacheReadTokens ?? 0,\n cacheWriteTokens: usage?.cacheWriteTokens ?? 0,\n costCentsX100,\n durationMs: totalDurationMs,\n status: runStatus,\n errorMessage,\n metadata: null,\n createdAt: runStart,\n };\n spans.push(parentSpan);\n\n const summary: TraceSummary = {\n runId,\n threadId,\n userId,\n totalSpans: spans.length,\n llmCalls: llmCallCount,\n toolCalls: toolCallCount,\n successfulTools,\n failedTools,\n totalDurationMs,\n totalCostCentsX100: costCentsX100,\n totalInputTokens: usage?.inputTokens ?? 0,\n totalOutputTokens: usage?.outputTokens ?? 0,\n model: usage?.model ?? loopOpts.model,\n createdAt: runStart,\n };\n\n writeTraceData(spans, summary, runId, config).catch(() => {});\n\n // OpenTelemetry export (no-op unless a provider is registered). Emit a\n // self-contained `llm.call` span carrying model + token usage, end any\n // tool spans still open (loop threw mid-tool), and end the run span. Awaited\n // so the spans are emitted before the function returns; cheap when no-op.\n try {\n if (usage) {\n endAgentSpan(await startAgentSpan(\"llm.call\", {}), {\n status: runStatus,\n errorMessage,\n attributes: {\n \"llm.model\": usage.model,\n \"llm.input_tokens\": usage.inputTokens,\n \"llm.output_tokens\": usage.outputTokens,\n \"llm.cache_read_tokens\": usage.cacheReadTokens,\n \"llm.cache_write_tokens\": usage.cacheWriteTokens,\n \"llm.cost_cents_x100\": costCentsX100,\n },\n });\n }\n for (const toolSpan of openOtelToolSpans) {\n endAgentSpan(toolSpan, {\n status: \"error\",\n errorMessage: \"Agent run ended before tool_done.\",\n });\n }\n openOtelToolSpans.clear();\n endAgentSpan(await otelRunSpanPromise, {\n status: runStatus,\n errorMessage,\n attributes: {\n \"agent.tool_calls\": toolCallCount,\n \"agent.successful_tools\": successfulTools,\n \"agent.failed_tools\": failedTools,\n \"agent.duration_ms\": totalDurationMs,\n \"agent.input_tokens\": usage?.inputTokens ?? 0,\n \"agent.output_tokens\": usage?.outputTokens ?? 0,\n \"agent.cost_cents_x100\": costCentsX100,\n },\n });\n } catch {\n // OTel export must never break the run.\n }\n }\n\n return usage!;\n}\n\nasync function writeTraceData(\n spans: TraceSpan[],\n summary: TraceSummary,\n runId: string,\n config: ObservabilityConfig,\n): Promise<void> {\n const { insertTraceSpan, upsertTraceSummary } = await import(\"./store.js\");\n await Promise.all(spans.map((s) => insertTraceSpan(s).catch(() => {})));\n await upsertTraceSummary(summary).catch(() => {});\n\n // Fire automated evals after trace data is persisted\n try {\n const { evaluateRun } = await import(\"./evals.js\");\n await evaluateRun(runId, { sampleRate: config.evalSampleRate });\n } catch {}\n}\n"]}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Optional OpenTelemetry layer for the agent loop.
|
|
3
|
+
*
|
|
4
|
+
* The framework's primary trace store is the in-house `agent_trace_spans` /
|
|
5
|
+
* `agent_trace_summaries` tables (see `traces.ts`). This module adds an
|
|
6
|
+
* OPTIONAL, no-op-unless-configured OpenTelemetry export on top of that, so a
|
|
7
|
+
* host that already runs an OTel collector can see agent runs, model calls, and
|
|
8
|
+
* tool calls alongside the rest of their distributed traces.
|
|
9
|
+
*
|
|
10
|
+
* Design constraints:
|
|
11
|
+
* - `@opentelemetry/api` is an OPTIONAL dependency. If it isn't installed the
|
|
12
|
+
* helpers degrade to silent no-ops — nothing here ever throws into the agent
|
|
13
|
+
* loop.
|
|
14
|
+
* - The API package ships a default NO-OP tracer. Until a host registers a
|
|
15
|
+
* real `TracerProvider` (via `@opentelemetry/sdk-node` or similar, which
|
|
16
|
+
* core deliberately does NOT depend on), `tracer.startSpan(...)` returns a
|
|
17
|
+
* no-op span and the cost is a couple of property reads. We never register a
|
|
18
|
+
* provider ourselves — instrumentation is opt-in by the embedding app.
|
|
19
|
+
* - Heavy SDK packages (`@opentelemetry/sdk-*`, exporters) are NOT added to
|
|
20
|
+
* core. The host owns the provider/exporter wiring; core only emits spans.
|
|
21
|
+
*/
|
|
22
|
+
/**
|
|
23
|
+
* Minimal structural subset of the OpenTelemetry `Span` we use. Declared
|
|
24
|
+
* locally so this module type-checks even when `@opentelemetry/api` isn't
|
|
25
|
+
* installed (it's an optional dependency).
|
|
26
|
+
*/
|
|
27
|
+
export interface AgentSpan {
|
|
28
|
+
setAttribute(key: string, value: string | number | boolean): void;
|
|
29
|
+
setAttributes(attributes: Record<string, string | number | boolean>): void;
|
|
30
|
+
/** OTel `SpanStatusCode`: 1 = OK, 2 = ERROR. */
|
|
31
|
+
setStatus(status: {
|
|
32
|
+
code: number;
|
|
33
|
+
message?: string;
|
|
34
|
+
}): void;
|
|
35
|
+
recordException(exception: {
|
|
36
|
+
name?: string;
|
|
37
|
+
message: string;
|
|
38
|
+
}): void;
|
|
39
|
+
end(): void;
|
|
40
|
+
}
|
|
41
|
+
/** OTel `SpanStatusCode` values, inlined so we don't need the api types here. */
|
|
42
|
+
export declare const SPAN_STATUS_OK = 1;
|
|
43
|
+
export declare const SPAN_STATUS_ERROR = 2;
|
|
44
|
+
interface AgentTracer {
|
|
45
|
+
startSpan(name: string, options?: {
|
|
46
|
+
attributes?: Record<string, string | number | boolean>;
|
|
47
|
+
}): AgentSpan;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Start a span. When OTel isn't installed (or no provider is registered) this
|
|
51
|
+
* returns `null` and the caller simply skips span bookkeeping — there is no
|
|
52
|
+
* runtime cost beyond the cached null check.
|
|
53
|
+
*/
|
|
54
|
+
export declare function startAgentSpan(name: string, attributes?: Record<string, string | number | boolean | null | undefined>): Promise<AgentSpan | null>;
|
|
55
|
+
/**
|
|
56
|
+
* Finish a span, setting OK/ERROR status and recording the error message when
|
|
57
|
+
* present. Safe to call with `null` (no-op) and never throws.
|
|
58
|
+
*/
|
|
59
|
+
export declare function endAgentSpan(span: AgentSpan | null, result?: {
|
|
60
|
+
status?: "success" | "error";
|
|
61
|
+
errorMessage?: string | null;
|
|
62
|
+
attributes?: Record<string, string | number | boolean | null | undefined>;
|
|
63
|
+
}): void;
|
|
64
|
+
/** For tests — reset the cached tracer so a fresh provider can be detected. */
|
|
65
|
+
export declare function __resetAgentTracerCache(): void;
|
|
66
|
+
/**
|
|
67
|
+
* For tests — inject a tracer directly (e.g. an in-memory test provider's
|
|
68
|
+
* tracer) without going through the `@opentelemetry/api` global. Pass `null`
|
|
69
|
+
* to simulate "no tracer available".
|
|
70
|
+
*/
|
|
71
|
+
export declare function __setAgentTracerForTests(tracer: AgentTracer | null): void;
|
|
72
|
+
export {};
|
|
73
|
+
//# sourceMappingURL=tracing.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tracing.d.ts","sourceRoot":"","sources":["../../src/observability/tracing.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAIH;;;;GAIG;AACH,MAAM,WAAW,SAAS;IACxB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,IAAI,CAAC;IAClE,aAAa,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC,GAAG,IAAI,CAAC;IAC3E,gDAAgD;IAChD,SAAS,CAAC,MAAM,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IAC5D,eAAe,CAAC,SAAS,EAAE;QAAE,IAAI,CAAC,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IACrE,GAAG,IAAI,IAAI,CAAC;CACb;AAED,iFAAiF;AACjF,eAAO,MAAM,cAAc,IAAI,CAAC;AAChC,eAAO,MAAM,iBAAiB,IAAI,CAAC;AAQnC,UAAU,WAAW;IACnB,SAAS,CACP,IAAI,EAAE,MAAM,EACZ,OAAO,CAAC,EAAE;QAAE,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC,CAAA;KAAE,GACnE,SAAS,CAAC;CACd;AAgCD;;;;GAIG;AACH,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,UAAU,GAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,IAAI,GAAG,SAAS,CAAM,GAC5E,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,CAU3B;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAC1B,IAAI,EAAE,SAAS,GAAG,IAAI,EACtB,MAAM,GAAE;IACN,MAAM,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC;IAC7B,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,IAAI,GAAG,SAAS,CAAC,CAAC;CACtE,GACL,IAAI,CA0BN;AAED,+EAA+E;AAC/E,wBAAgB,uBAAuB,IAAI,IAAI,CAE9C;AAED;;;;GAIG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,WAAW,GAAG,IAAI,GAAG,IAAI,CAEzE"}
|