veryfront 0.1.521 → 0.1.523
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/cli/templates/manifest.d.ts +405 -405
- package/esm/cli/templates/manifest.js +454 -454
- package/esm/deno.d.ts +2 -7
- package/esm/deno.js +4 -16
- package/esm/extensions/{ext-tracing-opentelemetry → ext-observability-opentelemetry}/src/index.d.ts +3 -3
- package/esm/extensions/ext-observability-opentelemetry/src/index.d.ts.map +1 -0
- package/esm/extensions/{ext-tracing-opentelemetry → ext-observability-opentelemetry}/src/index.js +10 -10
- package/esm/src/agent/service/config.d.ts.map +1 -1
- package/esm/src/agent/service/config.js +2 -0
- package/esm/src/agent/service/node-telemetry.d.ts +1 -1
- package/esm/src/agent/service/node-telemetry.d.ts.map +1 -1
- package/esm/src/agent/service/node-telemetry.js +1 -1
- package/esm/src/agent/testing/index.d.ts +1 -1
- package/esm/src/agent/testing/index.d.ts.map +1 -1
- package/esm/src/agent/testing/index.js +1 -1
- package/esm/src/agent/testing/live-evals/index.d.ts +2 -1
- package/esm/src/agent/testing/live-evals/index.d.ts.map +1 -1
- package/esm/src/agent/testing/live-evals/index.js +2 -1
- package/esm/src/agent/testing/live-evals/request.d.ts +16 -17
- package/esm/src/agent/testing/live-evals/request.d.ts.map +1 -1
- package/esm/src/agent/testing/live-evals/runner.d.ts +124 -0
- package/esm/src/agent/testing/live-evals/runner.d.ts.map +1 -0
- package/esm/src/agent/testing/live-evals/runner.js +391 -0
- package/esm/src/agent/veryfront-cloud-agent-service.js +2 -2
- package/esm/src/extensions/{tracing → observability}/index.d.ts +2 -2
- package/esm/src/extensions/observability/index.d.ts.map +1 -0
- package/esm/src/extensions/{tracing → observability}/index.js +2 -2
- package/esm/src/extensions/{tracing → observability}/node-telemetry-provider.d.ts +2 -2
- package/esm/src/extensions/observability/node-telemetry-provider.d.ts.map +1 -0
- package/esm/src/extensions/{tracing → observability}/node-telemetry-provider.js +2 -2
- package/esm/src/extensions/{tracing → observability}/tracing-exporter.d.ts +2 -2
- package/esm/src/extensions/observability/tracing-exporter.d.ts.map +1 -0
- package/esm/src/extensions/observability/tracing-exporter.js +8 -0
- package/esm/src/extensions/recommendations.js +2 -2
- package/esm/src/observability/metrics/manager.js +1 -1
- package/esm/src/observability/simple-metrics/otel-instruments.js +1 -1
- package/esm/src/observability/tracing/api-shim.d.ts +4 -4
- package/esm/src/observability/tracing/api-shim.js +7 -7
- package/esm/src/observability/tracing/manager.js +2 -2
- package/esm/src/observability/tracing/otlp-setup.d.ts +1 -1
- package/esm/src/observability/tracing/otlp-setup.js +4 -4
- package/esm/src/proxy/tracing.d.ts +1 -1
- package/esm/src/proxy/tracing.js +2 -2
- package/esm/src/react/components/chat/theme.d.ts.map +1 -1
- package/esm/src/react/components/chat/theme.js +4 -2
- package/esm/src/server/dev-ui/manifest.d.ts +17 -17
- package/esm/src/server/dev-ui/manifest.js +17 -17
- package/esm/src/server/handlers/dev/framework-candidates.generated.d.ts.map +1 -1
- package/esm/src/server/handlers/dev/framework-candidates.generated.js +4 -1
- package/esm/src/utils/version-constant.d.ts +1 -1
- package/esm/src/utils/version-constant.js +1 -1
- package/package.json +4 -4
- package/src/cli/templates/manifest.js +454 -454
- package/src/deno.js +4 -16
- package/src/extensions/{ext-tracing-opentelemetry → ext-observability-opentelemetry}/src/index.ts +12 -12
- package/src/src/agent/service/config.ts +2 -0
- package/src/src/agent/service/node-telemetry.ts +1 -1
- package/src/src/agent/testing/index.ts +12 -0
- package/src/src/agent/testing/live-evals/index.ts +18 -1
- package/src/src/agent/testing/live-evals/request.ts +19 -1
- package/src/src/agent/testing/live-evals/runner.ts +629 -0
- package/src/src/agent/veryfront-cloud-agent-service.ts +2 -2
- package/src/src/extensions/{tracing → observability}/index.ts +2 -2
- package/src/src/extensions/{tracing → observability}/node-telemetry-provider.ts +2 -2
- package/src/src/extensions/{tracing → observability}/tracing-exporter.ts +2 -2
- package/src/src/extensions/recommendations.ts +2 -2
- package/src/src/observability/metrics/manager.ts +1 -1
- package/src/src/observability/simple-metrics/otel-instruments.ts +1 -1
- package/src/src/observability/tracing/api-shim.ts +7 -7
- package/src/src/observability/tracing/manager.ts +2 -2
- package/src/src/observability/tracing/otlp-setup.ts +4 -4
- package/src/src/proxy/tracing.ts +2 -2
- package/src/src/react/components/chat/theme.ts +4 -2
- package/src/src/server/bootstrap.ts +1 -1
- package/src/src/server/dev-ui/manifest.js +17 -17
- package/src/src/server/handlers/dev/framework-candidates.generated.ts +4 -1
- package/src/src/utils/version-constant.ts +1 -1
- package/esm/extensions/ext-tracing-opentelemetry/src/index.d.ts.map +0 -1
- package/esm/src/extensions/tracing/index.d.ts.map +0 -1
- package/esm/src/extensions/tracing/node-telemetry-provider.d.ts.map +0 -1
- package/esm/src/extensions/tracing/tracing-exporter.d.ts.map +0 -1
- package/esm/src/extensions/tracing/tracing-exporter.js +0 -8
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
import * as dntShim from "../../../../_dnt.shims.js";
|
|
2
|
+
import { agUiSseEventTypes, buildAgUiSseTraceSignature as buildTraceSignature, getAgUiSseStringField as getStringField, parseAgUiSseResponse as parseSseResponse, } from "../../index.js";
|
|
3
|
+
import { buildFailureSuffix, buildProgressLine, containsOrderedSubsequence } from "./formatting.js";
|
|
4
|
+
import { buildLiveEvalRequestBody } from "./request.js";
|
|
5
|
+
import { createFailedEvalResult, createPassedEvalResult, createSkippedEvalResult, } from "./result.js";
|
|
6
|
+
/**
 * Picks the fetch implementation used for live-eval HTTP calls.
 *
 * @param {object} config - Runner configuration; may carry an injected `fetch`.
 * @returns {Function} `config.fetch` unless it is null/undefined, in which
 *   case the ambient `fetch` is returned.
 */
function resolveFetch(config) {
    const injectedFetch = config.fetch;
    return injectedFetch ?? fetch;
}
|
|
9
|
+
/**
 * Creates the LLM-judge helpers bound to a single live-eval configuration.
 *
 * @param {object} config - Runner configuration. Reads `endpoint`,
 *   `authToken`, `enableLlmJudge` and (through `resolveFetch`) `fetch`.
 * @returns {{ judgeLlm: Function, withJudge: Function }}
 */
function createLiveEvalJudgeSupport(config) {
    /**
     * Asks the configured endpoint to grade an answer.
     *
     * Never rejects: any thrown error is converted into a failing verdict
     * whose reason is the error message.
     *
     * @param {{ question: string, answer: string, criteria: string }} input
     * @returns {Promise<{ pass: boolean, reason: string }>}
     */
    async function judgeLlm(input) {
        try {
            const judgeRequest = buildLiveEvalRequestBody({
                testCaseId: "llm-judge",
                prompt: `You are an eval judge. Grade the following answer.

QUESTION: ${input.question}

ANSWER: ${input.answer}

CRITERIA: ${input.criteria}

Respond with exactly one line: PASS or FAIL followed by a brief reason.
Example: "PASS — correctly explains the pattern with accurate details"
Example: "FAIL — mentions the wrong file convention"`,
                projectId: null,
                allowedTools: [],
                forceRuntimeOverrides: true,
                maxSteps: 2,
            });
            const send = resolveFetch(config);
            const requestInit = {
                method: "POST",
                headers: {
                    "Content-Type": "application/json",
                    Authorization: `Bearer ${config.authToken}`,
                },
                body: JSON.stringify(judgeRequest),
                // The judge call is capped at 30 seconds.
                signal: AbortSignal.timeout(30_000),
            };
            const response = await send(config.endpoint, requestInit);
            const run = await parseSseResponse(response);
            if (run.responseStatus !== 200) {
                return { pass: false, reason: `judge returned HTTP ${run.responseStatus}` };
            }
            // The verdict is the first non-blank line of the streamed text.
            let verdictLine = "";
            for (const rawLine of run.text.split("\n")) {
                const candidate = rawLine.trim();
                if (candidate.length > 0) {
                    verdictLine = candidate;
                    break;
                }
            }
            const passed = verdictLine.toUpperCase().startsWith("PASS");
            if (passed) {
                return { pass: true, reason: verdictLine };
            }
            return { pass: false, reason: verdictLine || "judge returned no decision" };
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            return { pass: false, reason };
        }
    }
    /**
     * Wraps a structural verifier so the LLM judge runs only after the
     * structural check passes and only when `config.enableLlmJudge` is set.
     *
     * @param {Function} structuralVerify - Receives the run; a truthy return
     *   value is treated as the failure and returned unchanged.
     * @param {{ question: string, criteria: string }} judgeInput
     * @returns {Function} Async verifier resolving to `null` on success or a
     *   failure value.
     */
    function withJudge(structuralVerify, judgeInput) {
        return async (run) => {
            const structuralFailure = structuralVerify(run);
            if (structuralFailure) {
                return structuralFailure;
            }
            if (!config.enableLlmJudge) {
                return null;
            }
            const { pass, reason } = await judgeLlm({
                question: judgeInput.question,
                answer: run.text,
                criteria: judgeInput.criteria,
            });
            if (pass) {
                return null;
            }
            return `LLM judge: ${reason}`;
        };
    }
    return { judgeLlm, withJudge };
}
|
|
81
|
+
/**
 * Produces the empty progress snapshot used before any stream event arrives.
 *
 * @returns {{ eventCount: number, lastEventType: null, lastToolCallName: null,
 *   toolStarts: Array, textLength: number }} A fresh object on every call.
 */
function createInitialProgressSnapshot() {
    const emptySnapshot = {
        eventCount: 0,
        lastEventType: null,
        lastToolCallName: null,
        toolStarts: [],
        textLength: 0,
    };
    return emptySnapshot;
}
|
|
90
|
+
/**
 * Tells whether a timer handle is an object exposing an `unref()` method.
 *
 * @param {unknown} value - Whatever the timer API returned.
 * @returns {boolean} True only for non-null objects whose `unref` is a function.
 */
function isUnrefableTimer(value) {
    if (typeof value !== "object" || value === null) {
        return false;
    }
    if (!("unref" in value)) {
        return false;
    }
    return typeof value.unref === "function";
}
|
|
94
|
+
/**
 * Calls `timer.unref()` when the handle supports it; otherwise does nothing.
 *
 * @param {unknown} timer - Handle returned by a timer API.
 * @returns {void}
 */
function maybeUnrefTimer(timer) {
    if (!isUnrefableTimer(timer)) {
        return;
    }
    timer.unref();
}
|
|
99
|
+
/**
 * Starts a periodic progress logger for one eval case.
 *
 * Every `input.intervalMs` milliseconds the latest snapshot is formatted with
 * `buildProgressLine` and handed to `input.log`.
 *
 * @param {{ caseId: string, startedAt: number, intervalMs: number, log: Function }} input
 * @returns {{ stop: Function, update: Function, getSnapshot: Function }}
 *   `stop` clears the interval, `update` replaces the stored snapshot and
 *   `getSnapshot` returns the most recently stored one.
 */
function createLiveEvalProgressReporter(input) {
    let latestProgress = createInitialProgressSnapshot();
    const emitProgressLine = () => {
        input.log(buildProgressLine({
            caseId: input.caseId,
            startedAt: input.startedAt,
            progress: latestProgress,
        }));
    };
    const progressTimer = dntShim.setInterval(emitProgressLine, input.intervalMs);
    maybeUnrefTimer(progressTimer);
    const stop = () => {
        clearInterval(progressTimer);
    };
    const update = (snapshot) => {
        latestProgress = snapshot;
    };
    const getSnapshot = () => latestProgress;
    return { stop, update, getSnapshot };
}
|
|
119
|
+
/**
 * Collects the artifact paths advertised in a prepared case's metadata.
 *
 * A metadata entry counts as a path when its key contains "path"
 * (case-insensitive) and its value is a non-empty string. Non-string values
 * (null, numbers, arrays, ...) are ignored instead of being dereferenced, so
 * loosely-typed metadata cannot throw here or leak non-string "paths".
 *
 * @param {{ metadata?: Record<string, unknown> } | null | undefined} prepared
 * @returns {string[]} De-duplicated paths in default (code-unit) sort order;
 *   empty when there is no metadata.
 */
function collectPreparedArtifactPaths(prepared) {
    if (!prepared?.metadata) {
        return [];
    }
    const uniquePaths = new Set();
    for (const [key, value] of Object.entries(prepared.metadata)) {
        const isPathKey = key.toLowerCase().includes("path");
        if (isPathKey && typeof value === "string" && value.length > 0) {
            uniquePaths.add(value);
        }
    }
    return [...uniquePaths].sort();
}
|
|
129
|
+
/**
 * Reads the conversation id a prepared case stored in its metadata.
 *
 * @param {{ metadata?: { conversationId?: unknown } } | null | undefined} prepared
 * @returns {string | null} The id when it is a non-empty string, else `null`.
 */
function extractPreparedConversationId(prepared) {
    const candidate = prepared?.metadata?.conversationId;
    if (typeof candidate !== "string" || candidate.length === 0) {
        return null;
    }
    return candidate;
}
|
|
135
|
+
/**
 * Summarises a completed run into the artifact fields attached to results.
 *
 * @param {{ run: { toolStarts: string[], toolArgs: string[], text: string },
 *   runId?: string, traceSignature: string }} input
 * @returns {object} `runId` (only when truthy), `traceSignature`,
 *   `toolStarts`, `toolArgsPreview` (tool args joined with " | ", first 1000
 *   characters) and `textPreview` (first 280 characters of the text).
 */
function createLiveEvalRunArtifacts(input) {
    const { run, runId, traceSignature } = input;
    const artifacts = {};
    if (runId) {
        artifacts.runId = runId;
    }
    artifacts.traceSignature = traceSignature;
    artifacts.toolStarts = run.toolStarts;
    artifacts.toolArgsPreview = run.toolArgs.join(" | ").slice(0, 1000);
    artifacts.textPreview = run.text.slice(0, 280);
    return artifacts;
}
|
|
144
|
+
/**
 * Builds a failed eval result for a run that completed but did not verify.
 *
 * Optional fields (`conversationId`, `runId`, `artifactPaths`) are only
 * included when they carry a truthy / non-empty value.
 *
 * @param {{ context: object, details: string, runArtifacts: object }} input
 * @returns {*} Whatever `createFailedEvalResult` returns for the payload.
 */
function createFailedRunEvalResult(input) {
    const { context, runArtifacts } = input;
    const payload = {
        id: context.id,
        label: context.label,
        runtime: context.runtime,
        details: input.details,
        startedAt: context.startedAt,
    };
    if (context.conversationId) {
        payload.conversationId = context.conversationId;
    }
    if (runArtifacts.runId) {
        payload.runId = runArtifacts.runId;
    }
    if (context.artifactPaths?.length) {
        payload.artifactPaths = context.artifactPaths;
    }
    payload.traceSignature = runArtifacts.traceSignature;
    payload.toolStarts = runArtifacts.toolStarts;
    payload.toolArgsPreview = runArtifacts.toolArgsPreview;
    payload.textPreview = runArtifacts.textPreview;
    return createFailedEvalResult(payload);
}
|
|
160
|
+
/**
 * Builds a passed eval result for a run that completed and verified.
 *
 * Optional fields (`conversationId`, `runId`, `artifactPaths`) are only
 * included when they carry a truthy / non-empty value.
 *
 * @param {{ context: object, details: string, runArtifacts: object }} input
 * @returns {*} Whatever `createPassedEvalResult` returns for the payload.
 */
function createPassedRunEvalResult(input) {
    const { context, runArtifacts } = input;
    const payload = {
        id: context.id,
        label: context.label,
        runtime: context.runtime,
        details: input.details,
        startedAt: context.startedAt,
    };
    if (context.conversationId) {
        payload.conversationId = context.conversationId;
    }
    if (runArtifacts.runId) {
        payload.runId = runArtifacts.runId;
    }
    if (context.artifactPaths?.length) {
        payload.artifactPaths = context.artifactPaths;
    }
    payload.traceSignature = runArtifacts.traceSignature;
    payload.toolStarts = runArtifacts.toolStarts;
    payload.toolArgsPreview = runArtifacts.toolArgsPreview;
    payload.textPreview = runArtifacts.textPreview;
    return createPassedEvalResult(payload);
}
|
|
176
|
+
/**
 * Builds a failed eval result for a run whose request or stream threw.
 *
 * The failure details are suffixed with `buildFailureSuffix(progress)`, and
 * the text preview only reports how many characters were streamed.
 *
 * @param {{ context: object, details: string,
 *   progress: { toolStarts: string[], textLength: number } }} input
 * @returns {*} Whatever `createFailedEvalResult` returns for the payload.
 */
function createStreamingFailureEvalResult(input) {
    const { context, progress } = input;
    const payload = {
        id: context.id,
        label: context.label,
        runtime: context.runtime,
        details: `${input.details}${buildFailureSuffix(progress)}`,
        startedAt: context.startedAt,
    };
    if (context.conversationId) {
        payload.conversationId = context.conversationId;
    }
    if (context.artifactPaths?.length) {
        payload.artifactPaths = context.artifactPaths;
    }
    payload.toolStarts = progress.toolStarts;
    // The key is always present; its value is undefined when nothing streamed.
    payload.textPreview = progress.textLength > 0
        ? `${progress.textLength} characters streamed`
        : undefined;
    return createFailedEvalResult(payload);
}
|
|
191
|
+
/**
 * Assembles the identity fields shared by every result of one eval case.
 *
 * @param {{ testCase: { id: string, label: string }, runtime: string,
 *   startedAt: number, conversationId?: string | null,
 *   artifactPaths: string[] }} input
 * @returns {object} `id`, `label`, `runtime`, `startedAt`, plus
 *   `conversationId` when truthy and `artifactPaths` when non-empty.
 */
function createLiveEvalResultContext(input) {
    const { testCase, conversationId, artifactPaths } = input;
    const context = {
        id: testCase.id,
        label: testCase.label,
        runtime: input.runtime,
        startedAt: input.startedAt,
    };
    if (conversationId) {
        context.conversationId = conversationId;
    }
    if (artifactPaths.length > 0) {
        context.artifactPaths = artifactPaths;
    }
    return context;
}
|
|
201
|
+
/**
 * Produces the JSON-serialisable request body for one eval run.
 *
 * When the prepared metadata carries a non-empty string `customBody`, that
 * string is parsed as JSON and returned as-is (a malformed string makes
 * `JSON.parse` throw). Otherwise the body is built with
 * `buildLiveEvalRequestBody` from the test case and configuration.
 *
 * @param {{ config: object, testCase: object, prepared?: object | null,
 *   conversationId?: string | null }} input
 * @returns {*} The parsed custom body or the built request body.
 */
function buildLiveEvalRunBody(input) {
    const { config, testCase, prepared, conversationId } = input;
    const rawCustomBody = prepared?.metadata?.customBody;
    if (typeof rawCustomBody === "string" && rawCustomBody) {
        return JSON.parse(rawCustomBody);
    }
    const requestOptions = {
        testCaseId: testCase.id,
        prompt: prepared?.prompt ?? testCase.prompt ?? "",
        metadata: prepared?.metadata,
        // A project id is only forwarded for cases that require a project.
        projectId: config.projectId && testCase.requireProject
            ? config.projectId
            : null,
    };
    if (config.branchId) {
        requestOptions.branchId = config.branchId;
    }
    if (config.model) {
        requestOptions.model = config.model;
    }
    if (conversationId) {
        requestOptions.conversationId = conversationId;
    }
    requestOptions.allowedTools = testCase.allowedTools;
    requestOptions.forceRuntimeOverrides = testCase.forceRuntimeOverrides;
    requestOptions.maxSteps = testCase.maxSteps;
    return buildLiveEvalRequestBody(requestOptions);
}
|
|
223
|
+
/**
 * Turns a fully streamed run into a passed or failed eval result.
 *
 * The case's own `verify` runs first; a truthy return value is the failure
 * detail. Only when that passes is the optional
 * `expectedEventSubsequence` checked against the run's event types.
 *
 * @param {{ testCase: object, run: object, prepared: object | null,
 *   context: object, runId?: string }} input
 * @returns {Promise<*>} The result produced by the passed/failed builders.
 */
async function resolveCompletedLiveEvalRun(input) {
    const { run, testCase, context } = input;
    const traceSignature = buildTraceSignature(run.eventTypes);
    const runArtifacts = createLiveEvalRunArtifacts({
        run,
        runId: input.runId,
        traceSignature,
    });
    const failure = await testCase.verify(run, input.prepared);
    if (failure) {
        return createFailedRunEvalResult({ context, details: failure, runArtifacts });
    }
    const expectedSubsequence = testCase.expectedEventSubsequence;
    if (expectedSubsequence && !containsOrderedSubsequence(run.eventTypes, expectedSubsequence)) {
        return createFailedRunEvalResult({
            context,
            details: `Expected AG-UI event subsequence ${expectedSubsequence.join(" -> ")}, got ${traceSignature}`,
            runArtifacts,
        });
    }
    const toolSummary = run.toolStarts.join(", ") || "no tools";
    const textSummary = run.text.slice(0, 140) || "no text";
    return createPassedRunEvalResult({
        context,
        details: `OK: ${toolSummary} | ${textSummary}`,
        runArtifacts,
    });
}
|
|
253
|
+
/**
 * Finds the first run identifier carried by any event of a run.
 *
 * Each event is checked for a `runId` field and then a `run_id` field.
 *
 * @param {{ events: Iterable<object> }} run
 * @returns {string | null} The first truthy id found, or `null`.
 */
function extractRunId(run) {
    for (const event of run.events) {
        const candidate = getStringField(event, "runId") ?? getStringField(event, "run_id");
        if (!candidate) {
            continue;
        }
        return candidate;
    }
    return null;
}
|
|
262
|
+
/**
 * Reports whether a run reached its run-finished event without a run error.
 *
 * @param {{ eventTypes: string[], runError?: unknown }} run
 * @returns {boolean}
 */
export function hasFinished(run) {
    const sawRunFinished = run.eventTypes.includes(agUiSseEventTypes.runFinished);
    if (!sawRunFinished) {
        return false;
    }
    return !run.runError;
}
|
|
265
|
+
/**
 * Reports whether a run started the `load_skill` tool and the given skill id
 * appears in the concatenated tool arguments.
 *
 * @param {{ toolStarts: string[], toolArgs: string[] }} run
 * @param {string} skillId
 * @returns {boolean}
 */
export function containsSkillLoad(run, skillId) {
    if (!run.toolStarts.includes("load_skill")) {
        return false;
    }
    const combinedArgs = run.toolArgs.join("");
    return combinedArgs.includes(skillId);
}
|
|
268
|
+
/**
 * Counts how many step-started events a run emitted.
 *
 * @param {{ eventTypes: string[] }} run
 * @returns {number}
 */
export function countStepStartedEvents(run) {
    return run.eventTypes.reduce((total, eventType) => (eventType === agUiSseEventTypes.stepStarted ? total + 1 : total), 0);
}
|
|
271
|
+
/**
 * Creates the helpers eval suites use to run and verify live cases against
 * one configured endpoint.
 *
 * @param {object} config - Runner configuration. Reads `endpoint`,
 *   `authToken`, `apiUrl`, `projectId`, `readProjectFile`, `requestTimeoutMs`,
 *   `progressLogIntervalMs`, `log` (defaults to `console.log`) and, through
 *   the helpers it calls, `fetch`, `branchId`, `model` and `enableLlmJudge`.
 * @returns {{ judgeLlm: Function, runEval: Function,
 *   verifyFileExists: Function, withJudge: Function }}
 */
export function createLiveEvalCaseSupport(config) {
    const fetchImpl = resolveFetch(config);
    const log = config.log ?? console.log;
    const { judgeLlm, withJudge } = createLiveEvalJudgeSupport(config);
    /**
     * Checks that a project file exists, is non-empty and (optionally)
     * contains every required keyword, compared case-insensitively.
     *
     * @param {{ filePath: string, description?: string,
     *   requiredContent?: string[] }} input
     * @returns {Promise<string | null>} A failure message, or `null` when the
     *   file is acceptable or the check cannot run because `projectId` or
     *   `readProjectFile` is not configured.
     */
    async function verifyFileExists(input) {
        if (!config.projectId || !config.readProjectFile) {
            return null;
        }
        const file = await config.readProjectFile({
            filePath: input.filePath,
            requestTimeoutMs: config.requestTimeoutMs,
        });
        const subject = input.description ?? input.filePath;
        if (!file) {
            return `${subject}: file not found in project after task completed`;
        }
        if (!file.content || file.content.trim().length === 0) {
            return `${subject}: file exists but is empty`;
        }
        if (input.requiredContent) {
            // Lower-case the content once rather than once per keyword.
            const haystack = file.content.toLowerCase();
            const missing = input.requiredContent.filter((keyword) => !haystack.includes(keyword.toLowerCase()));
            if (missing.length > 0) {
                return `${subject}: missing required content: ${missing.join(", ")}. Got: ${file.content.slice(0, 200)}`;
            }
        }
        return null;
    }
    /**
     * Runs one eval case end to end: optional preparation, the streamed
     * request, verification, and cleanup.
     *
     * Errors thrown by the request or while parsing the stream are reported
     * as a failed result. Errors thrown by preparation, body construction,
     * sidecar start-up or verification reject the returned promise.
     *
     * @param {object} testCase - Case definition (`id`, `label`, `verify`,
     *   optional `prepare`, `requireProject`, ...).
     * @param {string} runtime - Runtime label recorded on the result.
     * @returns {Promise<*>} A skipped, passed or failed eval result.
     */
    async function runEval(testCase, runtime) {
        const startedAt = Date.now();
        if (testCase.requireProject && !config.projectId) {
            return createSkippedEvalResult({
                id: testCase.id,
                label: testCase.label,
                runtime,
                details: "Skipped because AG_UI_EVAL_PROJECT_ID is not set.",
                startedAt,
            });
        }
        const prepared = testCase.prepare
            ? await testCase.prepare({
                apiUrl: config.apiUrl,
                authToken: config.authToken,
                projectId: config.projectId,
            })
            : null;
        const preparedConversationId = extractPreparedConversationId(prepared);
        const preparedArtifactPaths = collectPreparedArtifactPaths(prepared);
        const resultContext = createLiveEvalResultContext({
            testCase,
            runtime,
            startedAt,
            conversationId: preparedConversationId,
            artifactPaths: preparedArtifactPaths,
        });
        try {
            // Build the body before acquiring the sidecar or the progress
            // timer: parsing a malformed custom body throws, and at this point
            // only `prepared` needs releasing (handled by the outer finally).
            const body = buildLiveEvalRunBody({
                config,
                testCase,
                prepared,
                conversationId: preparedConversationId,
            });
            const sidecarCleanup = prepared?.startSidecar ? await prepared.startSidecar() : undefined;
            try {
                const progressReporter = createLiveEvalProgressReporter({
                    caseId: testCase.id,
                    startedAt,
                    intervalMs: config.progressLogIntervalMs,
                    log,
                });
                try {
                    let run;
                    let runId;
                    try {
                        const response = await fetchImpl(config.endpoint, {
                            method: "POST",
                            headers: {
                                "Content-Type": "application/json",
                                Authorization: `Bearer ${config.authToken}`,
                            },
                            body: JSON.stringify(body),
                            signal: AbortSignal.timeout(config.requestTimeoutMs),
                        });
                        log(`[stream] ${runtime}:${testCase.id} HTTP ${response.status}`);
                        run = await parseSseResponse(response, {
                            onProgress: progressReporter.update,
                        });
                        runId = extractRunId(run) ?? undefined;
                    }
                    catch (error) {
                        const message = error instanceof Error ? error.message : String(error);
                        return createStreamingFailureEvalResult({
                            context: resultContext,
                            details: message,
                            progress: progressReporter.getSnapshot(),
                        });
                    }
                    // Await here so the finally blocks below do not tear down
                    // the sidecar and prepared fixtures while verification is
                    // still running. Verification errors still reject.
                    return await resolveCompletedLiveEvalRun({
                        testCase,
                        run,
                        prepared,
                        context: resultContext,
                        runId,
                    });
                }
                finally {
                    progressReporter.stop();
                }
            }
            finally {
                await sidecarCleanup?.();
            }
        }
        finally {
            await prepared?.cleanup?.();
        }
    }
    return {
        judgeLlm,
        runEval,
        verifyFileExists,
        withJudge,
    };
}
|
|
384
|
+
/**
 * Module-private helpers re-exported under one object so they can be
 * reached from outside this module without widening its named exports.
 */
export const liveEvalRunnerInternals = {
    collectPreparedArtifactPaths: collectPreparedArtifactPaths,
    createFailedRunEvalResult: createFailedRunEvalResult,
    createLiveEvalRunArtifacts: createLiveEvalRunArtifacts,
    createPassedRunEvalResult: createPassedRunEvalResult,
    createStreamingFailureEvalResult: createStreamingFailureEvalResult,
    extractRunId: extractRunId,
};
|
|
@@ -5,7 +5,7 @@ import { fileURLToPath } from "node:url";
|
|
|
5
5
|
import { createAgentServiceSandboxTools } from "../sandbox/index.js";
|
|
6
6
|
import { register, tryResolve } from "../extensions/contracts.js";
|
|
7
7
|
import { MISSING_EXTENSION_ERROR } from "../extensions/errors.js";
|
|
8
|
-
import { NodeTelemetryProviderName, } from "../extensions/
|
|
8
|
+
import { NodeTelemetryProviderName, } from "../extensions/observability/index.js";
|
|
9
9
|
import { SandboxShellToolsProviderName, } from "../extensions/sandbox/index.js";
|
|
10
10
|
import { createRemoteMCPToolSource, createToolsFromRemoteDefinitions, sleepTool, toolRegistry, } from "../tool/index.js";
|
|
11
11
|
import { parseProviderError } from "../chat/provider-errors.js";
|
|
@@ -171,7 +171,7 @@ async function ensureDefaultAuthProvider() {
|
|
|
171
171
|
async function ensureDefaultNodeTelemetryProvider() {
|
|
172
172
|
if (tryResolve(NodeTelemetryProviderName))
|
|
173
173
|
return;
|
|
174
|
-
const { OpenTelemetryNodeTelemetryProvider } = await import("../../extensions/ext-
|
|
174
|
+
const { OpenTelemetryNodeTelemetryProvider } = await import("../../extensions/ext-observability-opentelemetry/src/index.js");
|
|
175
175
|
register(NodeTelemetryProviderName, new OpenTelemetryNodeTelemetryProvider());
|
|
176
176
|
}
|
|
177
177
|
function resolveEnvironment(options) {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Observability category barrel: tracing and Node telemetry contracts.
|
|
3
3
|
*
|
|
4
|
-
* @module extensions/
|
|
4
|
+
* @module extensions/observability
|
|
5
5
|
*/
|
|
6
6
|
import "../../../_dnt.polyfills.js";
|
|
7
7
|
export type { SpanData, TracerProvider, TracingExporter } from "./tracing-exporter.js";
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/src/extensions/observability/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,4BAA4B,CAAC;AAGpC,YAAY,EAAE,QAAQ,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACvF,YAAY,EACV,8BAA8B,EAC9B,kCAAkC,EAClC,mBAAmB,EACnB,0BAA0B,EAC1B,qBAAqB,GACtB,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,yBAAyB,EAAE,MAAM,8BAA8B,CAAC"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Observability category barrel: tracing and Node telemetry contracts.
|
|
3
3
|
*
|
|
4
|
-
* @module extensions/
|
|
4
|
+
* @module extensions/observability
|
|
5
5
|
*/
|
|
6
6
|
import "../../../_dnt.polyfills.js";
|
|
7
7
|
export { NodeTelemetryProviderName } from "./node-telemetry-provider.js";
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Contract interface for Node.js OpenTelemetry runtime bootstrap.
|
|
3
3
|
*
|
|
4
|
-
* Default implementation: `@veryfront/ext-
|
|
4
|
+
* Default implementation: `@veryfront/ext-observability-opentelemetry`
|
|
5
5
|
*
|
|
6
|
-
* @module extensions/
|
|
6
|
+
* @module extensions/observability/node-telemetry-provider
|
|
7
7
|
*/
|
|
8
8
|
export declare const NodeTelemetryProviderName = "NodeTelemetryProvider";
|
|
9
9
|
export type NodeTelemetryInstrumentationConfig = {
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"node-telemetry-provider.d.ts","sourceRoot":"","sources":["../../../../src/src/extensions/observability/node-telemetry-provider.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,eAAO,MAAM,yBAAyB,0BAA0B,CAAC;AAEjE,MAAM,MAAM,kCAAkC,GAAG;IAC/C,IAAI,EAAE,OAAO,CAAC;IACd,OAAO,EAAE,OAAO,CAAC;IACjB,EAAE,EAAE,OAAO,CAAC;CACb,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IAChE,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;CAClE,CAAC;AAEF,MAAM,MAAM,0BAA0B,GAAG;IACvC,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC;CACrE,CAAC;AAEF,MAAM,MAAM,8BAA8B,GAAG;IAC3C,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACzC,eAAe,EAAE,kCAAkC,CAAC;IACpD,MAAM,CAAC,EAAE,mBAAmB,CAAC;IAC7B,aAAa,CAAC,EAAE,0BAA0B,CAAC;CAC5C,CAAC;AAEF;;;;;GAKG;AACH,MAAM,WAAW,qBAAqB;IACpC,UAAU,CAAC,OAAO,EAAE,8BAA8B,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;CACvE"}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Contract interface for Node.js OpenTelemetry runtime bootstrap.
|
|
3
3
|
*
|
|
4
|
-
* Default implementation: `@veryfront/ext-
|
|
4
|
+
* Default implementation: `@veryfront/ext-observability-opentelemetry`
|
|
5
5
|
*
|
|
6
|
-
* @module extensions/
|
|
6
|
+
* @module extensions/observability/node-telemetry-provider
|
|
7
7
|
*/
|
|
8
8
|
export const NodeTelemetryProviderName = "NodeTelemetryProvider";
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Contract interface for tracing/telemetry exporters.
|
|
3
3
|
*
|
|
4
|
-
* Default implementation: `@veryfront/ext-
|
|
4
|
+
* Default implementation: `@veryfront/ext-observability-opentelemetry`
|
|
5
5
|
*
|
|
6
|
-
* @module extensions/
|
|
6
|
+
* @module extensions/observability/tracing-exporter
|
|
7
7
|
*/
|
|
8
8
|
/**
|
|
9
9
|
* Minimal TracerProvider interface for the contract.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tracing-exporter.d.ts","sourceRoot":"","sources":["../../../../src/src/extensions/observability/tracing-exporter.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH;;;GAGG;AACH,MAAM,WAAW,cAAc;IAC7B,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;CACpD;AAED,2CAA2C;AAC3C,MAAM,WAAW,QAAQ;IACvB,8BAA8B;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,6CAA6C;IAC7C,OAAO,EAAE,MAAM,CAAC;IAChB,sCAAsC;IACtC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,qCAAqC;IACrC,IAAI,EAAE,MAAM,CAAC;IACb,0EAA0E;IAC1E,IAAI,EAAE,QAAQ,GAAG,QAAQ,GAAG,UAAU,GAAG,UAAU,GAAG,UAAU,CAAC;IACjE,sDAAsD;IACtD,SAAS,EAAE,MAAM,CAAC;IAClB,oDAAoD;IACpD,OAAO,EAAE,MAAM,CAAC;IAChB,iDAAiD;IACjD,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC,CAAC;IACtD,0BAA0B;IAC1B,MAAM,EAAE;QAAE,IAAI,EAAE,IAAI,GAAG,OAAO,GAAG,OAAO,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CAC9D;AAED;;;;;;GAMG;AACH,MAAM,WAAW,eAAe;IAC9B;;;OAGG;IACH,KAAK,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAEtD,yCAAyC;IACzC,MAAM,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAEzC,gDAAgD;IAChD,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAE1B;;;OAGG;IACH,WAAW,IAAI,cAAc,CAAC;IAE9B;;;OAGG;IACH,aAAa,IAAI;QAAE,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,CAAA;KAAE,GAAG,IAAI,CAAC;IAE1F;;;;OAIG;IACH,WAAW,CAAC,IAAI;QAAE,aAAa,IAAI,OAAO,CAAC;QAAC,OAAO,CAAC,GAAG,EAAE,OAAO,GAAG,OAAO,CAAA;KAAE,GAAG,IAAI,CAAC;CACrF"}
|
|
@@ -13,8 +13,8 @@ const recommendations = new Map([
|
|
|
13
13
|
["ContentProcessor", "@veryfront/ext-content-mdx"],
|
|
14
14
|
["DocumentExtractor", "@veryfront/ext-document-kreuzberg"],
|
|
15
15
|
["AuthProvider", "@veryfront/ext-auth-jwt"],
|
|
16
|
-
["TracingExporter", "@veryfront/ext-
|
|
17
|
-
["NodeTelemetryProvider", "@veryfront/ext-
|
|
16
|
+
["TracingExporter", "@veryfront/ext-observability-opentelemetry"],
|
|
17
|
+
["NodeTelemetryProvider", "@veryfront/ext-observability-opentelemetry"],
|
|
18
18
|
["LLMProvider:openai", "@veryfront/ext-llm-openai"],
|
|
19
19
|
["LLMProvider:anthropic", "@veryfront/ext-llm-anthropic"],
|
|
20
20
|
["LLMProvider:google", "@veryfront/ext-llm-google"],
|
|
@@ -68,7 +68,7 @@ export class MetricsManager {
|
|
|
68
68
|
return;
|
|
69
69
|
}
|
|
70
70
|
try {
|
|
71
|
-
// The metrics API is injected by ext-
|
|
71
|
+
// The metrics API is injected by ext-observability-opentelemetry via setGlobalMetricsAPI().
|
|
72
72
|
// When the extension is not active, metrics collection is disabled.
|
|
73
73
|
const metricsApi = getGlobalMetricsAPI();
|
|
74
74
|
if (!metricsApi) {
|
|
@@ -23,7 +23,7 @@ export async function ensureOtelInstruments() {
|
|
|
23
23
|
if (!isDeno)
|
|
24
24
|
return;
|
|
25
25
|
try {
|
|
26
|
-
// The metrics API is injected by ext-
|
|
26
|
+
// The metrics API is injected by ext-observability-opentelemetry via setGlobalMetricsAPI().
|
|
27
27
|
// When the extension is not active, the meter is unavailable and we return.
|
|
28
28
|
const metricsApi = getGlobalMetricsAPI();
|
|
29
29
|
if (!metricsApi)
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Thin in-process shim for `@opentelemetry/api`.
|
|
3
3
|
*
|
|
4
4
|
* Core files that previously imported directly from `@opentelemetry/api` now
|
|
5
|
-
* import from this module. When the `ext-
|
|
5
|
+
* import from this module. When the `ext-observability-opentelemetry` extension is present
|
|
6
6
|
* the real SDK provider is wired in via `setGlobalTracerProvider`; otherwise
|
|
7
7
|
* every call falls back to a no-op implementation so the core boots without the
|
|
8
8
|
* extension installed.
|
|
@@ -128,7 +128,7 @@ export interface MetricsAPI {
|
|
|
128
128
|
}
|
|
129
129
|
/**
|
|
130
130
|
* Register the real OTel trace API's span accessors. Called by the
|
|
131
|
-
* ext-
|
|
131
|
+
* ext-observability-opentelemetry extension after it wires the SDK so that the shim's
|
|
132
132
|
* `trace.getActiveSpan()` / `trace.getSpan()` can return real spans.
|
|
133
133
|
*/
|
|
134
134
|
export declare function setGlobalActiveSpanAccessor(accessor: {
|
|
@@ -143,7 +143,7 @@ export declare function setGlobalTracerProvider(p: TracerProvider): void;
|
|
|
143
143
|
export declare function getGlobalTracerProvider(): TracerProvider;
|
|
144
144
|
/**
|
|
145
145
|
* Get a tracer from the active provider.
|
|
146
|
-
* Returns the no-op tracer when ext-
|
|
146
|
+
* Returns the no-op tracer when ext-observability-opentelemetry is not installed.
|
|
147
147
|
*/
|
|
148
148
|
export declare function getTracer(name: string, version?: string): Tracer;
|
|
149
149
|
export declare const context: {
|
|
@@ -168,7 +168,7 @@ export declare const defaultTextMapGetter: TextMapGetter<Record<string, string>>
|
|
|
168
168
|
export declare const defaultTextMapSetter: TextMapSetter<Record<string, string>>;
|
|
169
169
|
/**
|
|
170
170
|
* Register the OTel Metrics API (from the SDK).
|
|
171
|
-
* Called by ext-
|
|
171
|
+
* Called by ext-observability-opentelemetry in its setup hook so the metrics subsystem
|
|
172
172
|
* can use `getMeter()` when available.
|
|
173
173
|
*/
|
|
174
174
|
export declare function setGlobalMetricsAPI(api: MetricsAPI): void;
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Thin in-process shim for `@opentelemetry/api`.
|
|
3
3
|
*
|
|
4
4
|
* Core files that previously imported directly from `@opentelemetry/api` now
|
|
5
|
-
* import from this module. When the `ext-tracing-opentelemetry` extension is present
|
|
5
|
+
* import from this module. When the `ext-observability-opentelemetry` extension is present
|
|
6
6
|
* the real SDK provider is wired in via `setGlobalTracerProvider`; otherwise
|
|
7
7
|
* every call falls back to a no-op implementation so the core boots without the
|
|
8
8
|
* extension installed.
|
|
@@ -34,7 +34,7 @@ export const SpanStatusCode = {
|
|
|
34
34
|
ERROR: 2,
|
|
35
35
|
};
|
|
36
36
|
// ---------------------------------------------------------------------------
|
|
37
|
-
// No-op provider (default when ext-tracing-opentelemetry is not installed)
|
|
37
|
+
// No-op provider (default when ext-observability-opentelemetry is not installed)
|
|
38
38
|
// ---------------------------------------------------------------------------
|
|
39
39
|
function createNoopContext() {
|
|
40
40
|
return {
|
|
@@ -100,13 +100,13 @@ let _activeContext = NOOP_CONTEXT;
|
|
|
100
100
|
let _propagator = null;
|
|
101
101
|
/**
|
|
102
102
|
* Optional accessor for the currently active span. Wired by
|
|
103
|
-
* ext-tracing-opentelemetry (via `setGlobalActiveSpanAccessor`) so `trace.getActiveSpan()`
|
|
103
|
+
* ext-observability-opentelemetry (via `setGlobalActiveSpanAccessor`) so `trace.getActiveSpan()`
|
|
104
104
|
* and `trace.getSpan()` return the real SDK span once the extension is active.
|
|
105
105
|
*/
|
|
106
106
|
let _activeSpanAccessor = null;
|
|
107
107
|
/**
|
|
108
108
|
* Register the real OTel trace API's span accessors. Called by the
|
|
109
|
-
* ext-tracing-opentelemetry extension after it wires the SDK so that the shim's
|
|
109
|
+
* ext-observability-opentelemetry extension after it wires the SDK so that the shim's
|
|
110
110
|
* `trace.getActiveSpan()` / `trace.getSpan()` can return real spans.
|
|
111
111
|
*/
|
|
112
112
|
export function setGlobalActiveSpanAccessor(accessor) {
|
|
@@ -124,7 +124,7 @@ export function getGlobalTracerProvider() {
|
|
|
124
124
|
}
|
|
125
125
|
/**
|
|
126
126
|
* Get a tracer from the active provider.
|
|
127
|
-
* Returns the no-op tracer when ext-tracing-opentelemetry is not installed.
|
|
127
|
+
* Returns the no-op tracer when ext-observability-opentelemetry is not installed.
|
|
128
128
|
*/
|
|
129
129
|
export function getTracer(name, version) {
|
|
130
130
|
return _provider.getTracer(name, version);
|
|
@@ -208,12 +208,12 @@ export const defaultTextMapSetter = {
|
|
|
208
208
|
},
|
|
209
209
|
};
|
|
210
210
|
// ---------------------------------------------------------------------------
|
|
211
|
-
// Metrics API registry (injected by ext-tracing-opentelemetry when active)
|
|
211
|
+
// Metrics API registry (injected by ext-observability-opentelemetry when active)
|
|
212
212
|
// ---------------------------------------------------------------------------
|
|
213
213
|
let _metricsApi = null;
|
|
214
214
|
/**
|
|
215
215
|
* Register the OTel Metrics API (from the SDK).
|
|
216
|
-
* Called by ext-tracing-opentelemetry in its setup hook so the metrics subsystem
|
|
216
|
+
* Called by ext-observability-opentelemetry in its setup hook so the metrics subsystem
|
|
217
217
|
* can use `getMeter()` when available.
|
|
218
218
|
*/
|
|
219
219
|
export function setGlobalMetricsAPI(api) {
|
|
@@ -43,7 +43,7 @@ export class TracingManager {
|
|
|
43
43
|
}
|
|
44
44
|
}
|
|
45
45
|
async initializeTracer(config) {
|
|
46
|
-
// Use the shim API — delegates to the real SDK when ext-tracing-opentelemetry is wired.
|
|
46
|
+
// Use the shim API — delegates to the real SDK when ext-observability-opentelemetry is wired.
|
|
47
47
|
const shimApi = await import("./api-shim.js");
|
|
48
48
|
const api = {
|
|
49
49
|
trace: {
|
|
@@ -64,7 +64,7 @@ export class TracingManager {
|
|
|
64
64
|
};
|
|
65
65
|
this.state.api = api;
|
|
66
66
|
this.state.tracer = api.trace.getTracer(config.serviceName ?? "veryfront", VERSION);
|
|
67
|
-
// No-op propagator used only when ext-tracing-opentelemetry is NOT installed.
|
|
67
|
+
// No-op propagator used only when ext-observability-opentelemetry is NOT installed.
|
|
68
68
|
// When the extension is active, it registers W3CTraceContextPropagator
|
|
69
69
|
// on the shim directly; we intentionally do NOT wrap shimApi.propagation
|
|
70
70
|
// here (doing so would cause infinite recursion when the global
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**************************
|
|
2
2
|
* OpenTelemetry OTLP Setup
|
|
3
3
|
*
|
|
4
|
-
* Thin wrapper that delegates to the `ext-tracing-opentelemetry` extension via the
|
|
4
|
+
* Thin wrapper that delegates to the `ext-observability-opentelemetry` extension via the
|
|
5
5
|
* `TracingExporter` contract. When the extension is not installed, all span
|
|
6
6
|
* operations silently no-op.
|
|
7
7
|
*
|