evalution 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/LICENSE.addendum +79 -0
- package/LICENSING.md +69 -0
- package/README.md +50 -0
- package/bin/evalution.js +5 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +871 -0
- package/dist/client/assets/index-CORbBplP.js +144 -0
- package/dist/client/assets/index-CgcFVsRZ.css +32 -0
- package/dist/client/favicon.svg +19 -0
- package/dist/client/index.html +17 -0
- package/dist/index.d.ts +1440 -0
- package/dist/index.js +1325 -0
- package/dist/vercel-ai-sdk-CareWPDM.js +759 -0
- package/package.json +102 -0
|
@@ -0,0 +1,759 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { SpanStatusCode, trace } from "@opentelemetry/api";
|
|
3
|
+
import fs from "node:fs";
|
|
4
|
+
import { generateText, streamText } from "ai";
|
|
5
|
+
import { extractPropertiesFromDeclaration, findTypeDeclaration } from "ts-proppy";
|
|
6
|
+
import ts from "typescript";
|
|
7
|
+
//#region src/shared/helpers.ts
|
|
8
|
+
/**
 * Translates an OpenTelemetry GenAI operation name into one of evalution's
 * span kinds.
 *
 * @param operationName - Operation name to classify.
 * @returns `"LLM"`, `"TOOL"`, `"AGENT"` or `"EMBEDDING"` for a recognised
 * operation name, otherwise `"DEFAULT"`.
 */
function otelOperationToSpanKind(operationName) {
  // A Map (rather than a plain object) keeps names such as "constructor"
  // from accidentally resolving to an inherited property.
  const kindByOperation = new Map([
    ["chat", "LLM"],
    ["response", "LLM"],
    ["text_completion", "LLM"],
    ["generate_content", "LLM"],
    ["execute_tool", "TOOL"],
    ["create_agent", "AGENT"],
    ["invoke_agent", "AGENT"],
    ["embeddings", "EMBEDDING"]
  ]);
  return kindByOperation.get(operationName) ?? "DEFAULT";
}
|
|
21
|
+
/**
 * Whether a property value can be edited in the UI.
 *
 * `"raw"` values are never editable, and a `"functionCall"` value is editable
 * only when it carries a `binding`. Every other kind is editable.
 *
 * @param value - Property value; its `kind` (and `binding`) are inspected.
 * @returns `true` when the value is editable.
 */
function isEditable(value) {
  if (value.kind === "raw") return false;
  if (value.kind === "functionCall") return Boolean(value.binding);
  return true;
}
|
|
25
|
+
//#endregion
|
|
26
|
+
//#region src/trace/prompt-tracer.ts
|
|
27
|
+
/**
 * Span attribute that selects one of evalution's span-kind values
 * (`'LLM'`, `'TOOL'`, `'AGENT'`, `'EMBEDDING'`, `'DEFAULT'`). A missing or
 * unrecognised value falls back to `'DEFAULT'`.
 */
const SPAN_KIND_ATTRIBUTE = "evalution.span.type";

/**
 * Span attribute that scopes the span's {@link PROMPT_ID_ATTRIBUTE} to one
 * prompt provider. Without it the prompt ID is treated as global.
 */
const PROMPT_PROVIDER_ID_ATTRIBUTE = "evalution.prompt.provider.id";

/**
 * Span attribute that links the span to a specific prompt. The value is a
 * globally-unique prompt ID, unless {@link PROMPT_PROVIDER_ID_ATTRIBUTE} is
 * set as well, in which case the ID is scoped to that provider.
 */
const PROMPT_ID_ATTRIBUTE = "evalution.prompt.id";

/**
 * Span attribute that carries a human-readable name for the prompt.
 */
const PROMPT_NAME_ATTRIBUTE = "gen_ai.prompt.name";
|
|
48
|
+
/**
 * Wraps a {@link Tracer} so that every span it produces is tagged with the
 * attributes that associate it with a prompt: an `'LLM'` span kind
 * ({@link SPAN_KIND_ATTRIBUTE}), the prompt's name
 * ({@link PROMPT_NAME_ATTRIBUTE}) and, when the prompt has one, its global ID
 * ({@link PROMPT_ID_ATTRIBUTE}). Attributes set explicitly on an individual
 * span take precedence over these defaults.
 *
 * This depends only on `@opentelemetry/api`, so it can be re-used by SDK
 * adapter packages (e.g. `@evalution/vercel-ai-sdk`) without pulling in the
 * rest of evalution.
 *
 * @param prompt - The prompt to attribute spans to. `name` is a human-readable
 * name; the optional `id` is a globally-unique prompt ID. An `id` that is
 * `undefined` or `null` is left off the span.
 * @param tracer - Tracer to wrap. Defaults to `trace.getTracer("evalution")`.
 * @returns An object with `startSpan` and `startActiveSpan` that forward to
 * the wrapped tracer while attaching the prompt attributes.
 */
function createTracerForPrompt(prompt, tracer) {
  const inner = tracer ?? trace.getTracer("evalution");
  // Read `prompt` on every call so later changes to the object are reflected.
  const withPromptAttributes = (options) => ({
    ...options,
    attributes: {
      [SPAN_KIND_ATTRIBUTE]: "LLM",
      [PROMPT_NAME_ATTRIBUTE]: prompt.name,
      // `!= null` skips both undefined and null, so no empty ID attribute is set.
      ...(prompt.id != null && { [PROMPT_ID_ATTRIBUTE]: prompt.id }),
      // Spread last: caller-supplied attributes override the defaults above.
      ...options?.attributes
    }
  });
  return {
    startSpan(name, options, context) {
      return inner.startSpan(name, withPromptAttributes(options), context);
    },
    startActiveSpan(name, ...rest) {
      // Overloads: (name, fn), (name, options, fn), (name, options, context, fn).
      if (typeof rest[0] === "function") {
        return inner.startActiveSpan(name, withPromptAttributes(), rest[0]);
      }
      if (typeof rest[1] === "function") {
        return inner.startActiveSpan(name, withPromptAttributes(rest[0]), rest[1]);
      }
      return inner.startActiveSpan(name, withPromptAttributes(rest[0]), rest[1], rest[2]);
    }
  };
}
|
|
90
|
+
//#endregion
|
|
91
|
+
//#region src/trace/base-otel-trace-provider.ts
|
|
92
|
+
/** Span-kind values accepted as-is; anything else is reported as "DEFAULT". */
const KNOWN_KINDS = ["LLM", "TOOL", "AGENT", "EMBEDDING", "DEFAULT"];
|
|
99
|
+
/**
 * Converts a two-element high-resolution time into milliseconds.
 *
 * @param time - Pair whose first element is scaled by 1e3 and whose second
 * element is divided by 1e6 (i.e. `[seconds, nanoseconds]`).
 * @returns The time in (fractional) milliseconds.
 */
function hrTimeToMs(time) {
  const wholeSeconds = time[0];
  const nanoseconds = time[1];
  return wholeSeconds * 1e3 + nanoseconds / 1e6;
}
|
|
102
|
+
/**
 * Determines a span's kind from its attributes.
 *
 * An explicit `evalution.span.type` attribute wins; otherwise the kind is
 * derived from `gen_ai.operation.name`. Non-string or unknown values yield
 * `"DEFAULT"`.
 *
 * @param attributes - Span attributes.
 * @returns One of the {@link KNOWN_KINDS} values.
 */
function readKind(attributes) {
  const candidate = attributes["evalution.span.type"] ?? otelOperationToSpanKind(attributes["gen_ai.operation.name"]);
  if (typeof candidate !== "string") return "DEFAULT";
  return KNOWN_KINDS.includes(candidate) ? candidate : "DEFAULT";
}
|
|
106
|
+
/**
 * `gen_ai.request.*` span attributes that are surfaced as model parameters.
 */
const PARAM_ATTRIBUTES = [
  "gen_ai.request.temperature",
  "gen_ai.request.max_tokens",
  "gen_ai.request.top_k",
  "gen_ai.request.top_p",
  "gen_ai.request.frequency_penalty",
  "gen_ai.request.presence_penalty",
  "gen_ai.request.seed",
  "gen_ai.request.stop_sequences",
  "gen_ai.request.choice.count"
];
|
|
117
|
+
/**
 * Narrows a value to a string.
 *
 * @param v - Any value.
 * @returns `v` when it is a string, otherwise `undefined`.
 */
function str(v) {
  if (typeof v === "string") return v;
  return void 0;
}
|
|
120
|
+
/**
 * Narrows a value to a number.
 *
 * @param v - Any value.
 * @returns `v` when its type is `number` (including `NaN`), otherwise
 * `undefined`.
 */
function num(v) {
  if (typeof v === "number") return v;
  return void 0;
}
|
|
123
|
+
/**
 * Parses a JSON-encoded message list into `{ role, content }` pairs.
 *
 * String content is kept as-is. Array content is reduced to the concatenated
 * `text` of its `type: "text"` parts, and the message is dropped when that
 * text is empty. Entries that are not objects, or whose content is neither a
 * string nor an array, are skipped. A missing or non-string role becomes
 * `"unknown"`.
 *
 * @param v - Attribute value expected to hold a JSON array.
 * @returns The parsed messages, or `undefined` when `v` is not a string, is
 * not valid JSON, or does not decode to an array.
 */
function parseMessages(v) {
  if (typeof v !== "string") return void 0;
  let decoded;
  try {
    decoded = JSON.parse(v);
  } catch {
    return void 0;
  }
  if (!Array.isArray(decoded)) return void 0;
  const messages = [];
  for (const entry of decoded) {
    if (!entry || typeof entry !== "object") continue;
    const role = typeof entry.role === "string" ? entry.role : "unknown";
    const content = entry.content;
    if (typeof content === "string") {
      messages.push({ role, content });
      continue;
    }
    if (!Array.isArray(content)) continue;
    let text = "";
    for (const part of content) {
      if (!part || typeof part !== "object" || part.type !== "text") continue;
      if (typeof part.text === "string") text += part.text;
    }
    if (text) messages.push({ role, content: text });
  }
  return messages;
}
|
|
150
|
+
/**
 * Flattens a JSON-encoded list of output messages into a single string.
 *
 * String content contributes one piece. Array content contributes one piece
 * per `type: "text"` part (an empty piece when the part has no string `text`).
 * Pieces are joined with newlines. Non-object entries and other content shapes
 * are ignored.
 *
 * @param v - Attribute value expected to hold a JSON array.
 * @returns The joined text, or `undefined` when `v` is not a string, is not
 * valid JSON, or does not decode to an array.
 */
function parseOutput(v) {
  if (typeof v !== "string") return void 0;
  let decoded;
  try {
    decoded = JSON.parse(v);
  } catch {
    return void 0;
  }
  if (!Array.isArray(decoded)) return void 0;
  const pieces = [];
  for (const entry of decoded) {
    if (!entry || typeof entry !== "object") continue;
    const content = entry.content;
    if (typeof content === "string") {
      pieces.push(content);
      continue;
    }
    if (!Array.isArray(content)) continue;
    for (const part of content) {
      if (!part || typeof part !== "object" || part.type !== "text") continue;
      pieces.push(typeof part.text === "string" ? part.text : "");
    }
  }
  return pieces.join("\n");
}
|
|
166
|
+
/**
 * Collects the LLM-related details of a span from its attributes.
 *
 * `gen_ai.*` attributes are preferred; token usage, prompt messages and the
 * response text fall back to the corresponding `ai.*` attributes.
 *
 * @param attributes - Span attributes.
 * @returns An object holding only the fields that were found (`provider`,
 * `model`, `messages`, `output`, `promptTokens`, `completionTokens`,
 * `totalTokens`, `parameters`), or `undefined` when none were. `totalTokens`
 * is present only when both token counts are.
 */
function readLLM(attributes) {
  const provider = str(attributes["gen_ai.provider.name"]) ?? str(attributes["gen_ai.system"]);
  const model = str(attributes["gen_ai.response.model"]) ?? str(attributes["gen_ai.request.model"]);
  const promptTokens = num(attributes["gen_ai.usage.input_tokens"]) ?? num(attributes["ai.usage.promptTokens"]);
  const completionTokens = num(attributes["gen_ai.usage.output_tokens"]) ?? num(attributes["ai.usage.completionTokens"]);
  const messages = parseMessages(attributes["gen_ai.input.messages"] ?? attributes["ai.prompt.messages"]);
  const output = parseOutput(attributes["gen_ai.output.messages"]) ?? str(attributes["ai.response.text"]);
  // Parameter keys are the attribute names with the "gen_ai.request." prefix removed.
  const paramEntries = PARAM_ATTRIBUTES.map((key) => [key.replace("gen_ai.request.", ""), attributes[key]]).filter(([, v]) => v !== void 0);
  const parameters = paramEntries.length > 0 ? Object.fromEntries(paramEntries) : void 0;
  const totalTokens = promptTokens !== void 0 && completionTokens !== void 0 ? promptTokens + completionTokens : void 0;
  // Compare against undefined, not truthiness: a reported usage of 0 tokens is
  // still data and must not make the whole record disappear.
  const hasTokenUsage = promptTokens !== void 0 || completionTokens !== void 0;
  if (!provider && !model && !hasTokenUsage && !messages && !output && !parameters) return;
  return {
    ...provider && { provider },
    ...model && { model },
    ...messages && { messages },
    ...output && { output },
    ...promptTokens !== void 0 && { promptTokens },
    ...completionTokens !== void 0 && { completionTokens },
    ...totalTokens !== void 0 && { totalTokens },
    ...parameters && { parameters }
  };
}
|
|
188
|
+
/**
 * Builds the optional `llm` and `prompt` fields of a stored span.
 *
 * `llm` is whatever {@link readLLM} extracts. `prompt` is present only when
 * the span carries a string `evalution.prompt.id`, and includes `providerId`
 * only when `evalution.prompt.provider.id` is a non-empty string.
 *
 * @param attributes - Span attributes.
 * @returns An object containing `llm` and/or `prompt`; empty when neither
 * applies.
 */
function llmAndPrompt(attributes) {
  const fields = {};
  const llm = readLLM(attributes);
  if (llm) fields.llm = llm;
  const id = str(attributes["evalution.prompt.id"]);
  const providerId = str(attributes["evalution.prompt.provider.id"]);
  if (id) fields.prompt = providerId ? { id, providerId } : { id };
  return fields;
}
|
|
201
|
+
/**
 * Maps an OpenTelemetry span status to evalution's status string.
 *
 * @param status - Span status; only its `code` is read.
 * @returns `"error"` for `SpanStatusCode.ERROR`, `"ok"` for
 * `SpanStatusCode.OK`, and `undefined` for any other code.
 */
function mapStatus(status) {
  switch (status.code) {
    case SpanStatusCode.ERROR:
      return "error";
    case SpanStatusCode.OK:
      return "ok";
    default:
      return void 0;
  }
}
|
|
205
|
+
/**
 * Merges a later snapshot of a span into an earlier one.
 *
 * OpenTelemetry reports each span twice — at `onStart` (creation-time
 * attributes only) and at `onEnd` (the full set) — and the two snapshots can
 * carry complementary data. Every *defined* field of `incoming` replaces the
 * matching field of `existing`, fields that are `undefined` on `incoming` keep
 * their earlier value, and the two `attributes` objects are unioned with
 * `incoming` winning on conflicts.
 *
 * @param existing - Snapshot already stored.
 * @param incoming - Newer snapshot of the same span.
 * @returns A new merged object; neither input is modified.
 */
function mergeSpans(existing, incoming) {
  const combined = { ...existing };
  for (const key of Object.keys(incoming)) {
    const value = incoming[key];
    if (value === void 0) continue;
    combined[key] = value;
  }
  // The loop above copied incoming.attributes wholesale; replace it with the union.
  if (existing.attributes || incoming.attributes) {
    combined.attributes = { ...existing.attributes, ...incoming.attributes };
  }
  return combined;
}
|
|
225
|
+
/**
 * Base class for a {@link TraceProvider} populated by OpenTelemetry spans.
 *
 * Register the processor returned by {@link getSpanProcessor} on a
 * `BasicTracerProvider` (from `@opentelemetry/sdk-trace-base`).
 *
 * Storage is left to subclasses: this class calls `hasTrace`,
 * `getTraceWithoutSpans`, `getTraceSpans`, `addOrUpdateTrace` and
 * `addOrUpdateSpan` on `this` but does not define them.
 */
var BaseOTelTraceProvider = class {
  id;
  displayName;
  description;
  /** Stream callbacks per trace: trace ID → Set of callbacks. */
  subscribers = /* @__PURE__ */ new Map();
  /** Callbacks notified whenever a trace is added or updated. */
  watchers = /* @__PURE__ */ new Set();
  /** Most recent in-flight start/end handler promise per span ID. */
  spanPromises = /* @__PURE__ */ new Map();
  /**
   * @param options - `id`, `displayName` and `description` of the provider.
   */
  constructor(options) {
    this.id = options.id;
    this.displayName = options.displayName;
    this.description = options.description;
  }
  /**
   * Loads a trace together with its spans.
   *
   * @param traceId - ID of the trace.
   * @returns `{ trace, spans }`, or `undefined` when the trace is unknown.
   */
  async getTrace(traceId) {
    const trace = await this.getTraceWithoutSpans(traceId);
    if (!trace) return void 0;
    return {
      trace,
      spans: await this.getTraceSpans(traceId)
    };
  }
  /**
   * Subscribes to the stream events (`span-start`, `span-end`, `trace-end`)
   * of one trace.
   *
   * @param traceId - Trace to follow.
   * @param callback - Invoked with each event.
   * @returns A function that removes the subscription; calling it more than
   * once is harmless.
   */
  subscribeTrace(traceId, callback) {
    let set = this.subscribers.get(traceId);
    if (!set) {
      set = /* @__PURE__ */ new Set();
      this.subscribers.set(traceId, set);
    }
    set.add(callback);
    return () => {
      set.delete(callback);
      // Remove the map entry only if it is still *this* set. After the set was
      // emptied and dropped, a later subscriber may have registered a fresh
      // set under the same trace ID; a repeated unsubscribe must not delete it.
      if (set.size === 0 && this.subscribers.get(traceId) === set) this.subscribers.delete(traceId);
    };
  }
  /**
   * Registers a callback for trace-level `add` / `update` notifications.
   *
   * @param callback - Invoked with each change event.
   * @returns A function that removes the watcher.
   */
  watch(callback) {
    this.watchers.add(callback);
    return () => {
      this.watchers.delete(callback);
    };
  }
  /**
   * Returns a `SpanProcessor` that funnels every OpenTelemetry span the
   * caller's tracer produces into this provider's store.
   *
   * The end handler of a span is chained after its start handler so the two
   * never run concurrently. Handler errors are logged with `console.error`.
   */
  getSpanProcessor() {
    const track = (spanId, promise) => {
      this.spanPromises.set(spanId, promise);
      promise.finally(() => {
        // onEnd may already have replaced this entry with the chained
        // end-handler promise. Deleting unconditionally would untrack that
        // still-pending work and let drainPendingHandlers() return too early.
        if (this.spanPromises.get(spanId) === promise) this.spanPromises.delete(spanId);
      });
    };
    return {
      onStart: (span, _parentContext) => {
        const spanId = span.spanContext().spanId;
        track(spanId, this.handleStart(span).catch(console.error));
      },
      onEnd: (span) => {
        const spanId = span.spanContext().spanId;
        const previous = this.spanPromises.get(spanId) ?? Promise.resolve();
        track(spanId, previous.then(() => this.handleEnd(span)).catch(console.error));
      },
      forceFlush: async () => {},
      shutdown: async () => {}
    };
  }
  /**
   * Records a span that has just started.
   *
   * When the span has no parent in the same trace and the trace is not stored
   * yet, a `"running"` trace record is created first and an `add` change is
   * emitted. The span snapshot is then stored and a `span-start` stream event
   * is emitted.
   *
   * @param span - The started OpenTelemetry span.
   */
  async handleStart(span) {
    const ctx = span.spanContext();
    const traceId = ctx.traceId;
    const spanId = ctx.spanId;
    const parentCtx = span.parentSpanContext;
    if ((!parentCtx || parentCtx.traceId !== traceId) && !await this.hasTrace(traceId)) {
      const startTime = hrTimeToMs(span.startTime);
      const trace = {
        id: traceId,
        providerId: this.id,
        name: span.name,
        startTime,
        status: "running",
        attributes: { ...span.attributes }
      };
      await this.addOrUpdateTrace(trace);
      this.emitChange({
        type: "add",
        traceId
      });
    }
    const ourSpan = {
      id: spanId,
      traceId,
      // A parent from a different trace is not recorded as this span's parent.
      parentId: parentCtx && parentCtx.traceId === traceId ? parentCtx.spanId : void 0,
      name: span.name,
      kind: readKind(span.attributes),
      startTime: hrTimeToMs(span.startTime),
      attributes: { ...span.attributes },
      ...llmAndPrompt(span.attributes)
    };
    const stored = await this.addOrUpdateSpan(ourSpan);
    this.emitStream(traceId, {
      type: "span-start",
      span: stored
    });
  }
  /**
   * Records a span that has ended.
   *
   * The end snapshot is stored and a `span-end` stream event is emitted. When
   * the stored span has no parent and its trace exists, the trace is closed
   * with the span's end time, status and attributes, followed by a `trace-end`
   * stream event and an `update` change.
   *
   * @param span - The ended OpenTelemetry span.
   */
  async handleEnd(span) {
    const ctx = span.spanContext();
    const traceId = ctx.traceId;
    const ended = {
      id: ctx.spanId,
      traceId,
      parentId: span.parentSpanContext && span.parentSpanContext.traceId === traceId ? span.parentSpanContext.spanId : void 0,
      name: span.name,
      kind: readKind(span.attributes),
      startTime: hrTimeToMs(span.startTime),
      endTime: hrTimeToMs(span.endTime),
      status: mapStatus(span.status),
      errorMessage: span.status.code === SpanStatusCode.ERROR ? span.status.message : void 0,
      attributes: { ...span.attributes },
      ...llmAndPrompt(span.attributes)
    };
    const stored = await this.addOrUpdateSpan(ended);
    this.emitStream(traceId, {
      type: "span-end",
      span: stored
    });
    if (!stored.parentId) {
      const existing = await this.getTraceWithoutSpans(traceId);
      if (existing) {
        const endedTrace = {
          ...existing,
          endTime: stored.endTime,
          // Any span status other than "error" closes the trace as "ok".
          status: stored.status === "error" ? "error" : "ok",
          attributes: {
            ...existing.attributes,
            ...stored.attributes
          }
        };
        await this.addOrUpdateTrace(endedTrace);
        this.emitStream(traceId, {
          type: "trace-end",
          trace: endedTrace
        });
        this.emitChange({
          type: "update",
          traceId
        });
      }
    }
  }
  /**
   * Delivers a stream event to the subscribers of one trace. An exception
   * thrown by a subscriber is logged and does not stop delivery to the others.
   */
  emitStream(traceId, event) {
    const set = this.subscribers.get(traceId);
    if (!set) return;
    for (const cb of set) {
      try {
        cb(event);
      } catch (err) {
        console.error("Trace subscriber threw:", err);
      }
    }
  }
  /**
   * Delivers a change event to every watcher. An exception thrown by a
   * watcher is logged and does not stop delivery to the others.
   */
  emitChange(event) {
    for (const cb of this.watchers) {
      try {
        cb(event);
      } catch (err) {
        console.error("Trace watcher threw:", err);
      }
    }
  }
  /**
   * Resolves once no start/end handler is in flight, including handlers that
   * were queued while waiting.
   */
  async drainPendingHandlers() {
    while (this.spanPromises.size > 0) await Promise.all([...this.spanPromises.values()]);
  }
};
|
|
393
|
+
//#endregion
|
|
394
|
+
//#region src/trace/memory-trace-provider.ts
|
|
395
|
+
/**
 * In-memory {@link TraceProvider} populated by OpenTelemetry spans.
 *
 * Register the processor returned by {@link getSpanProcessor} on a
 * `BasicTracerProvider` (from `@opentelemetry/sdk-trace-base`).
 *
 * Traces are kept in a Map keyed by trace ID and spans in one array per
 * trace.
 */
var MemoryTraceProvider = class extends BaseOTelTraceProvider {
  traces = /* @__PURE__ */ new Map();
  spansByTrace = /* @__PURE__ */ new Map();
  /**
   * @param options - Optional `id`, `displayName` and `description`; each has
   * a default.
   */
  constructor(options = {}) {
    const {
      id = "memory",
      displayName = "In-Memory Traces",
      description = "Stores OpenTelemetry spans in memory for the current process."
    } = options;
    super({ id, displayName, description });
  }
  /** Lists a summary of every stored trace, newest start time first. */
  async getAllTraces() {
    const summaries = [];
    for (const stored of this.traces.values()) {
      summaries.push({
        id: stored.id,
        providerId: this.id,
        name: stored.name,
        startTime: stored.startTime,
        endTime: stored.endTime,
        status: stored.status,
        spanCount: this.spansByTrace.get(stored.id)?.length ?? 0
      });
    }
    return summaries.sort((left, right) => right.startTime - left.startTime);
  }
  /** Whether a trace with this ID is stored. */
  async hasTrace(traceId) {
    return this.traces.has(traceId);
  }
  /** Returns the stored trace record, or `undefined` when unknown. */
  async getTraceWithoutSpans(traceId) {
    return this.traces.get(traceId);
  }
  /** Returns the stored span array of a trace, or a new empty array. */
  async getTraceSpans(traceId) {
    return this.spansByTrace.get(traceId) ?? [];
  }
  /** Stores the trace under its ID, replacing any previous record. */
  async addOrUpdateTrace(trace) {
    this.traces.set(trace.id, trace);
  }
  /**
   * Stores a span snapshot. A snapshot for an already-stored span ID is merged
   * into the existing entry with {@link mergeSpans}.
   *
   * @returns The stored span: `span` itself when new, the merged object
   * otherwise.
   */
  async addOrUpdateSpan(span) {
    const spans = this.spansByTrace.get(span.traceId);
    if (!spans) {
      this.spansByTrace.set(span.traceId, [span]);
      return span;
    }
    const position = spans.findIndex((candidate) => candidate.id === span.id);
    if (position < 0) {
      spans.push(span);
      return span;
    }
    const merged = mergeSpans(spans[position], span);
    spans[position] = merged;
    return merged;
  }
};
|
|
452
|
+
//#endregion
|
|
453
|
+
//#region src/shared/setup-task.ts
|
|
454
|
+
/**
|
|
455
|
+
* Shared, dependency-free types describing the onboarding "setup tasks" a user
|
|
456
|
+
* can run to wire up an AI SDK.
|
|
457
|
+
*
|
|
458
|
+
* This module is imported by both the browser client (to render the
|
|
459
|
+
* manual-setup picker) and the server (to define and execute the tasks), so it
|
|
460
|
+
* must stay free of any Node- or DOM-specific imports.
|
|
461
|
+
*/
|
|
462
|
+
/**
 * Location of evalution's config file, expressed relative to the project
 * root.
 */
const CONFIG_FILE_RELATIVE_PATH = ".evalution/config.ts";
|
|
464
|
+
/**
 * The shell command a run-style setup step executes.
 *
 * @param step - Setup step. An `install_package` step yields
 * `npm i <package>`; any other step yields its `command` verbatim.
 * @returns The command string.
 */
function setupStepCommand(step) {
  if (step.kind === "install_package") return `npm i ${step.package}`;
  return step.command;
}
|
|
471
|
+
//#endregion
|
|
472
|
+
//#region src/sdk/sdk-adapter.ts
|
|
473
|
+
/**
 * Walks up the directory tree from `rootDir` and then from `process.cwd()`
 * looking for `node_modules/<packageName>/<dtsRelPath>`.
 *
 * @param packageName - Package to locate, e.g. `"ai"`.
 * @param dtsRelPath - Path of the declaration file inside the package.
 * @param rootDir - Directory to start from. When it is not a string, only the
 * working-directory walk is performed.
 * @returns The first candidate path that is accessible, or `null` when none
 * is found.
 */
function findPackageDts(packageName, dtsRelPath, rootDir) {
  // Directories shared by both walks are probed only once.
  const seen = /* @__PURE__ */ new Set();
  for (const start of [rootDir, process.cwd()]) {
    // path.join throws on non-string input; a missing rootDir should fall
    // through to the working-directory walk instead of aborting the lookup.
    if (typeof start !== "string") continue;
    let dir = start;
    while (!seen.has(dir)) {
      seen.add(dir);
      const candidate = path.join(dir, "node_modules", packageName, dtsRelPath);
      try {
        fs.accessSync(candidate);
        return candidate;
      } catch {
        // Not accessible at this level: keep climbing.
      }
      const parent = path.dirname(dir);
      if (parent === dir) break; // reached the filesystem root
      dir = parent;
    }
  }
  return null;
}
|
|
495
|
+
//#endregion
|
|
496
|
+
//#region src/sdk/vercel-ai-sdk.ts
|
|
497
|
+
/** Property names that get dedicated fields instead of counting as model parameters. */
const MODEL_KEY = "model";
const SYSTEM_KEY = "system";
const MESSAGES_KEY = "messages";
const RESERVED_KEYS = new Set([MODEL_KEY, SYSTEM_KEY, MESSAGES_KEY]);

/** The `prompts()` factory from `@evalution/vercel-ai-sdk`. */
const PROMPTS_HELPER_CALL = {
  callee: "prompts",
  import: { name: "prompts", from: "@evalution/vercel-ai-sdk" }
};
|
|
513
|
+
/**
 * Build the binding-candidate array for a provider function call.
 *
 * @param provider - Provider identifier, e.g. `"openai"`.
 * @returns Two candidates in order: a parameter of the enclosing `prompts()`
 * helper call, then an import of `provider` from `@ai-sdk/<provider>`.
 */
function providerBinding(provider) {
  const viaPromptsParameter = {
    kind: "parameter",
    enclosingCall: PROMPTS_HELPER_CALL
  };
  const viaProviderImport = {
    kind: "import",
    spec: { name: provider, from: `@ai-sdk/${provider}` }
  };
  return [viaPromptsParameter, viaProviderImport];
}
|
|
526
|
+
/**
 * Build a {@link ModelInfo} entry from group, label, provider, and model ID.
 *
 * The entry's `id` is `<provider>/<modelId>`. It offers two value forms: a
 * provider function call (`values.function`) and the same ID as a plain
 * string (`values.string`).
 */
function model(group, label, provider, modelId) {
  const gatewayId = `${provider}/${modelId}`;
  const providerCall = {
    kind: "functionCall",
    callee: provider,
    args: [{ kind: "primitive", value: modelId }],
    binding: providerBinding(provider)
  };
  const gatewayString = { kind: "primitive", value: gatewayId };
  return {
    id: gatewayId,
    label,
    group,
    values: { function: providerCall, string: gatewayString }
  };
}
|
|
550
|
+
/**
 * Build a custom-value template entry for a provider (used in
 * `groups.{provider}.customValueTemplates.function`).
 *
 * @param provider - Provider identifier used as the callee.
 * @returns A function-call value whose single argument is the literal
 * placeholder string `"$input"`.
 */
function customValueTemplate(provider) {
  const placeholderArg = { kind: "primitive", value: "$input" };
  return {
    kind: "functionCall",
    callee: provider,
    args: [placeholderArg],
    binding: providerBinding(provider)
  };
}
|
|
562
|
+
/**
 * Starter contents of `.evalution/config.ts` for the Vercel AI SDK.
 *
 * The text is written to disk verbatim, so the nesting inside the template is
 * indented with two spaces to give the user a readable file.
 */
const CONFIG_FILE_CONTENTS = `import type { EvalutionConfig } from 'evalution';
import { FilePromptProvider, VercelAISDK } from 'evalution';

export default {
  promptProviders: [
    new FilePromptProvider({
      sdk: new VercelAISDK(),
    }),
  ],
} satisfies EvalutionConfig;
`;
|
|
574
|
+
/**
 * {@link SDKAdapter} implementation for the
 * [Vercel AI SDK](https://sdk.vercel.ai/).
 *
 * - `getModelParameters` reads `CallSettings` from the SDK's `.d.ts` bundle
 *   and returns the property definitions extracted from it.
 * - `executeConfig` delegates to `streamText` when streaming is requested and
 *   to `generateText` otherwise.
 */
var VercelAISDK = class {
  promptsHelperImport = PROMPTS_HELPER_CALL.import.from;
  /** Onboarding task: install the SDK packages, then drop a starter config. */
  static setupTask = {
    id: "vercel-ai-sdk",
    label: "AI SDK",
    icon: "vercel",
    steps: [
      { kind: "install_package", id: "install-ai", package: "ai" },
      { kind: "install_package", id: "install-evalution-vercel-ai-sdk", package: "@evalution/vercel-ai-sdk" },
      { kind: "create_config", id: "create-config", path: CONFIG_FILE_RELATIVE_PATH, contents: CONFIG_FILE_CONTENTS }
    ]
  };
  /**
   * Describes the selectable models: the two ways a model can be written
   * (provider function call or gateway string), a custom-value template per
   * provider group, and the built-in model list.
   *
   * @returns A promise resolving to the catalog.
   */
  getModelCatalog() {
    const providerGroups = [
      ["OpenAI", "openai"],
      ["Anthropic", "anthropic"],
      ["Google", "google"]
    ];
    // Each row: [group, label, provider, modelId].
    const catalogRows = [
      ["OpenAI", "GPT-5.5 Pro", "openai", "gpt-5.5-pro"],
      ["OpenAI", "GPT-5.5", "openai", "gpt-5.5"],
      ["OpenAI", "GPT-5.4 Pro", "openai", "gpt-5.4-pro"],
      ["OpenAI", "GPT-5.4", "openai", "gpt-5.4"],
      ["OpenAI", "GPT-5.4 mini", "openai", "gpt-5.4-mini"],
      ["OpenAI", "GPT-5.4 nano", "openai", "gpt-5.4-nano"],
      ["Anthropic", "Claude Opus 4.8", "anthropic", "claude-opus-4-8"],
      ["Anthropic", "Claude Sonnet 4.6", "anthropic", "claude-sonnet-4-6"],
      ["Anthropic", "Claude Haiku 4.5", "anthropic", "claude-haiku-4-5"],
      ["Google", "Gemini 3.5 Flash", "google", "gemini-3.5-flash"],
      ["Google", "Gemini 3.1 Pro Preview", "google", "gemini-3.1-pro-preview"],
      ["Google", "Gemini 3.1 Flash-Lite", "google", "gemini-3.1-flash-lite"]
    ];
    const groups = {};
    for (const [groupName, provider] of providerGroups) {
      groups[groupName] = { customValueTemplates: { function: customValueTemplate(provider) } };
    }
    return Promise.resolve({
      modelValueTypes: {
        function: {
          label: "Provider",
          description: "Call provider function (e.g. openai(\"gpt-4o\"))"
        },
        string: {
          label: "Gateway",
          description: "Use a gateway model string (e.g. \"openai/gpt-4o\")"
        }
      },
      groups,
      models: catalogRows.map(([groupName, label, provider, modelId]) => model(groupName, label, provider, modelId))
    });
  }
  /**
   * Reads the `CallSettings` declaration from the installed `ai` package.
   *
   * @param rootDir - Directory from which the package lookup starts.
   * @returns The extracted property definitions, or an empty array when the
   * declaration file or the `CallSettings` type cannot be found.
   */
  getModelParameters(rootDir) {
    const dtsPath = findPackageDts("ai", "dist/index.d.ts", rootDir);
    if (!dtsPath) return [];
    const sourceText = fs.readFileSync(dtsPath, "utf-8");
    const sourceFile = ts.createSourceFile(dtsPath, sourceText, ts.ScriptTarget.Latest, true);
    const callSettings = findTypeDeclaration(sourceFile, "CallSettings");
    return callSettings ? extractPropertiesFromDeclaration(callSettings, sourceFile).definitions : [];
  }
  /**
   * Runs a prompt configuration through the SDK.
   *
   * @param config - Options passed straight to `streamText` / `generateText`.
   * @param stream - When truthy, stream the response.
   * @returns The `textStream` of the streaming result, or
   * `{ text, usage, finishReason }` of the generated result.
   */
  async executeConfig(config, stream) {
    if (stream) {
      const streamed = await streamText(config);
      return streamed.textStream;
    }
    const generated = await generateText(config);
    return {
      text: generated.text,
      usage: generated.usage,
      finishReason: generated.finishReason
    };
  }
  /**
   * Converts an extracted prompt into the normalized shape: dedicated
   * `model`, `system` and `messages` fields plus a list of the remaining
   * definitions as model parameters. A field with no value counts as editable.
   *
   * @param prompt - Prompt carrying `extractedProps.definitions` and the
   * optional `extractedProps.values`.
   * @returns The normalized prompt.
   */
  normalizePrompt(prompt) {
    const { definitions, values } = prompt.extractedProps;
    const editableOrDefault = (value) => value ? isEditable(value) : true;
    const modelValue = values?.[MODEL_KEY];
    const systemValue = values?.[SYSTEM_KEY];
    const messagesValue = values?.[MESSAGES_KEY];
    const modelParameters = [];
    for (const def of definitions) {
      if (RESERVED_KEYS.has(def.name)) continue;
      const value = values?.[def.name];
      modelParameters.push({ def, value, editable: editableOrDefault(value) });
    }
    return {
      id: prompt.id,
      providerId: prompt.providerId,
      globalId: prompt.globalId,
      name: prompt.name,
      functionParameters: prompt.functionParameters,
      metadata: prompt.metadata,
      treePath: prompt.treePath,
      model: modelValue,
      modelEditable: editableOrDefault(modelValue),
      system: systemValue,
      systemEditable: editableOrDefault(systemValue),
      messages: extractMessages(messagesValue),
      messagesEditable: editableOrDefault(messagesValue),
      modelParameters
    };
  }
  /**
   * Converts normalized updates back into a flat property map.
   *
   * Only keys present on `updates` are written. A nullish `model`, `system`
   * or `messages` becomes `null`. Entries of `updates.modelParameters` are
   * copied last under their own names.
   *
   * @param updates - Normalized updates.
   * @param _currentValues - Unused.
   * @returns The flat map of property updates.
   */
  denormalizeUpdates(updates, _currentValues) {
    const out = {};
    if (MODEL_KEY in updates) {
      out[MODEL_KEY] = updates.model ?? null;
    }
    if (SYSTEM_KEY in updates) {
      out[SYSTEM_KEY] = updates.system ?? null;
    }
    if (MESSAGES_KEY in updates) {
      out[MESSAGES_KEY] = updates.messages == null ? null : messagesToValue(updates.messages);
    }
    if (updates.modelParameters) {
      for (const [name, value] of Object.entries(updates.modelParameters)) {
        out[name] = value;
      }
    }
    return out;
  }
};
|
|
700
|
+
/**
 * Wraps normalized messages in an array value.
 *
 * @param msgs - Messages with a plain `role` and a value-object `content`.
 * @returns An `"array"` value holding one `"object"` element per message. The
 * role is wrapped as a primitive and the content is passed through unchanged.
 */
function messagesToValue(msgs) {
  const toElement = (msg) => ({
    kind: "object",
    properties: {
      role: { kind: "primitive", value: msg.role },
      content: msg.content
    }
  });
  return {
    kind: "array",
    elements: msgs.map((msg) => toElement(msg))
  };
}
|
|
715
|
+
/** Content used for a message object that has no `content` property. */
const EMPTY_CONTENT = { kind: "primitive", value: "" };

/**
 * Turns the `messages` property value into a list of normalized messages.
 *
 * - No value yields an empty list.
 * - A non-array value becomes a single `"user"` message with that value as
 *   its content.
 * - For an array, each non-object element becomes a `"user"` message. Each
 *   object element contributes its primitive `role` (else `"user"`), its
 *   `content` (else {@link EMPTY_CONTENT}) and, when present, its tool calls.
 *
 * @param value - The extracted `messages` value, if any.
 * @returns The normalized messages.
 */
function extractMessages(value) {
  if (!value) return [];
  const asUserMessage = (content) => ({ role: "user", content });
  if (value.kind !== "array") return [asUserMessage(value)];
  return value.elements.map((el) => {
    if (el.kind !== "object") return asUserMessage(el);
    const props = el.properties;
    const roleValue = props.role;
    const message = {
      role: roleValue?.kind === "primitive" ? String(roleValue.value) : "user",
      content: props.content ?? EMPTY_CONTENT
    };
    const toolCalls = extractToolCalls(props.toolCalls);
    // The key is added only when there are tool calls, never as undefined.
    if (toolCalls) message.toolCalls = toolCalls;
    return message;
  });
}
|
|
744
|
+
/**
 * Reads the tool calls of a message from its `toolCalls` property value.
 *
 * Only `"object"` elements are considered. `toolName` and `args` are taken
 * from primitive values (stringified); anything else becomes `""`.
 *
 * @param value - The `toolCalls` value, if any.
 * @returns The tool calls, or `undefined` when `value` is not an array value
 * or yields no calls.
 */
function extractToolCalls(value) {
  if (value?.kind !== "array") return void 0;
  const primitiveText = (candidate) => candidate?.kind === "primitive" ? String(candidate.value) : "";
  const calls = value.elements
    .filter((el) => el.kind === "object")
    .map((el) => ({
      toolName: primitiveText(el.properties.toolName),
      args: primitiveText(el.properties.args)
    }));
  return calls.length > 0 ? calls : void 0;
}
|
|
758
|
+
//#endregion
|
|
759
|
+
export { MemoryTraceProvider as a, PROMPT_PROVIDER_ID_ATTRIBUTE as c, isEditable as d, setupStepCommand as i, SPAN_KIND_ATTRIBUTE as l, findPackageDts as n, PROMPT_ID_ATTRIBUTE as o, CONFIG_FILE_RELATIVE_PATH as r, PROMPT_NAME_ATTRIBUTE as s, VercelAISDK as t, createTracerForPrompt as u };
|