evalution 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,759 @@
1
+ import path from "node:path";
2
+ import { SpanStatusCode, trace } from "@opentelemetry/api";
3
+ import fs from "node:fs";
4
+ import { generateText, streamText } from "ai";
5
+ import { extractPropertiesFromDeclaration, findTypeDeclaration } from "ts-proppy";
6
+ import ts from "typescript";
7
+ //#region src/shared/helpers.ts
8
/**
 * Translates an OpenTelemetry GenAI operation name (the value of the
 * `gen_ai.operation.name` attribute) into one of evalution's span kinds.
 *
 * @param operationName - Operation name read from a span; may be absent.
 * @returns `'LLM'`, `'TOOL'`, `'AGENT'` or `'EMBEDDING'` for the recognised
 *   operations and `'DEFAULT'` for anything else.
 */
function otelOperationToSpanKind(operationName) {
  // A Map (not a plain object) so inherited keys such as "constructor" can
  // never be mistaken for a known operation.
  const kindByOperation = new Map([
    ["chat", "LLM"],
    ["response", "LLM"],
    ["text_completion", "LLM"],
    ["generate_content", "LLM"],
    ["execute_tool", "TOOL"],
    ["create_agent", "AGENT"],
    ["invoke_agent", "AGENT"],
    ["embeddings", "EMBEDDING"],
  ]);
  return kindByOperation.get(operationName) ?? "DEFAULT";
}
21
/**
 * Whether a property value can be edited in the UI.
 *
 * `raw` values are never editable, and a `functionCall` is editable only when
 * it carries a `binding`; every other kind is editable.
 *
 * @param value - Extracted property value with a `kind` discriminator.
 * @returns `true` when the value may be edited.
 */
function isEditable(value) {
  if (value.kind === "raw") return false;
  if (value.kind === "functionCall") return Boolean(value.binding);
  return true;
}
25
+ //#endregion
26
+ //#region src/trace/prompt-tracer.ts
27
/**
 * Span attribute that explicitly selects one of evalution's span kinds
 * (`'LLM'`, `'TOOL'`, `'AGENT'`, `'EMBEDDING'`, `'DEFAULT'`). When it is
 * absent the kind is derived from `gen_ai.operation.name`; values that are
 * not one of the known kinds resolve to `'DEFAULT'`.
 */
const SPAN_KIND_ATTRIBUTE = "evalution.span.type";

/**
 * Span attribute that scopes {@link PROMPT_ID_ATTRIBUTE} to one prompt
 * provider. Without it the prompt ID is treated as global.
 */
const PROMPT_PROVIDER_ID_ATTRIBUTE = "evalution.prompt.provider.id";

/**
 * Span attribute that links a span to a specific prompt. The value is a
 * globally-unique prompt ID, or a provider-scoped one when
 * {@link PROMPT_PROVIDER_ID_ATTRIBUTE} is also present.
 */
const PROMPT_ID_ATTRIBUTE = "evalution.prompt.id";

/** Span attribute carrying the prompt's human-readable name. */
const PROMPT_NAME_ATTRIBUTE = "gen_ai.prompt.name";
48
/**
 * Decorates a {@link Tracer} so each span it starts is attributed to a prompt.
 *
 * Every span receives the prompt's name ({@link PROMPT_NAME_ATTRIBUTE}), an
 * `'LLM'` span kind ({@link SPAN_KIND_ATTRIBUTE}) and, when `prompt.id` is
 * defined, the prompt ID (`evalution.prompt.id`). Attributes supplied on an
 * individual span override these defaults.
 *
 * Only `@opentelemetry/api` is required, which keeps this usable from SDK
 * adapter packages (e.g. `@evalution/vercel-ai-sdk`) without the rest of
 * evalution.
 *
 * @param prompt - Prompt to attribute spans to: a human-readable `name` and
 *   an optional globally-unique `id`.
 * @param tracer - Tracer to wrap. When omitted, a tracer named `evalution`
 *   is taken from the globally registered tracer provider.
 * @returns An object with `startSpan` and `startActiveSpan` that forward to
 *   the wrapped tracer with the prompt attributes merged in.
 */
function createTracerForPrompt(prompt, tracer) {
  const delegate = tracer ?? trace.getTracer("evalution");

  // Builds span options whose attributes start from the prompt defaults and
  // are then overlaid with whatever the caller passed.
  function decorate(options) {
    const promptDefaults = {
      [SPAN_KIND_ATTRIBUTE]: "LLM",
      [PROMPT_NAME_ATTRIBUTE]: prompt.name,
    };
    if (prompt.id !== void 0) promptDefaults["evalution.prompt.id"] = prompt.id;
    return {
      ...options,
      attributes: { ...promptDefaults, ...options?.attributes },
    };
  }

  return {
    startSpan(name, options, context) {
      return delegate.startSpan(name, decorate(options), context);
    },
    // Supports the three call shapes: (name, fn), (name, options, fn) and
    // (name, options, context, fn).
    startActiveSpan(name, ...rest) {
      const [first, second, third] = rest;
      if (typeof first === "function") {
        return delegate.startActiveSpan(name, decorate(), first);
      }
      if (typeof second === "function") {
        return delegate.startActiveSpan(name, decorate(first), second);
      }
      return delegate.startActiveSpan(name, decorate(first), second, third);
    },
  };
}
90
+ //#endregion
91
+ //#region src/trace/base-otel-trace-provider.ts
92
/** Span kinds accepted by `readKind`; any other value resolves to `'DEFAULT'`. */
const KNOWN_KINDS = ["LLM", "TOOL", "AGENT", "EMBEDDING", "DEFAULT"];
99
/**
 * Converts a two-element `[seconds, nanoseconds]` time tuple to milliseconds.
 *
 * @param time - Tuple whose first entry is seconds and second is nanoseconds.
 * @returns The same instant expressed in (possibly fractional) milliseconds.
 */
function hrTimeToMs(time) {
  const [seconds, nanoseconds] = time;
  return seconds * 1e3 + nanoseconds / 1e6;
}
102
/**
 * Determines the span kind for a set of span attributes.
 *
 * An explicit `evalution.span.type` attribute wins; when it is nullish the
 * kind is derived from `gen_ai.operation.name`. Anything that is not a
 * string listed in `KNOWN_KINDS` resolves to `'DEFAULT'`.
 *
 * @param attributes - Span attribute bag.
 * @returns One of the known span kinds.
 */
function readKind(attributes) {
  const candidate =
    attributes["evalution.span.type"] ??
    otelOperationToSpanKind(attributes["gen_ai.operation.name"]);
  if (typeof candidate !== "string") return "DEFAULT";
  return KNOWN_KINDS.includes(candidate) ? candidate : "DEFAULT";
}
106
/**
 * `gen_ai.request.*` attributes that `readLLM` surfaces as request
 * parameters (keyed by the part after the `gen_ai.request.` prefix).
 */
const PARAM_ATTRIBUTES = [
  "temperature",
  "max_tokens",
  "top_k",
  "top_p",
  "frequency_penalty",
  "presence_penalty",
  "seed",
  "stop_sequences",
  "choice.count",
].map((suffix) => `gen_ai.request.${suffix}`);
117
/** Returns `v` when it is a string, otherwise `undefined`. */
function str(v) {
  if (typeof v === "string") return v;
  return void 0;
}
120
/** Returns `v` when it is a number (including `0` and `NaN`), otherwise `undefined`. */
function num(v) {
  if (typeof v === "number") return v;
  return void 0;
}
123
/**
 * Parses a JSON-encoded message list from a span attribute into
 * `{ role, content }` pairs.
 *
 * Two message shapes are understood:
 * - `{ role, content }` where `content` is a string or an array of parts
 *   such as `{ type: "text", text }`;
 * - `{ role, parts }` where each text part is `{ type: "text", content }`,
 *   the layout used by the OpenTelemetry GenAI `gen_ai.input.messages`
 *   attribute.
 *
 * Only text parts contribute; a message's text parts are concatenated
 * without a separator, and messages that end up with no text are dropped.
 * A missing or non-string role becomes `"unknown"`.
 *
 * @param v - Raw attribute value; anything but a string is rejected.
 * @returns The parsed messages, or `undefined` when `v` is not a string, is
 *   not valid JSON, or does not decode to an array.
 */
function parseMessages(v) {
  if (typeof v !== "string") return void 0;
  try {
    const parsed = JSON.parse(v);
    if (!Array.isArray(parsed)) return void 0;
    return parsed.flatMap((msg) => {
      if (!msg || typeof msg !== "object") return [];
      const role = str(msg.role) ?? "unknown";
      if (typeof msg.content === "string") return [{ role, content: msg.content }];
      // Prefer an array `content`; otherwise fall back to `parts`.
      const parts = Array.isArray(msg.content) ? msg.content : msg.parts;
      if (!Array.isArray(parts)) return [];
      const text = parts
        .filter((part) => !!part && typeof part === "object" && part.type === "text")
        // `text` is checked first; `content` is the field name used inside `parts`.
        .map((part) => str(part.text) ?? str(part.content) ?? "")
        .join("");
      return text ? [{ role, content: text }] : [];
    });
  } catch {
    // Malformed JSON is treated the same as an absent attribute.
    return void 0;
  }
}
150
/**
 * Extracts the response text from a JSON-encoded output message list.
 *
 * Understands the same two message shapes as the input side: `content` as a
 * string or an array of `{ type: "text", text }` parts, and the
 * OpenTelemetry GenAI `parts: [{ type: "text", content }]` layout used by
 * `gen_ai.output.messages`. Every text chunk, across all messages, is joined
 * with a newline.
 *
 * @param v - Raw attribute value; anything but a string is rejected.
 * @returns The joined text, or `undefined` when `v` is not a string, is not
 *   valid JSON, is not an array, or contains no text at all. Returning
 *   `undefined` rather than `""` for the empty case lets callers fall back
 *   to another source with `??`.
 */
function parseOutput(v) {
  if (typeof v !== "string") return void 0;
  try {
    const parsed = JSON.parse(v);
    if (!Array.isArray(parsed)) return void 0;
    const chunks = parsed.flatMap((msg) => {
      if (!msg || typeof msg !== "object") return [];
      if (typeof msg.content === "string") return [msg.content];
      // Prefer an array `content`; otherwise fall back to `parts`.
      const parts = Array.isArray(msg.content) ? msg.content : msg.parts;
      if (!Array.isArray(parts)) return [];
      return parts
        .filter((part) => !!part && typeof part === "object" && part.type === "text")
        .map((part) => str(part.text) ?? str(part.content) ?? "");
    });
    const text = chunks.join("\n");
    return text === "" ? void 0 : text;
  } catch {
    // Malformed JSON is treated the same as an absent attribute.
    return void 0;
  }
}
166
/**
 * Collects the LLM-related details recorded on a span.
 *
 * Reads provider, model, token usage, input messages, output text and
 * request parameters from `gen_ai.*` attributes, with `ai.*` attributes as
 * fallbacks for token usage, messages and response text. `totalTokens` is
 * only reported when both prompt and completion counts are present.
 *
 * @param attributes - Span attribute bag.
 * @returns An object holding only the fields that were found, or
 *   `undefined` when none were. Token counts of `0` count as found.
 */
function readLLM(attributes) {
  const provider = str(attributes["gen_ai.provider.name"]) ?? str(attributes["gen_ai.system"]);
  const model = str(attributes["gen_ai.response.model"]) ?? str(attributes["gen_ai.request.model"]);
  const promptTokens = num(attributes["gen_ai.usage.input_tokens"]) ?? num(attributes["ai.usage.promptTokens"]);
  const completionTokens = num(attributes["gen_ai.usage.output_tokens"]) ?? num(attributes["ai.usage.completionTokens"]);
  const messages = parseMessages(attributes["gen_ai.input.messages"] ?? attributes["ai.prompt.messages"]);
  const output = parseOutput(attributes["gen_ai.output.messages"]) ?? str(attributes["ai.response.text"]);

  // Request parameters are keyed by the attribute name minus its prefix.
  const paramEntries = PARAM_ATTRIBUTES
    .map((key) => [key.replace("gen_ai.request.", ""), attributes[key]])
    .filter(([, value]) => value !== void 0);
  const parameters = paramEntries.length > 0 ? Object.fromEntries(paramEntries) : void 0;

  const totalTokens =
    promptTokens !== void 0 && completionTokens !== void 0
      ? promptTokens + completionTokens
      : void 0;

  const llm = {
    ...provider && { provider },
    ...model && { model },
    ...messages && { messages },
    ...output && { output },
    ...promptTokens !== void 0 && { promptTokens },
    ...completionTokens !== void 0 && { completionTokens },
    ...totalTokens !== void 0 && { totalTokens },
    ...parameters && { parameters },
  };
  // Decide "nothing found" from the built object so the check can never
  // disagree with the inclusion rules above (a count of 0 is real data).
  return Object.keys(llm).length > 0 ? llm : void 0;
}
188
/**
 * Builds the optional `llm` and `prompt` fields of a stored span.
 *
 * `llm` is whatever `readLLM` finds. `prompt` is present only when the
 * `evalution.prompt.id` attribute is a non-empty string, and carries a
 * `providerId` when `evalution.prompt.provider.id` is one as well.
 *
 * @param attributes - Span attribute bag.
 * @returns An object containing `llm` and/or `prompt`, or neither.
 */
function llmAndPrompt(attributes) {
  const fields = {};
  const llm = readLLM(attributes);
  if (llm) fields.llm = llm;

  const promptId = str(attributes["evalution.prompt.id"]);
  if (promptId) {
    const providerId = str(attributes["evalution.prompt.provider.id"]);
    fields.prompt = providerId ? { id: promptId, providerId } : { id: promptId };
  }
  return fields;
}
201
/**
 * Maps an OpenTelemetry span status to evalution's status string.
 *
 * @param status - Span status object with a `code`.
 * @returns `"error"` for `SpanStatusCode.ERROR`, `"ok"` for
 *   `SpanStatusCode.OK`, and `undefined` for any other code.
 */
function mapStatus(status) {
  switch (status.code) {
    case SpanStatusCode.ERROR:
      return "error";
    case SpanStatusCode.OK:
      return "ok";
    default:
      return void 0;
  }
}
205
/**
 * Folds a later snapshot of a span into an earlier one.
 *
 * A span is reported at `onStart` (creation-time attributes only) and again
 * at `onEnd` (the full set), and either snapshot may hold data the other
 * lacks. Every field of `incoming` that is not `undefined` overwrites the
 * matching field of `existing`, and the two `attributes` bags are unioned
 * with `incoming` winning on conflicts. Neither argument is mutated.
 *
 * @param existing - Snapshot already stored.
 * @param incoming - Newer snapshot of the same span.
 * @returns A new merged span object.
 */
function mergeSpans(existing, incoming) {
  const combined = { ...existing };
  for (const field of Object.keys(incoming)) {
    const value = incoming[field];
    if (value === void 0) continue;
    combined[field] = value;
  }
  const hasAttributes = existing.attributes || incoming.attributes;
  if (hasAttributes) {
    combined.attributes = { ...existing.attributes, ...incoming.attributes };
  }
  return combined;
}
225
/**
 * Base class for a {@link TraceProvider} populated by OpenTelemetry spans.
 *
 * Register the processor returned by {@link getSpanProcessor} on a
 * `BasicTracerProvider` (from `@opentelemetry/sdk-trace-base`).
 *
 * Storage is left to subclasses, which must supply `hasTrace`,
 * `getTraceWithoutSpans`, `getTraceSpans`, `addOrUpdateTrace` and
 * `addOrUpdateSpan` (the latter returning the stored span).
 */
var BaseOTelTraceProvider = class {
  id;
  displayName;
  description;
  /** Per-trace stream listeners: trace id -> set of callbacks. */
  subscribers = /* @__PURE__ */ new Map();
  /** Listeners notified when a trace is added or updated. */
  watchers = /* @__PURE__ */ new Set();
  /** Latest in-flight start/end handler per span id. */
  spanPromises = /* @__PURE__ */ new Map();

  /**
   * @param options - `id`, `displayName` and `description` of the provider.
   */
  constructor(options) {
    this.id = options.id;
    this.displayName = options.displayName;
    this.description = options.description;
  }

  /**
   * Loads a trace together with its spans.
   *
   * @returns `{ trace, spans }`, or `undefined` when the trace is unknown.
   */
  async getTrace(traceId) {
    const trace = await this.getTraceWithoutSpans(traceId);
    if (!trace) return void 0;
    return {
      trace,
      spans: await this.getTraceSpans(traceId)
    };
  }

  /**
   * Subscribes to span/trace events of one trace.
   *
   * @returns An unsubscribe function; calling it more than once is harmless.
   */
  subscribeTrace(traceId, callback) {
    let set = this.subscribers.get(traceId);
    if (!set) {
      set = /* @__PURE__ */ new Set();
      this.subscribers.set(traceId, set);
    }
    set.add(callback);
    return () => {
      set.delete(callback);
      // Only drop the map entry if it is still *this* set. After the set was
      // emptied and removed, a later subscriber gets a fresh set under the
      // same trace id, which a repeated unsubscribe must not delete.
      if (set.size === 0 && this.subscribers.get(traceId) === set) {
        this.subscribers.delete(traceId);
      }
    };
  }

  /**
   * Subscribes to trace add/update notifications.
   *
   * @returns An unsubscribe function.
   */
  watch(callback) {
    this.watchers.add(callback);
    return () => {
      this.watchers.delete(callback);
    };
  }

  /**
   * Returns a `SpanProcessor` that funnels every OpenTelemetry span the
   * caller's tracer produces into this provider's store.
   *
   * The end handler of a span is chained after its start handler so the two
   * never run out of order. Handler errors are logged with `console.error`.
   */
  getSpanProcessor() {
    // Records `pending` as the span's in-flight handler and forgets it once
    // settled, unless a newer handler has replaced it in the meantime.
    // Without that guard the start handler's cleanup would remove the end
    // handler's entry and `drainPendingHandlers` could return too early.
    const track = (spanId, pending) => {
      this.spanPromises.set(spanId, pending);
      pending.finally(() => {
        if (this.spanPromises.get(spanId) === pending) this.spanPromises.delete(spanId);
      });
    };
    return {
      onStart: (span, _parentContext) => {
        const spanId = span.spanContext().spanId;
        track(spanId, this.handleStart(span).catch(console.error));
      },
      onEnd: (span) => {
        const spanId = span.spanContext().spanId;
        const previous = this.spanPromises.get(spanId) ?? Promise.resolve();
        track(spanId, previous.then(() => this.handleEnd(span)).catch(console.error));
      },
      forceFlush: async () => {},
      shutdown: async () => {}
    };
  }

  /**
   * Stores the start snapshot of a span, creating the trace first when the
   * span has no parent in the same trace and the trace is not stored yet.
   * Emits `add` (new trace) and `span-start` events.
   */
  async handleStart(span) {
    const ctx = span.spanContext();
    const traceId = ctx.traceId;
    const spanId = ctx.spanId;
    const parentCtx = span.parentSpanContext;
    if ((!parentCtx || parentCtx.traceId !== traceId) && !await this.hasTrace(traceId)) {
      const startTime = hrTimeToMs(span.startTime);
      const trace = {
        id: traceId,
        providerId: this.id,
        name: span.name,
        startTime,
        status: "running",
        attributes: { ...span.attributes }
      };
      await this.addOrUpdateTrace(trace);
      this.emitChange({
        type: "add",
        traceId
      });
    }
    const ourSpan = {
      id: spanId,
      traceId,
      // A parent from a different trace is not recorded as this span's parent.
      parentId: parentCtx && parentCtx.traceId === traceId ? parentCtx.spanId : void 0,
      name: span.name,
      kind: readKind(span.attributes),
      startTime: hrTimeToMs(span.startTime),
      attributes: { ...span.attributes },
      ...llmAndPrompt(span.attributes)
    };
    const stored = await this.addOrUpdateSpan(ourSpan);
    this.emitStream(traceId, {
      type: "span-start",
      span: stored
    });
  }

  /**
   * Stores the end snapshot of a span and emits `span-end`. When the stored
   * span has no parent, the trace itself is also finalised (end time,
   * status, merged attributes) and `trace-end` / `update` are emitted.
   */
  async handleEnd(span) {
    const ctx = span.spanContext();
    const traceId = ctx.traceId;
    const ended = {
      id: ctx.spanId,
      traceId,
      parentId: span.parentSpanContext && span.parentSpanContext.traceId === traceId ? span.parentSpanContext.spanId : void 0,
      name: span.name,
      kind: readKind(span.attributes),
      startTime: hrTimeToMs(span.startTime),
      endTime: hrTimeToMs(span.endTime),
      status: mapStatus(span.status),
      errorMessage: span.status.code === SpanStatusCode.ERROR ? span.status.message : void 0,
      attributes: { ...span.attributes },
      ...llmAndPrompt(span.attributes)
    };
    const stored = await this.addOrUpdateSpan(ended);
    this.emitStream(traceId, {
      type: "span-end",
      span: stored
    });
    if (!stored.parentId) {
      const existing = await this.getTraceWithoutSpans(traceId);
      if (existing) {
        const endedTrace = {
          ...existing,
          endTime: stored.endTime,
          // Any non-error span status ends the trace as "ok".
          status: stored.status === "error" ? "error" : "ok",
          attributes: {
            ...existing.attributes,
            ...stored.attributes
          }
        };
        await this.addOrUpdateTrace(endedTrace);
        this.emitStream(traceId, {
          type: "trace-end",
          trace: endedTrace
        });
        this.emitChange({
          type: "update",
          traceId
        });
      }
    }
  }

  /** Delivers `event` to the subscribers of `traceId`; a throwing subscriber is logged and skipped. */
  emitStream(traceId, event) {
    const set = this.subscribers.get(traceId);
    if (!set) return;
    for (const cb of set) try {
      cb(event);
    } catch (err) {
      console.error("Trace subscriber threw:", err);
    }
  }

  /** Delivers `event` to every watcher; a throwing watcher is logged and skipped. */
  emitChange(event) {
    for (const cb of this.watchers) try {
      cb(event);
    } catch (err) {
      console.error("Trace watcher threw:", err);
    }
  }

  /** Resolves once no start/end handler is in flight any more. */
  async drainPendingHandlers() {
    while (this.spanPromises.size > 0) await Promise.all([...this.spanPromises.values()]);
  }
};
393
+ //#endregion
394
+ //#region src/trace/memory-trace-provider.ts
395
/**
 * {@link TraceProvider} that keeps traces and spans in process memory.
 *
 * Register the processor returned by {@link getSpanProcessor} on a
 * `BasicTracerProvider` (from `@opentelemetry/sdk-trace-base`).
 */
var MemoryTraceProvider = class extends BaseOTelTraceProvider {
  /** Trace id -> stored trace. */
  traces = /* @__PURE__ */ new Map();
  /** Trace id -> spans of that trace, in insertion order. */
  spansByTrace = /* @__PURE__ */ new Map();

  /**
   * @param options - Optional `id`, `displayName` and `description`
   *   overrides for the provider.
   */
  constructor(options = {}) {
    const {
      id = "memory",
      displayName = "In-Memory Traces",
      description = "Stores OpenTelemetry spans in memory for the current process."
    } = options;
    super({ id, displayName, description });
  }

  /** Lists a summary of every stored trace, newest start time first. */
  async getAllTraces() {
    const summaries = [];
    for (const stored of this.traces.values()) {
      const spans = this.spansByTrace.get(stored.id);
      summaries.push({
        id: stored.id,
        providerId: this.id,
        name: stored.name,
        startTime: stored.startTime,
        endTime: stored.endTime,
        status: stored.status,
        spanCount: spans?.length ?? 0
      });
    }
    return summaries.sort((left, right) => right.startTime - left.startTime);
  }

  /** Whether a trace with this id is stored. */
  async hasTrace(traceId) {
    return this.traces.has(traceId);
  }

  /** Returns the stored trace, or `undefined` when unknown. */
  async getTraceWithoutSpans(traceId) {
    return this.traces.get(traceId);
  }

  /** Returns the spans stored for a trace, or an empty array. */
  async getTraceSpans(traceId) {
    return this.spansByTrace.get(traceId) ?? [];
  }

  /** Stores `trace` under its id, replacing any previous entry. */
  async addOrUpdateTrace(trace) {
    this.traces.set(trace.id, trace);
  }

  /**
   * Appends a new span, or merges `span` into the stored span with the same
   * id via `mergeSpans`.
   *
   * @returns The span as it is now stored.
   */
  async addOrUpdateSpan(span) {
    let spans = this.spansByTrace.get(span.traceId);
    if (!spans) {
      spans = [];
      this.spansByTrace.set(span.traceId, spans);
    }
    const position = spans.findIndex((candidate) => candidate.id === span.id);
    if (position < 0) {
      spans.push(span);
      return span;
    }
    const merged = mergeSpans(spans[position], span);
    spans[position] = merged;
    return merged;
  }
};
452
+ //#endregion
453
+ //#region src/shared/setup-task.ts
454
+ /**
455
+ * Shared, dependency-free types describing the onboarding "setup tasks" a user
456
+ * can run to wire up an AI SDK.
457
+ *
458
+ * This module is imported by both the browser client (to render the
459
+ * manual-setup picker) and the server (to define and execute the tasks), so it
460
+ * must stay free of any Node- or DOM-specific imports.
461
+ */
462
/** Location of evalution's config file, relative to the project root. */
const CONFIG_FILE_RELATIVE_PATH = ".evalution/config.ts";
464
/**
 * Resolves the shell command for a run-style setup step.
 *
 * @param step - Setup step; `install_package` steps carry a `package`,
 *   other steps are expected to carry a `command`.
 * @returns `npm i <package>` for `install_package` steps, otherwise the
 *   step's `command` verbatim.
 */
function setupStepCommand(step) {
  if (step.kind === "install_package") {
    return `npm i ${step.package}`;
  }
  return step.command;
}
471
+ //#endregion
472
+ //#region src/sdk/sdk-adapter.ts
473
/**
 * Walks up the directory tree from both `rootDir` and `process.cwd()`
 * looking for `node_modules/<packageName>/<dtsRelPath>`.
 *
 * Start directories are resolved to absolute paths first, so a relative
 * `rootDir` is searched all the way up to the filesystem root and the
 * result is always absolute. A `rootDir` that is not a string (for example
 * `undefined`) is skipped and only the working directory is searched.
 *
 * @param packageName - Package directory name under `node_modules`.
 * @param dtsRelPath - Path of the declaration file inside the package.
 * @param rootDir - Preferred directory to start searching from.
 * @returns The path of the first accessible match, or `null` when none of
 *   the visited directories contains the file.
 */
function findPackageDts(packageName, dtsRelPath, rootDir) {
  // Directories already probed; the two walks usually share ancestors.
  const seen = /* @__PURE__ */ new Set();
  const starts = [rootDir, process.cwd()].filter((start) => typeof start === "string");
  for (const start of starts) {
    let dir = path.resolve(start);
    while (!seen.has(dir)) {
      seen.add(dir);
      const candidate = path.join(dir, "node_modules", packageName, dtsRelPath);
      try {
        fs.accessSync(candidate);
        return candidate;
      } catch {
        // Not present (or not accessible) here; keep walking upwards.
      }
      const parent = path.dirname(dir);
      if (parent === dir) break; // reached the filesystem root
      dir = parent;
    }
  }
  return null;
}
495
+ //#endregion
496
+ //#region src/sdk/vercel-ai-sdk.ts
497
/** Property name holding the model value. */
const MODEL_KEY = "model";
/** Property name holding the system prompt value. */
const SYSTEM_KEY = "system";
/** Property name holding the messages value. */
const MESSAGES_KEY = "messages";
/** Property names handled separately from the generic model parameters. */
const RESERVED_KEYS = new Set([MODEL_KEY, SYSTEM_KEY, MESSAGES_KEY]);

/** The `prompts()` factory from `@evalution/vercel-ai-sdk`. */
const PROMPTS_HELPER_CALL = {
  callee: "prompts",
  import: { name: "prompts", from: "@evalution/vercel-ai-sdk" }
};
513
/**
 * Builds the binding-candidate array for a provider function call.
 *
 * @param provider - Provider function name, e.g. `"openai"`.
 * @returns Two candidates, in order: a parameter of the enclosing
 *   `prompts()` helper call, then an import of `provider` from
 *   `@ai-sdk/<provider>`.
 */
function providerBinding(provider) {
  const viaHelperParameter = {
    kind: "parameter",
    enclosingCall: PROMPTS_HELPER_CALL
  };
  const viaImport = {
    kind: "import",
    spec: { name: provider, from: `@ai-sdk/${provider}` }
  };
  return [viaHelperParameter, viaImport];
}
526
/**
 * Builds a {@link ModelInfo} catalog entry.
 *
 * @param group - Catalog group the model is listed under.
 * @param label - Display label.
 * @param provider - Provider function name, e.g. `"openai"`.
 * @param modelId - Provider-specific model ID.
 * @returns An entry whose `id` is `<provider>/<modelId>` and whose `values`
 *   offer two representations: a `function` call `provider(modelId)` and a
 *   `string` primitive holding the entry `id`.
 */
function model(group, label, provider, modelId) {
  const id = `${provider}/${modelId}`;
  const asFunctionCall = {
    kind: "functionCall",
    callee: provider,
    args: [{ kind: "primitive", value: modelId }],
    binding: providerBinding(provider)
  };
  const asString = { kind: "primitive", value: id };
  return {
    id,
    label,
    group,
    values: { function: asFunctionCall, string: asString }
  };
}
550
/**
 * Builds the custom-value template for a provider (used in
 * `groups.{provider}.customValueTemplates.function`).
 *
 * @param provider - Provider function name, e.g. `"openai"`.
 * @returns A `functionCall` value calling `provider` with the `"$input"`
 *   placeholder as its single argument.
 */
function customValueTemplate(provider) {
  const placeholderArg = { kind: "primitive", value: "$input" };
  return {
    kind: "functionCall",
    callee: provider,
    args: [placeholderArg],
    binding: providerBinding(provider)
  };
}
562
+ /** Starter contents of `.evalution/config.ts` for the Vercel AI SDK: a default-exported config whose `promptProviders` holds one `FilePromptProvider` wrapping a `VercelAISDK` instance. Used as the `contents` of the `create_config` step in `VercelAISDK.setupTask`. */
563
+ const CONFIG_FILE_CONTENTS = `import type { EvalutionConfig } from 'evalution';
564
+ import { FilePromptProvider, VercelAISDK } from 'evalution';
565
+ 
566
+ export default {
567
+ promptProviders: [
568
+ new FilePromptProvider({
569
+ sdk: new VercelAISDK(),
570
+ }),
571
+ ],
572
+ } satisfies EvalutionConfig;
573
+ `;
574
/**
 * {@link SDKAdapter} implementation for the
 * [Vercel AI SDK](https://sdk.vercel.ai/).
 *
 * - `getModelCatalog` lists the built-in provider/model choices.
 * - `getModelParameters` reads the `CallSettings` declaration from the
 *   `ai` package's `.d.ts` bundle.
 * - `executeConfig` delegates to `streamText` or `generateText`.
 * - `normalizePrompt` / `denormalizeUpdates` translate between extracted
 *   property values and the prompt shape with separate model, system,
 *   messages and model-parameter fields.
 */
var VercelAISDK = class {
  /** Module the `prompts()` helper is imported from. */
  promptsHelperImport = PROMPTS_HELPER_CALL.import.from;

  /** Onboarding task: install the SDK packages, then drop a starter config. */
  static setupTask = {
    id: "vercel-ai-sdk",
    label: "AI SDK",
    icon: "vercel",
    steps: [
      { kind: "install_package", id: "install-ai", package: "ai" },
      {
        kind: "install_package",
        id: "install-evalution-vercel-ai-sdk",
        package: "@evalution/vercel-ai-sdk"
      },
      {
        kind: "create_config",
        id: "create-config",
        path: CONFIG_FILE_RELATIVE_PATH,
        contents: CONFIG_FILE_CONTENTS
      }
    ]
  };

  /**
   * Describes the selectable models.
   *
   * @returns A promise of the catalog: the two model value types
   *   (`function` and `string`), per-group custom-value templates, and the
   *   list of model entries.
   */
  getModelCatalog() {
    const groupFor = (provider) => ({
      customValueTemplates: { function: customValueTemplate(provider) }
    });
    const modelValueTypes = {
      function: {
        label: "Provider",
        description: "Call provider function (e.g. openai(\"gpt-4o\"))"
      },
      string: {
        label: "Gateway",
        description: "Use a gateway model string (e.g. \"openai/gpt-4o\")"
      }
    };
    const groups = {
      OpenAI: groupFor("openai"),
      Anthropic: groupFor("anthropic"),
      Google: groupFor("google")
    };
    // [group, label, provider, modelId]
    const entries = [
      ["OpenAI", "GPT-5.5 Pro", "openai", "gpt-5.5-pro"],
      ["OpenAI", "GPT-5.5", "openai", "gpt-5.5"],
      ["OpenAI", "GPT-5.4 Pro", "openai", "gpt-5.4-pro"],
      ["OpenAI", "GPT-5.4", "openai", "gpt-5.4"],
      ["OpenAI", "GPT-5.4 mini", "openai", "gpt-5.4-mini"],
      ["OpenAI", "GPT-5.4 nano", "openai", "gpt-5.4-nano"],
      ["Anthropic", "Claude Opus 4.8", "anthropic", "claude-opus-4-8"],
      ["Anthropic", "Claude Sonnet 4.6", "anthropic", "claude-sonnet-4-6"],
      ["Anthropic", "Claude Haiku 4.5", "anthropic", "claude-haiku-4-5"],
      ["Google", "Gemini 3.5 Flash", "google", "gemini-3.5-flash"],
      ["Google", "Gemini 3.1 Pro Preview", "google", "gemini-3.1-pro-preview"],
      ["Google", "Gemini 3.1 Flash-Lite", "google", "gemini-3.1-flash-lite"]
    ];
    const models = entries.map(([group, label, provider, modelId]) => model(group, label, provider, modelId));
    return Promise.resolve({ modelValueTypes, groups, models });
  }

  /**
   * Extracts the property definitions of the `CallSettings` type from the
   * installed `ai` package.
   *
   * @param rootDir - Directory to start looking for `node_modules/ai` from.
   * @returns The extracted definitions, or an empty array when the
   *   declaration file or the `CallSettings` declaration cannot be found.
   */
  getModelParameters(rootDir) {
    const declarationsPath = findPackageDts("ai", "dist/index.d.ts", rootDir);
    if (!declarationsPath) return [];
    const declarationsText = fs.readFileSync(declarationsPath, "utf-8");
    const sourceFile = ts.createSourceFile(declarationsPath, declarationsText, ts.ScriptTarget.Latest, true);
    const callSettings = findTypeDeclaration(sourceFile, "CallSettings");
    if (!callSettings) return [];
    const { definitions } = extractPropertiesFromDeclaration(callSettings, sourceFile);
    return definitions;
  }

  /**
   * Runs a prompt configuration through the AI SDK.
   *
   * @param config - Options passed straight to `streamText`/`generateText`.
   * @param stream - When truthy, stream the response.
   * @returns The `textStream` of the streaming call, or
   *   `{ text, usage, finishReason }` of the non-streaming call.
   */
  async executeConfig(config, stream) {
    if (stream) {
      const streamed = await streamText(config);
      return streamed.textStream;
    }
    const generated = await generateText(config);
    return {
      text: generated.text,
      usage: generated.usage,
      finishReason: generated.finishReason
    };
  }

  /**
   * Splits a prompt's extracted properties into model, system, messages and
   * the remaining model parameters, each with an editability flag. A
   * property with no value counts as editable.
   */
  normalizePrompt(prompt) {
    const { definitions, values } = prompt.extractedProps;
    const editableOrDefault = (value) => (value ? isEditable(value) : true);

    const modelValue = values?.[MODEL_KEY];
    const systemValue = values?.[SYSTEM_KEY];
    const messagesValue = values?.[MESSAGES_KEY];

    const modelParameters = [];
    for (const def of definitions) {
      if (RESERVED_KEYS.has(def.name)) continue;
      const value = values?.[def.name];
      modelParameters.push({ def, value, editable: editableOrDefault(value) });
    }

    return {
      id: prompt.id,
      providerId: prompt.providerId,
      globalId: prompt.globalId,
      name: prompt.name,
      functionParameters: prompt.functionParameters,
      metadata: prompt.metadata,
      treePath: prompt.treePath,
      model: modelValue,
      modelEditable: editableOrDefault(modelValue),
      system: systemValue,
      systemEditable: editableOrDefault(systemValue),
      messages: extractMessages(messagesValue),
      messagesEditable: editableOrDefault(messagesValue),
      modelParameters
    };
  }

  /**
   * Converts normalized prompt updates back into a flat map of property
   * values. A key present in `updates` with a nullish value maps to `null`;
   * model parameters are copied through by name.
   */
  denormalizeUpdates(updates, _currentValues) {
    const out = {};
    if (MODEL_KEY in updates) {
      out[MODEL_KEY] = updates.model ?? null;
    }
    if (SYSTEM_KEY in updates) {
      out[SYSTEM_KEY] = updates.system ?? null;
    }
    if (MESSAGES_KEY in updates) {
      const { messages } = updates;
      out[MESSAGES_KEY] = messages == null ? null : messagesToValue(messages);
    }
    if (updates.modelParameters) {
      Object.entries(updates.modelParameters).forEach(([name, value]) => {
        out[name] = value;
      });
    }
    return out;
  }
};
700
/**
 * Converts normalized messages into an `array` value of `object` elements.
 *
 * @param msgs - Messages with a `role` string and a `content` value.
 * @returns An array value in which each element has a primitive `role`
 *   property and the message's `content` value passed through unchanged.
 */
function messagesToValue(msgs) {
  const toElement = ({ role, content }) => ({
    kind: "object",
    properties: {
      role: { kind: "primitive", value: role },
      content
    }
  });
  return { kind: "array", elements: msgs.map((msg) => toElement(msg)) };
}
715
/** Content value substituted for a message element that has no `content` property. */
const EMPTY_CONTENT = { kind: "primitive", value: "" };
719
/**
 * Converts an extracted `messages` value into normalized messages.
 *
 * - No value yields an empty list.
 * - A non-array value becomes a single `user` message with that value as
 *   content.
 * - In an array, non-object elements become `user` messages; object
 *   elements take their role from a primitive `role` property (default
 *   `"user"`), their content from `content` (default `EMPTY_CONTENT`), and
 *   gain `toolCalls` only when some are extracted.
 *
 * @param value - Extracted property value, or nothing.
 * @returns The normalized message list.
 */
function extractMessages(value) {
  if (!value) return [];

  const asUserMessage = (content) => ({ role: "user", content });
  if (value.kind !== "array") return [asUserMessage(value)];

  return value.elements.map((element) => {
    if (element.kind !== "object") return asUserMessage(element);

    const { properties } = element;
    const roleValue = properties.role;
    const message = {
      role: roleValue?.kind === "primitive" ? String(roleValue.value) : "user",
      content: properties.content ?? EMPTY_CONTENT
    };
    const toolCalls = extractToolCalls(properties.toolCalls);
    if (toolCalls) message.toolCalls = toolCalls;
    return message;
  });
}
744
/**
 * Reads tool calls out of an extracted `toolCalls` value.
 *
 * Only `object` elements of an `array` value are considered. `toolName` and
 * `args` are stringified when they are primitives and default to `""`
 * otherwise.
 *
 * @param value - Extracted property value, or nothing.
 * @returns The tool calls, or `undefined` when `value` is not an array
 *   value or yields no tool call.
 */
function extractToolCalls(value) {
  if (value?.kind !== "array") return void 0;

  const primitiveText = (node) => (node?.kind === "primitive" ? String(node.value) : "");
  const calls = [];
  for (const element of value.elements) {
    if (element.kind === "object") {
      const { toolName, args } = element.properties;
      calls.push({ toolName: primitiveText(toolName), args: primitiveText(args) });
    }
  }
  return calls.length === 0 ? void 0 : calls;
}
758
+ //#endregion
759
+ export { MemoryTraceProvider as a, PROMPT_PROVIDER_ID_ATTRIBUTE as c, isEditable as d, setupStepCommand as i, SPAN_KIND_ATTRIBUTE as l, findPackageDts as n, PROMPT_ID_ATTRIBUTE as o, CONFIG_FILE_RELATIVE_PATH as r, PROMPT_NAME_ATTRIBUTE as s, VercelAISDK as t, createTracerForPrompt as u };