@llmops/sdk 1.0.0-beta.2 → 1.0.0-beta.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/dist/agents.cjs +1 -1
  2. package/dist/agents.d.cts +1 -1
  3. package/dist/agents.d.mts +1 -1
  4. package/dist/agents.mjs +1 -1
  5. package/dist/chunk-CxwUPGYo.mjs +21 -0
  6. package/dist/constants--ywcWP7q.cjs +18 -0
  7. package/dist/constants-BvnYU_pl.mjs +12 -0
  8. package/dist/eval.cjs +464 -0
  9. package/dist/eval.d.cts +240 -0
  10. package/dist/eval.d.mts +240 -0
  11. package/dist/eval.mjs +461 -0
  12. package/dist/express.cjs +29 -2
  13. package/dist/express.d.cts +7 -3
  14. package/dist/express.d.mts +7 -3
  15. package/dist/express.mjs +28 -1
  16. package/dist/hono.d.cts +2 -2
  17. package/dist/hono.d.mts +2 -2
  18. package/dist/{index-05byZKeu.d.mts → index-BZLzywwb.d.mts} +1 -1
  19. package/dist/{index-Beb26ZNG.d.cts → index-lgspeSNr.d.cts} +1 -1
  20. package/dist/index.cjs +3 -3
  21. package/dist/index.d.cts +4 -4
  22. package/dist/index.d.mts +4 -4
  23. package/dist/index.mjs +3 -3
  24. package/dist/interface-BbAwy96d.d.cts +223 -0
  25. package/dist/interface-Dz7B6QN1.d.mts +223 -0
  26. package/dist/nextjs.d.cts +2 -2
  27. package/dist/nextjs.d.mts +2 -2
  28. package/dist/store/d1.cjs +512 -0
  29. package/dist/store/d1.d.cts +60 -0
  30. package/dist/store/d1.d.mts +60 -0
  31. package/dist/store/d1.mjs +511 -0
  32. package/dist/store/pg.cjs +13634 -6
  33. package/dist/store/pg.d.cts +38 -2
  34. package/dist/store/pg.d.mts +38 -2
  35. package/dist/store/pg.mjs +13618 -2
  36. package/dist/store/sqlite.cjs +541 -0
  37. package/dist/store/sqlite.d.cts +50 -0
  38. package/dist/store/sqlite.d.mts +50 -0
  39. package/dist/store/sqlite.mjs +541 -0
  40. package/dist/types.d.cts +2 -2
  41. package/dist/types.d.mts +2 -2
  42. package/package.json +48 -3
  43. package/dist/express-B-wbCza5.cjs +0 -35
  44. package/dist/express-DMtc0d_Y.mjs +0 -30
  45. package/dist/index-DnWGper4.d.cts +0 -7
  46. package/dist/index-Dvz-L2Hf.d.mts +0 -7
  47. /package/dist/{agents-exporter-vcpgCF69.mjs → agents-exporter-CGxTzDeQ.mjs} +0 -0
  48. /package/dist/{agents-exporter-BZHCcFSd.d.mts → agents-exporter-CehKIArI.d.mts} +0 -0
  49. /package/dist/{agents-exporter-BuTq2n2y.cjs → agents-exporter-DizRE7CQ.cjs} +0 -0
  50. /package/dist/{agents-exporter-uzN3bkth.d.cts → agents-exporter-DkqkCcIx.d.cts} +0 -0
package/dist/eval.mjs ADDED
@@ -0,0 +1,461 @@
1
+ import { randomUUID } from "node:crypto";
2
+ import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
3
+ import { join } from "node:path";
4
+
5
+ //#region src/eval/dataset.ts
6
/**
 * Adapter that exposes a plain in-memory array through the dataset
 * accessors used by the eval runner: `size()`, `get(index)` and
 * `slice(start, end)`.
 */
var InlineDataset = class {
  /**
   * @param items Backing array. It is stored by reference, not copied.
   */
  constructor(items) {
    this.items = items;
  }

  /** @returns The number of items in the backing array. */
  size() {
    const { length } = this.items;
    return length;
  }

  /** @returns The item at `index`, or `undefined` when out of range. */
  get(index) {
    const { items } = this;
    return items[index];
  }

  /** @returns A shallow copy of the items in `[start, end)`. */
  slice(start, end) {
    const { items } = this;
    return items.slice(start, end);
  }
};
23
+
24
+ //#endregion
25
+ //#region src/eval/evaluate.ts
26
// ANSI SGR escape sequences used to style terminal output.
// Codes: 0 = reset, 2 = dim, 1 = bold, 36 = cyan, 32 = green, 31 = red, 33 = yellow.
const [RESET, DIM, BOLD, CYAN, GREEN, RED, YELLOW] = [0, 2, 1, 36, 32, 31, 33].map((code) => `\x1B[${code}m`);
33
/**
 * Runs `fn` over every item with at most `concurrency` calls in flight.
 *
 * Failure handling: when a call fails (rejects or throws synchronously), no
 * further items are started, the calls that are already running are allowed
 * to settle, and the first error is then rethrown. This keeps work from
 * continuing in the background after the returned promise has rejected.
 *
 * @param items Iterable of work items.
 * @param concurrency Maximum number of simultaneous `fn` calls; values below 1
 *   effectively run the items one at a time.
 * @param fn Async worker invoked once per item.
 * @returns A promise that resolves once every item has been processed.
 * @throws The first error raised by `fn`.
 */
async function pool(items, concurrency, fn) {
  const inFlight = new Set();
  let failed = false;
  let firstError;
  for (const item of items) {
    if (failed) break;
    // The Promise constructor calls fn synchronously and converts a
    // synchronous throw into a rejection, so both failure modes share a path.
    const task = new Promise((resolve) => resolve(fn(item))).then(
      () => {
        inFlight.delete(task);
      },
      (err) => {
        inFlight.delete(task);
        if (!failed) {
          failed = true;
          firstError = err;
        }
      }
    );
    inFlight.add(task);
    // Tasks never reject (errors are recorded above), so race/all only wait.
    if (inFlight.size >= concurrency) await Promise.race(inFlight);
  }
  await Promise.all(inFlight);
  if (failed) throw firstError;
}
44
/**
 * Summarises a list of scores, ignoring NaN entries.
 *
 * @param values Scores to aggregate; NaN marks a missing or failed score.
 * @returns `{ mean, min, max, median, count }` over the non-NaN values, or all
 *   zeros when no valid value remains. The input array is not modified.
 */
function computeStats(values) {
  // filter() yields a fresh array, so sorting it in place leaves `values` intact.
  const ordered = values.filter((v) => !Number.isNaN(v)).sort((a, b) => a - b);
  const count = ordered.length;
  if (count === 0) {
    return {
      mean: 0,
      min: 0,
      max: 0,
      median: 0,
      count: 0
    };
  }
  let total = 0;
  for (const v of ordered) total += v;
  const upper = Math.floor(count / 2);
  let median;
  if (count % 2 === 0) median = (ordered[upper - 1] + ordered[upper]) / 2;
  else median = ordered[upper];
  return {
    mean: total / count,
    min: ordered[0],
    max: ordered[count - 1],
    median,
    count
  };
}
65
// Human-readable progress goes to stderr.
const w = process.stderr;
// LLMOPS_EVAL_OUTPUT=json switches the pretty printers off ("silent" mode).
const isSilent = process.env.LLMOPS_EVAL_OUTPUT === "json";
67
/**
 * Writes the run banner (eval name, datapoint count and a divider) to stderr.
 * Does nothing in silent (JSON output) mode.
 *
 * @param name Display name of the run.
 * @param total Number of datapoints about to be evaluated.
 */
function printHeader(name, total) {
  if (isSilent) return;
  const divider = "─".repeat(50);
  w.write("\n");
  w.write(` ${BOLD}${name}${RESET} ${DIM}(${total} datapoints)${RESET}\n`);
  w.write(` ${DIM}${divider}${RESET}\n`);
}
73
/**
 * Writes one progress line for a finished datapoint to stderr.
 * Does nothing in silent (JSON output) mode.
 *
 * @param idx Zero-based position of the datapoint (shown one-based).
 * @param total Total number of datapoints in the run.
 * @param dp Datapoint result; reads `data`, `error`, `scores` and `durationMs`.
 */
function printDatapointResult(idx, total, dp) {
  if (isSilent) return;
  // Objects are shown as JSON, everything else via String(); both cut at 50 chars.
  const isObject = typeof dp.data === "object" && dp.data !== null;
  const label = (isObject ? JSON.stringify(dp.data) : String(dp.data)).slice(0, 50);
  const position = `${DIM}[${idx + 1}/${total}]${RESET}`;
  if (dp.error) {
    w.write(` ${RED}✗${RESET} ${position} ${label} ${RED}ERROR${RESET} ${DIM}${dp.error.slice(0, 60)}${RESET}\n`);
    return;
  }
  const parts = [];
  for (const [name, val] of Object.entries(dp.scores)) {
    if (Number.isNaN(val)) parts.push(`${DIM}${name}=NaN${RESET}`);
    else parts.push(`${scoreColor(val)}${name}=${val.toFixed(2)}${RESET}`);
  }
  w.write(` ${GREEN}✓${RESET} ${position} ${label} ${parts.join(" ")} ${DIM}${dp.durationMs}ms${RESET}\n`);
}
86
/**
 * Renders a score as a fixed-width bar of filled (█) and empty (░) cells.
 *
 * The score is clamped to [0, 1] before rendering. Without the clamp, a score
 * above 1 or below 0 produces a negative repeat count and
 * `String.prototype.repeat` throws a RangeError; NaN would yield an empty
 * string and break column alignment.
 *
 * @param score Score to draw; non-numeric or NaN values draw an empty bar.
 * @param width Total number of cells (default 20); negative or non-numeric
 *   widths are treated as 0.
 * @returns A string of exactly `width` cells.
 */
function scoreBar(score, width = 20) {
  const cells = Math.max(0, Math.trunc(width) || 0);
  const usable = typeof score === "number" && !Number.isNaN(score);
  const ratio = usable ? Math.min(1, Math.max(0, score)) : 0;
  const filled = Math.round(ratio * cells);
  return "█".repeat(filled) + "░".repeat(cells - filled);
}
91
/**
 * Picks the ANSI colour for a score: green at 0.8 and above, yellow at 0.5
 * and above, red otherwise (including NaN).
 *
 * @param score Score to classify.
 * @returns One of the GREEN, YELLOW or RED escape sequences.
 */
function scoreColor(score) {
  return score >= .8 ? GREEN : score >= .5 ? YELLOW : RED;
}
96
/**
 * Writes the end-of-run summary to stderr: one table row per evaluator
 * (mean, bar, min, max, median) followed by duration, pass/fail counts and a
 * shortened run id. Does nothing in silent (JSON output) mode.
 *
 * @param result Aggregated run result; reads `scores`, `count`, `errors`,
 *   `durationMs` and `runId`.
 */
function printSummary(result) {
  if (isSilent) return;
  w.write("\n");
  const rows = Object.entries(result.scores);
  if (rows.length > 0) {
    // The name column is at least 10 wide and grows to the longest evaluator name.
    const nameWidth = rows.reduce((widest, [label]) => Math.max(widest, label.length), 10);
    const cell = (value, width) => value.toFixed(2).padStart(width);
    const heading = [
      "Evaluator".padEnd(nameWidth),
      "Mean".padStart(6),
      "Bar".padEnd(20),
      "Min".padStart(5),
      "Max".padStart(5),
      "Med".padStart(5)
    ].join(" ");
    w.write(` ${DIM}${heading}${RESET}\n`);
    w.write(` ${DIM}${"─".repeat(nameWidth + 50)}${RESET}\n`);
    for (const [name, stats] of rows) {
      const color = scoreColor(stats.mean);
      const bar = scoreBar(stats.mean);
      w.write(` ${name.padEnd(nameWidth)} ${color}${cell(stats.mean, 6)}${RESET} ${DIM}${bar}${RESET} ${cell(stats.min, 5)} ${cell(stats.max, 5)} ${cell(stats.median, 5)}\n`);
    }
  }
  const passed = result.count - result.errors;
  const seconds = (result.durationMs / 1e3).toFixed(1);
  w.write("\n");
  w.write(` ${DIM}Duration${RESET} ${seconds}s`);
  w.write(` ${DIM}Passed${RESET} ${passed}/${result.count}`);
  if (result.errors > 0) w.write(` ${RED}Failed ${result.errors}${RESET}`);
  w.write(` ${DIM}Run${RESET} ${CYAN}${result.runId.slice(0, 8)}${RESET}`);
  w.write("\n\n");
}
118
/**
 * Persists a run result as pretty-printed JSON at
 * `<outputDir>/<result.name>/<Date.now()>.json`, creating directories as needed.
 *
 * @param result Run result to serialise; `result.name` selects the sub-directory.
 * @param outputDir Root directory for saved results.
 */
function saveResult(result, outputDir) {
  const targetDir = join(outputDir, result.name);
  mkdirSync(targetDir, { recursive: true });
  const targetFile = join(targetDir, `${Date.now()}.json`);
  const payload = JSON.stringify(result, null, 2);
  writeFileSync(targetFile, payload);
}
123
/**
 * Runs one executor over every datapoint of a dataset and scores each output.
 *
 * For each datapoint the executor is called with `dp.data`. If it succeeds and
 * returns something other than `null`, every evaluator is called with
 * `(output, dp.target, dp.data)`. A numeric result is stored under the
 * evaluator's name; an object result is flattened to `"<evaluator>.<key>"`
 * entries. An evaluator that throws gets a NaN score and a warning line.
 *
 * Each datapoint carries its own index, so results stay in dataset order even
 * when the same object reference appears more than once (a lookup by
 * `indexOf` would map every repeat to the first slot and leave holes).
 *
 * @param name Display name used for the progress header.
 * @param dataset Object exposing `size()` and `slice(start, end)` (sync or async).
 * @param executor Function under evaluation, called with `dp.data`.
 * @param evaluators Map of evaluator name to scoring function.
 * @param concurrency Maximum number of datapoints processed at once.
 * @returns `{ results, durationMs }` with one result per datapoint.
 */
async function runSingleExecutor(name, dataset, executor, evaluators, concurrency) {
  const size = await dataset.size();
  const datapoints = await dataset.slice(0, size);
  const results = new Array(datapoints.length);
  const startTime = Date.now();
  printHeader(name, datapoints.length);
  const indexed = Array.from(datapoints, (dp, idx) => ({ dp, idx }));
  await pool(indexed, concurrency, async ({ dp, idx }) => {
    const dpStart = Date.now();
    let output = null;
    let error;
    const scores = {};
    try {
      output = await executor(dp.data);
    } catch (err) {
      // Never store an empty message: downstream code detects failures by truthiness.
      error = (err instanceof Error ? err.message : String(err)) || "Unknown error";
    }
    if (error === undefined && output !== null) {
      for (const [evalName, evaluator] of Object.entries(evaluators)) {
        try {
          const result = await evaluator(output, dp.target, dp.data);
          if (typeof result === "number") scores[evalName] = result;
          else for (const [subKey, subScore] of Object.entries(result)) scores[`${evalName}.${subKey}`] = subScore;
        } catch (evalErr) {
          // A failing evaluator must not fail the datapoint; NaN is skipped by the stats.
          scores[evalName] = NaN;
          const msg = evalErr instanceof Error ? evalErr.message : String(evalErr);
          if (!isSilent) w.write(` ${YELLOW}⚠${RESET} ${DIM}evaluator "${evalName}":${RESET} ${msg.slice(0, 80)}\n`);
        }
      }
    }
    const dpResult = {
      data: dp.data,
      target: dp.target,
      metadata: dp.metadata,
      output,
      scores,
      durationMs: Date.now() - dpStart,
      error
    };
    results[idx] = dpResult;
    printDatapointResult(idx, datapoints.length, dpResult);
  });
  return {
    results,
    durationMs: Date.now() - startTime
  };
}
166
/**
 * Aggregates per-datapoint results into a run result.
 * Collects every score name seen in any datapoint and computes its statistics;
 * a datapoint without that score contributes NaN, which the stats ignore.
 */
function summarizeRun(name, runId, group, metadata, results, durationMs) {
  const scoreNames = new Set();
  for (const r of results) for (const key of Object.keys(r.scores)) scoreNames.add(key);
  const scores = {};
  for (const scoreName of scoreNames) scores[scoreName] = computeStats(results.map((r) => r.scores[scoreName] ?? NaN));
  return {
    name,
    runId,
    group,
    scores,
    durationMs,
    count: results.length,
    errors: results.filter((r) => r.error).length,
    metadata,
    results
  };
}

/**
 * Runs an evaluation over a dataset with either one executor or several
 * named variants.
 *
 * Options read from `options`:
 * - `name`: run name, also used as the output sub-directory.
 * - `data`: an array (wrapped in InlineDataset) or a dataset object.
 * - `executor` or `variants`: exactly one must be given; `variants` maps a
 *   variant name to an executor, and the variants run one after another.
 * - `evaluators`: map of evaluator name to scoring function.
 * - `concurrency`: datapoints processed at once (default 5).
 * - `group`, `metadata`: copied onto the result unchanged.
 * - `outputDir`: where results are saved; defaults to
 *   `LLMOPS_EVAL_OUTPUT_DIR` or "./llmops-evals".
 *
 * Every run (or every variant run) is saved with `saveResult`. In silent mode
 * the final result is written to stdout as JSON; otherwise a summary is printed.
 *
 * @returns The run result, or `{ name, runId, group, durationMs, metadata,
 *   variants }` when variants were used.
 * @throws Error when both or neither of `executor` and `variants` are given.
 */
async function evaluate(options) {
  const { name, data, executor, variants, evaluators, concurrency = 5, group, metadata, outputDir = process.env.LLMOPS_EVAL_OUTPUT_DIR || "./llmops-evals" } = options;
  const runId = randomUUID();
  if (executor && variants) throw new Error("evaluate(): provide either executor or variants, not both");
  if (!executor && !variants) throw new Error("evaluate(): provide either executor or variants");
  const dataset = Array.isArray(data) ? new InlineDataset(data) : data;
  if (executor) {
    const { results, durationMs } = await runSingleExecutor(name, dataset, executor, evaluators, concurrency);
    const result = summarizeRun(name, runId, group, metadata, results, durationMs);
    if (isSilent) process.stdout.write(JSON.stringify(result, null, 2));
    else printSummary(result);
    saveResult(result, outputDir);
    return result;
  }
  const variantResults = {};
  const totalStart = Date.now();
  for (const [variantName, variantExecutor] of Object.entries(variants)) {
    const variantRunName = `${name}/${variantName}`;
    const { results, durationMs } = await runSingleExecutor(variantRunName, dataset, variantExecutor, evaluators, concurrency);
    const variantResult = summarizeRun(variantRunName, runId, group, metadata, results, durationMs);
    variantResults[variantName] = variantResult;
    if (!isSilent) printSummary(variantResult);
    saveResult(variantResult, outputDir);
  }
  const variantEvalResult = {
    name,
    runId,
    group,
    durationMs: Date.now() - totalStart,
    metadata,
    variants: variantResults
  };
  if (isSilent) process.stdout.write(JSON.stringify(variantEvalResult, null, 2));
  return variantEvalResult;
}
228
+
229
+ //#endregion
230
+ //#region src/eval/compare.ts
231
/**
 * Loads an eval result from a JSON file.
 *
 * @param filePath Path of the saved result.
 * @returns The parsed JSON value.
 * @throws Error whose message starts with "Could not read eval result: <path>"
 *   and ends with the underlying reason, so a missing file can be told apart
 *   from invalid JSON.
 */
function loadResult(filePath) {
  const reasonOf = (err) => err instanceof Error ? err.message : String(err);
  let content;
  try {
    content = readFileSync(filePath, "utf-8");
  } catch (err) {
    throw new Error(`Could not read eval result: ${filePath} (${reasonOf(err)})`);
  }
  try {
    return JSON.parse(content);
  } catch (err) {
    throw new Error(`Could not read eval result: ${filePath} (invalid JSON: ${reasonOf(err)})`);
  }
}
242
/**
 * Compares two saved eval results and reports score changes.
 * The first file is the baseline, the second the candidate.
 *
 * Both files must be single-run results containing `scores` and `results`,
 * i.e. the JSON files written under `<outputDir>/<name>/<timestamp>.json`.
 * Datapoints are paired by position, so both runs should use the same
 * dataset in the same order.
 *
 * ```ts
 * const diff = await compare({
 *   files: [
 *     './llmops-evals/my-eval/1700000000000.json', // baseline
 *     './llmops-evals/my-eval/1700000360000.json', // candidate
 *   ],
 * })
 * ```
 *
 * A score missing from one run is treated as a mean of 0 in the aggregate
 * table. A datapoint is skipped in the per-datapoint comparison when either
 * side has no numeric score for that evaluator.
 *
 * @param options `{ files: [baselinePath, candidatePath] }`.
 * @returns `{ baseline, candidate, scores, regressions, improvements }`, where
 *   `baseline` and `candidate` are the run ids. A report is also written to stderr.
 * @throws Error when `files` has fewer than two entries, a file cannot be
 *   loaded, or a file is not a single-run eval result.
 */
async function compare(options) {
  const files = options?.files;
  if (!Array.isArray(files) || files.length < 2) throw new Error("compare(): options.files must be [baselinePath, candidatePath]");
  const baselineRun = loadResult(files[0]);
  const candidateRun = loadResult(files[1]);
  for (const [path, run] of [[files[0], baselineRun], [files[1], candidateRun]]) {
    const hasScores = run !== null && typeof run === "object" && run.scores !== null && typeof run.scores === "object";
    if (!hasScores || !Array.isArray(run.results)) throw new Error(`compare(): ${path} is not a single-run eval result (expected "scores" and "results")`);
  }
  const allScoreNames = new Set([...Object.keys(baselineRun.scores), ...Object.keys(candidateRun.scores)]);
  const scores = {};
  for (const scoreName of allScoreNames) {
    const baselineMean = baselineRun.scores[scoreName]?.mean ?? 0;
    const candidateMean = candidateRun.scores[scoreName]?.mean ?? 0;
    scores[scoreName] = {
      baseline: baselineMean,
      candidate: candidateMean,
      delta: candidateMean - baselineMean
    };
  }
  const regressions = [];
  const improvements = [];
  const minLen = Math.min(baselineRun.results.length, candidateRun.results.length);
  for (let i = 0; i < minLen; i++) {
    const baselineResult = baselineRun.results[i];
    const candidateResult = candidateRun.results[i];
    for (const scoreName of allScoreNames) {
      // NaN scores are stored as null in JSON; `?? NaN` restores them so they are skipped.
      const baselineScore = baselineResult?.scores?.[scoreName] ?? NaN;
      const candidateScore = candidateResult?.scores?.[scoreName] ?? NaN;
      if (Number.isNaN(baselineScore) || Number.isNaN(candidateScore)) continue;
      if (candidateScore === baselineScore) continue;
      const change = {
        data: baselineResult.data,
        evaluator: scoreName,
        baselineScore,
        candidateScore
      };
      if (candidateScore < baselineScore) regressions.push(change);
      else improvements.push(change);
    }
  }
  const result = {
    baseline: baselineRun.runId,
    candidate: candidateRun.runId,
    scores,
    regressions,
    improvements
  };
  const out = process.stderr;
  // JSON.stringify returns undefined for undefined/functions; fall back to String().
  const preview = (data) => (typeof data === "string" ? data : JSON.stringify(data) ?? String(data)).slice(0, 50);
  // Prints a heading, at most five changes, and a "... and N more" line.
  const printChanges = (changes, color, icon, noun) => {
    if (changes.length === 0) return;
    out.write(` ${color}${icon} ${changes.length} ${noun}${changes.length > 1 ? "s" : ""}${RESET}\n`);
    for (const change of changes.slice(0, 5)) {
      out.write(` ${DIM}${preview(change.data)}${RESET} ${change.evaluator}: ${change.baselineScore.toFixed(2)} → ${color}${change.candidateScore.toFixed(2)}${RESET}\n`);
    }
    if (changes.length > 5) out.write(` ${DIM}... and ${changes.length - 5} more${RESET}\n`);
    out.write("\n");
  };
  out.write("\n");
  out.write(` ${BOLD}Compare${RESET} ${DIM}${baselineRun.name} → ${candidateRun.name}${RESET}\n`);
  out.write(` ${DIM}${"─".repeat(50)}${RESET}\n\n`);
  const scoreEntries = Object.entries(scores);
  if (scoreEntries.length > 0) {
    const maxNameLen = Math.max(...scoreEntries.map(([n]) => n.length), 10);
    out.write(` ${DIM}${"Evaluator".padEnd(maxNameLen)} ${"Base".padStart(6)} ${"New".padStart(6)} ${"Delta".padStart(7)}${RESET}\n`);
    out.write(` ${DIM}${"─".repeat(maxNameLen + 30)}${RESET}\n`);
    for (const [scoreName, delta] of scoreEntries) {
      const sign = delta.delta >= 0 ? "+" : "";
      const color = delta.delta >= 0 ? GREEN : RED;
      const icon = delta.delta > 0 ? "▲" : delta.delta < 0 ? "▼" : "=";
      out.write(` ${scoreName.padEnd(maxNameLen)} ${delta.baseline.toFixed(2).padStart(6)} ${DIM}→${RESET} ${delta.candidate.toFixed(2).padStart(6)} ${color}${sign}${delta.delta.toFixed(2).padStart(5)} ${icon}${RESET}\n`);
    }
    out.write("\n");
  }
  printChanges(regressions, RED, "▼", "regression");
  printChanges(improvements, GREEN, "▲", "improvement");
  if (regressions.length === 0 && improvements.length === 0) out.write(` ${CYAN}No changes between runs${RESET}\n\n`);
  return result;
}
348
+
349
+ //#endregion
350
+ //#region src/eval/judge.ts
351
/**
 * Replaces `{{name}}` and `{{dotted.path}}` placeholders in a template.
 *
 * Lookup tries the longest dotted prefix that is an own key of `vars` and then
 * walks the remaining segments as nested properties. This resolves flat keys
 * such as `"target.answer"` as well as nested objects; splitting the path
 * first and walking from `vars.target` fails when that entry is a string.
 *
 * @param template Text containing `{{...}}` placeholders (word characters and dots only).
 * @param vars Placeholder values; keys may be plain names or dotted paths.
 * @returns The template with every placeholder substituted. Missing or nullish
 *   values become an empty string; non-string values are rendered as
 *   pretty-printed JSON.
 */
function interpolate(template, vars) {
  return template.replace(/\{\{(\w+(?:\.\w+)*)\}\}/g, (_, path) => {
    const segments = path.split(".");
    let value;
    for (let cut = segments.length; cut > 0; cut--) {
      const prefix = segments.slice(0, cut).join(".");
      // Own keys only, so names like "toString" do not resolve to inherited members.
      if (!Object.prototype.hasOwnProperty.call(vars, prefix)) continue;
      value = segments.slice(cut).reduce((obj, key) => obj?.[key], vars[prefix]);
      break;
    }
    if (value === void 0 || value === null) return "";
    if (typeof value === "string") return value;
    // JSON.stringify yields undefined for functions and symbols; never emit "undefined".
    return JSON.stringify(value, null, 2) ?? "";
  });
}
358
/**
 * Builds the placeholder table for a judge prompt.
 *
 * `output`, `target` and `data` are each stored as text (strings as-is,
 * anything else as pretty-printed JSON). When `target` or `data` is an
 * object, each top-level field is also stored under a flat
 * `"target.<key>"` / `"data.<key>"` entry with its raw value.
 *
 * @returns An object mapping placeholder names to values.
 */
function buildVars(output, target, data) {
  const asText = (value) => typeof value === "string" ? value : JSON.stringify(value, null, 2);
  const vars = {
    output: asText(output),
    target: asText(target),
    data: asText(data)
  };
  const addFields = (prefix, source) => {
    if (!source || typeof source !== "object") return;
    for (const [field, fieldValue] of Object.entries(source)) vars[`${prefix}.${field}`] = fieldValue;
  };
  addFields("target", target);
  addFields("data", data);
  return vars;
}
368
// Default system prompt for the LLM judge. It asks for a JSON-only reply with
// a numeric "score" field (0.0 to 1.0) and an optional "reasoning" field.
+ const DEFAULT_SYSTEM = `You are an expert evaluator. Your job is to grade an AI system's output.
369
+
370
+ Instructions:
371
+ - Read the grading criteria in the user message carefully.
372
+ - Evaluate the output objectively.
373
+ - Return ONLY valid JSON. No markdown, no explanation outside the JSON.
374
+ - The JSON must contain a "score" field with a number between 0.0 and 1.0.
375
+ - You may optionally include a "reasoning" field with a brief explanation.
376
+
377
+ Example response:
378
+ {"score": 0.85, "reasoning": "The response is mostly accurate but misses one detail."}`;
379
/**
 * Extracts a score from a judge model's raw reply.
 *
 * Markdown code fences are stripped, and if the text contains a `{...}` span
 * only that span is parsed. Accepted shapes, in order:
 * 1. an object with a numeric `score` field, giving a single number;
 * 2. a bare number, giving a single number;
 * 3. an object with other numeric fields (except `reasoning`), giving an
 *    object of those fields.
 * Every returned number is clamped to [0, 1].
 *
 * @param response Raw text returned by the judge model.
 * @returns A number, or an object mapping field names to numbers.
 * @throws SyntaxError when the text is not valid JSON; Error when no numeric
 *   score can be found.
 */
function defaultParse(response) {
  const clamp01 = (value) => Math.max(0, Math.min(1, value));
  const unfenced = response.replace(/```(?:json)?\n?/g, "").replace(/```$/g, "").trim();
  const braced = unfenced.match(/\{[\s\S]*\}/);
  const parsed = JSON.parse(braced ? braced[0] : unfenced);
  if (typeof parsed?.score === "number") return clamp01(parsed.score);
  if (typeof parsed === "number") return clamp01(parsed);
  if (typeof parsed === "object" && parsed !== null) {
    const numericFields = Object.entries(parsed).filter(([key, value]) => typeof value === "number" && key !== "reasoning");
    if (numericFields.length > 0) {
      return Object.fromEntries(numericFields.map(([key, value]) => [key, clamp01(value)]));
    }
  }
  throw new Error(`Could not extract score from judge response: ${response.slice(0, 300)}`);
}
392
/**
 * Sends one chat-completion request to the judge model and returns the text
 * of the first choice.
 *
 * The provider configuration comes from `client.provider()`, which supplies
 * `fetch`, `baseURL` and `apiKey`. The request is a POST to
 * `<baseURL>/chat/completions` with a system and a user message.
 *
 * @param client Object exposing `provider()`.
 * @param model Model identifier placed in the request body.
 * @param system System message content.
 * @param userMessage User message content.
 * @param temperature Sampling temperature placed in the request body.
 * @returns The content of `choices[0].message`.
 * @throws Error "Judge LLM call failed (<status>): ..." on a non-OK response,
 *   or "Judge LLM returned empty response" when no content is present.
 */
async function callLLM(client, model, system, userMessage, temperature) {
  const providerConfig = client.provider();
  const endpoint = `${providerConfig.baseURL}/chat/completions`;
  const messages = [
    { role: "system", content: system },
    { role: "user", content: userMessage }
  ];
  const response = await providerConfig.fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${providerConfig.apiKey}`
    },
    body: JSON.stringify({ model, temperature, messages })
  });
  if (!response.ok) {
    // Reading the error body is best-effort; fall back to a placeholder.
    const errorBody = await response.text().catch(() => "unknown error");
    throw new Error(`Judge LLM call failed (${response.status}): ${errorBody.slice(0, 300)}`);
  }
  const payload = await response.json();
  const content = payload.choices?.[0]?.message?.content;
  if (!content) throw new Error("Judge LLM returned empty response");
  return content;
}
420
/**
 * Creates an evaluator that asks an LLM to grade the output.
 *
 * How the returned evaluator works:
 * - The prompt is filled in from {{output}}, {{target}}, {{data}} and their fields.
 * - The system message (default: DEFAULT_SYSTEM) tells the model to answer with JSON scores.
 * - Temperature defaults to 0.
 * - The call is attempted `1 + maxRetries` times (default: one retry). A failed
 *   HTTP call ("Judge LLM call failed") is rethrown immediately; other errors,
 *   such as an unparsable reply, trigger the next attempt.
 * - `parse` (default: defaultParse) turns the reply into the score(s).
 *
 * Usage:
 * ```ts
 * import { llmops } from '@llmops/sdk'
 *
 * const client = llmops()
 * const accuracy = judgeScorer({
 *   model: '@openai/gpt-4o',
 *   prompt: `Rate the accuracy of this response.
 * Expected: {{target.answer}}
 * Actual: {{output}}`,
 *   client,
 * })
 * ```
 *
 * @param options `{ model, prompt, client, system?, temperature?, maxRetries?, parse? }`.
 * @returns An async evaluator `(output, target, data) => score`.
 */
function judgeScorer(options) {
  const { model, prompt, client, system = DEFAULT_SYSTEM, temperature = 0, maxRetries = 1, parse = defaultParse } = options;
  return async (output, target, data) => {
    const userMessage = interpolate(prompt, buildVars(output, target, data));
    let lastError = null;
    let remaining = 1 + maxRetries;
    while (remaining > 0) {
      remaining -= 1;
      try {
        const reply = await callLLM(client, model, system, userMessage, temperature);
        return parse(reply);
      } catch (err) {
        lastError = err instanceof Error ? err : new Error(String(err));
        // A transport or HTTP failure will not be fixed by asking again.
        if (lastError.message.includes("Judge LLM call failed")) throw lastError;
      }
    }
    throw lastError ?? new Error("Judge scoring failed");
  };
}
459
+
460
+ //#endregion
461
+ export { InlineDataset, compare, evaluate, judgeScorer };
package/dist/express.cjs CHANGED
@@ -1,3 +1,30 @@
1
- const require_express = require('./express-B-wbCza5.cjs');
1
+ let node_stream = require("node:stream");
2
2
 
3
- exports.createLLMOpsMiddleware = require_express.createLLMOpsMiddleware;
3
+ //#region src/lib/express/index.ts
4
/**
 * Creates an Express middleware that forwards requests to the LLMOps handler.
 *
 * For each request the configured `basePath` prefix is removed from
 * `req.originalUrl`, and a fetch `Request` is built from the Express request
 * (the body is JSON-serialised from `req.body` for anything but GET/HEAD) and
 * passed to `client.handler`. A 404 from the handler falls through to the
 * next middleware. Otherwise headers and status are copied to the Express
 * response; `text/event-stream` bodies are piped, all others are sent as text.
 *
 * Errors are forwarded with `next(err)`. Express 4 does not observe a rejected
 * promise from async middleware, so without this the request would hang.
 *
 * @param client LLMOps client; reads `client.config.basePath` and calls `client.handler`.
 * @returns An async `(req, res, next)` middleware.
 */
function createLLMOpsMiddleware(client) {
  const basePath = client.config.basePath;
  return async (req, res, next) => {
    let response;
    try {
      let urlPath = req.originalUrl;
      if (basePath && urlPath.startsWith(basePath)) urlPath = urlPath.slice(basePath.length) || "/";
      const url = new URL(urlPath, `${req.protocol}://${req.get("host")}`);
      const request = new Request(url, {
        method: req.method,
        headers: req.headers,
        body: ["GET", "HEAD"].includes(req.method) ? void 0 : JSON.stringify(req.body)
      });
      response = await client.handler(request);
    } catch (err) {
      return next(err);
    }
    if (response.status === 404) return next();
    try {
      response.headers?.forEach((value, key) => {
        res.setHeader(key, value);
      });
      res.status(response.status);
      const isEventStream = (response.headers?.get("content-type"))?.includes("text/event-stream");
      if (isEventStream && response.body) {
        const source = node_stream.Readable.fromWeb(response.body);
        // pipe() does not forward source errors; end the response rather than crash the process.
        source.on("error", (err) => res.destroy(err));
        source.pipe(res);
      } else {
        const body = await response.text();
        res.send(body);
      }
    } catch (err) {
      next(err);
    }
  };
}
28
+
29
+ //#endregion
30
+ exports.createLLMOpsMiddleware = createLLMOpsMiddleware;
@@ -1,4 +1,8 @@
1
- import "./agents-exporter-uzN3bkth.cjs";
2
- import "./index-Beb26ZNG.cjs";
3
- import { t as createLLMOpsMiddleware } from "./index-DnWGper4.cjs";
1
+ import "./agents-exporter-DkqkCcIx.cjs";
2
+ import { t as LLMOpsClient } from "./index-lgspeSNr.cjs";
3
+ import { NextFunction, Request, Response } from "express";
4
+
5
+ //#region src/lib/express/index.d.ts
6
/**
 * Creates an Express middleware from an LLMOps client.
 *
 * @param client The LLMOps client to serve.
 * @returns An Express `(req, res, next)` handler that resolves to a promise.
 */
+ declare function createLLMOpsMiddleware(client: LLMOpsClient): (req: Request, res: Response, next: NextFunction) => Promise<void>;
7
+ //#endregion
4
8
  export { createLLMOpsMiddleware };
@@ -1,4 +1,8 @@
1
- import "./agents-exporter-BZHCcFSd.mjs";
2
- import "./index-05byZKeu.mjs";
3
- import { t as createLLMOpsMiddleware } from "./index-Dvz-L2Hf.mjs";
1
+ import "./agents-exporter-CehKIArI.mjs";
2
+ import { t as LLMOpsClient } from "./index-BZLzywwb.mjs";
3
+ import { NextFunction, Request, Response } from "express";
4
+
5
+ //#region src/lib/express/index.d.ts
6
/**
 * Creates an Express middleware from an LLMOps client.
 *
 * @param client The LLMOps client to serve.
 * @returns An Express `(req, res, next)` handler that resolves to a promise.
 */
+ declare function createLLMOpsMiddleware(client: LLMOpsClient): (req: Request, res: Response, next: NextFunction) => Promise<void>;
7
+ //#endregion
4
8
  export { createLLMOpsMiddleware };
package/dist/express.mjs CHANGED
@@ -1,3 +1,30 @@
1
- import { t as createLLMOpsMiddleware } from "./express-DMtc0d_Y.mjs";
1
+ import { Readable } from "node:stream";
2
2
 
3
+ //#region src/lib/express/index.ts
4
/**
 * Creates an Express middleware that forwards requests to the LLMOps handler.
 *
 * For each request the configured `basePath` prefix is removed from
 * `req.originalUrl`, and a fetch `Request` is built from the Express request
 * (the body is JSON-serialised from `req.body` for anything but GET/HEAD) and
 * passed to `client.handler`. A 404 from the handler falls through to the
 * next middleware. Otherwise headers and status are copied to the Express
 * response; `text/event-stream` bodies are piped, all others are sent as text.
 *
 * Errors are forwarded with `next(err)`. Express 4 does not observe a rejected
 * promise from async middleware, so without this the request would hang.
 *
 * @param client LLMOps client; reads `client.config.basePath` and calls `client.handler`.
 * @returns An async `(req, res, next)` middleware.
 */
function createLLMOpsMiddleware(client) {
  const basePath = client.config.basePath;
  return async (req, res, next) => {
    let response;
    try {
      let urlPath = req.originalUrl;
      if (basePath && urlPath.startsWith(basePath)) urlPath = urlPath.slice(basePath.length) || "/";
      const url = new URL(urlPath, `${req.protocol}://${req.get("host")}`);
      const request = new Request(url, {
        method: req.method,
        headers: req.headers,
        body: ["GET", "HEAD"].includes(req.method) ? void 0 : JSON.stringify(req.body)
      });
      response = await client.handler(request);
    } catch (err) {
      return next(err);
    }
    if (response.status === 404) return next();
    try {
      response.headers?.forEach((value, key) => {
        res.setHeader(key, value);
      });
      res.status(response.status);
      const isEventStream = (response.headers?.get("content-type"))?.includes("text/event-stream");
      if (isEventStream && response.body) {
        const source = Readable.fromWeb(response.body);
        // pipe() does not forward source errors; end the response rather than crash the process.
        source.on("error", (err) => res.destroy(err));
        source.pipe(res);
      } else {
        const body = await response.text();
        res.send(body);
      }
    } catch (err) {
      next(err);
    }
  };
}
28
+
29
+ //#endregion
3
30
  export { createLLMOpsMiddleware };
package/dist/hono.d.cts CHANGED
@@ -1,5 +1,5 @@
1
- import "./agents-exporter-uzN3bkth.cjs";
2
- import { t as LLMOpsClient } from "./index-Beb26ZNG.cjs";
1
+ import "./agents-exporter-DkqkCcIx.cjs";
2
+ import { t as LLMOpsClient } from "./index-lgspeSNr.cjs";
3
3
  import { MiddlewareHandler } from "hono";
4
4
 
5
5
  //#region src/lib/hono/index.d.ts
package/dist/hono.d.mts CHANGED
@@ -1,5 +1,5 @@
1
- import "./agents-exporter-BZHCcFSd.mjs";
2
- import { t as LLMOpsClient } from "./index-05byZKeu.mjs";
1
+ import "./agents-exporter-CehKIArI.mjs";
2
+ import { t as LLMOpsClient } from "./index-BZLzywwb.mjs";
3
3
  import { MiddlewareHandler } from "hono";
4
4
 
5
5
  //#region src/lib/hono/index.d.ts
@@ -1,4 +1,4 @@
1
- import { a as AgentsTracingExporter } from "./agents-exporter-BZHCcFSd.mjs";
1
+ import { a as AgentsTracingExporter } from "./agents-exporter-CehKIArI.mjs";
2
2
  import { LLMOpsConfig, ValidatedLLMOpsConfig } from "@llmops/core";
3
3
 
4
4
  //#region src/telemetry/langchain-client.d.ts
@@ -1,4 +1,4 @@
1
- import { a as AgentsTracingExporter } from "./agents-exporter-uzN3bkth.cjs";
1
+ import { a as AgentsTracingExporter } from "./agents-exporter-DkqkCcIx.cjs";
2
2
  import { LLMOpsConfig, ValidatedLLMOpsConfig } from "@llmops/core";
3
3
 
4
4
  //#region src/telemetry/langchain-client.d.ts
package/dist/index.cjs CHANGED
@@ -1,5 +1,5 @@
1
- const require_express = require('./express-B-wbCza5.cjs');
2
- const require_agents_exporter = require('./agents-exporter-BuTq2n2y.cjs');
1
+ const require_agents_exporter = require('./agents-exporter-DizRE7CQ.cjs');
2
+ const require_constants = require('./constants--ywcWP7q.cjs');
3
3
  let __llmops_core = require("@llmops/core");
4
4
  let __llmops_app = require("@llmops/app");
5
5
 
@@ -229,8 +229,8 @@ function createLLMOpsSpanExporter(config) {
229
229
  }
230
230
 
231
231
  //#endregion
232
+ exports.COST_SUMMARY_GROUP_BY = require_constants.COST_SUMMARY_GROUP_BY;
232
233
  exports.createLLMOpsAgentsExporter = require_agents_exporter.createLLMOpsAgentsExporter;
233
234
  exports.createLLMOpsLangChainClient = createLLMOpsLangChainClient;
234
- exports.createLLMOpsMiddleware = require_express.createLLMOpsMiddleware;
235
235
  exports.createLLMOpsSpanExporter = createLLMOpsSpanExporter;
236
236
  exports.llmops = createLLMOps;
package/dist/index.d.cts CHANGED
@@ -1,6 +1,6 @@
1
- import { a as AgentsTracingExporter, o as LLMOpsAgentsExporterConfig, s as createLLMOpsAgentsExporter } from "./agents-exporter-uzN3bkth.cjs";
2
- import { a as LLMOpsLangChainClientConfig, i as createLLMOps, n as ProviderOptions, o as LangChainTracingClient, r as TraceContext, s as createLLMOpsLangChainClient, t as LLMOpsClient } from "./index-Beb26ZNG.cjs";
3
- import { t as createLLMOpsMiddleware } from "./index-DnWGper4.cjs";
1
+ import { a as AgentsTracingExporter, o as LLMOpsAgentsExporterConfig, s as createLLMOpsAgentsExporter } from "./agents-exporter-DkqkCcIx.cjs";
2
+ import { a as LLMOpsLangChainClientConfig, i as createLLMOps, n as ProviderOptions, o as LangChainTracingClient, r as TraceContext, s as createLLMOpsLangChainClient, t as LLMOpsClient } from "./index-lgspeSNr.cjs";
3
+ import { a as TraceUpsert, i as SpanInsert, n as LLMRequestInsert, o as COST_SUMMARY_GROUP_BY, r as SpanEventInsert, s as CostSummaryGroupBy, t as TelemetryStore } from "./interface-BbAwy96d.cjs";
4
4
 
5
5
  //#region src/telemetry/exporter.d.ts
6
6
 
@@ -92,4 +92,4 @@ interface LLMOpsExporterConfig {
92
92
  */
93
93
  declare function createLLMOpsSpanExporter(config: LLMOpsExporterConfig): SpanExporter;
94
94
  //#endregion
95
- export { type AgentsTracingExporter, type LLMOpsAgentsExporterConfig, type LLMOpsClient, type LLMOpsExporterConfig, type LLMOpsLangChainClientConfig, type LangChainTracingClient, type ProviderOptions, type SpanExporter, type TraceContext, createLLMOpsAgentsExporter, createLLMOpsLangChainClient, createLLMOpsMiddleware, createLLMOpsSpanExporter, createLLMOps as llmops };
95
+ export { type AgentsTracingExporter, COST_SUMMARY_GROUP_BY, type CostSummaryGroupBy, type LLMOpsAgentsExporterConfig, type LLMOpsClient, type LLMOpsExporterConfig, type LLMOpsLangChainClientConfig, type LLMRequestInsert, type LangChainTracingClient, type ProviderOptions, type SpanEventInsert, type SpanExporter, type SpanInsert, type TelemetryStore, type TraceContext, type TraceUpsert, createLLMOpsAgentsExporter, createLLMOpsLangChainClient, createLLMOpsSpanExporter, createLLMOps as llmops };
package/dist/index.d.mts CHANGED
@@ -1,6 +1,6 @@
1
- import { a as AgentsTracingExporter, o as LLMOpsAgentsExporterConfig, s as createLLMOpsAgentsExporter } from "./agents-exporter-BZHCcFSd.mjs";
2
- import { a as LLMOpsLangChainClientConfig, i as createLLMOps, n as ProviderOptions, o as LangChainTracingClient, r as TraceContext, s as createLLMOpsLangChainClient, t as LLMOpsClient } from "./index-05byZKeu.mjs";
3
- import { t as createLLMOpsMiddleware } from "./index-Dvz-L2Hf.mjs";
1
+ import { a as AgentsTracingExporter, o as LLMOpsAgentsExporterConfig, s as createLLMOpsAgentsExporter } from "./agents-exporter-CehKIArI.mjs";
2
+ import { a as LLMOpsLangChainClientConfig, i as createLLMOps, n as ProviderOptions, o as LangChainTracingClient, r as TraceContext, s as createLLMOpsLangChainClient, t as LLMOpsClient } from "./index-BZLzywwb.mjs";
3
+ import { a as TraceUpsert, i as SpanInsert, n as LLMRequestInsert, o as COST_SUMMARY_GROUP_BY, r as SpanEventInsert, s as CostSummaryGroupBy, t as TelemetryStore } from "./interface-Dz7B6QN1.mjs";
4
4
 
5
5
  //#region src/telemetry/exporter.d.ts
6
6
 
@@ -92,4 +92,4 @@ interface LLMOpsExporterConfig {
92
92
  */
93
93
  declare function createLLMOpsSpanExporter(config: LLMOpsExporterConfig): SpanExporter;
94
94
  //#endregion
95
- export { type AgentsTracingExporter, type LLMOpsAgentsExporterConfig, type LLMOpsClient, type LLMOpsExporterConfig, type LLMOpsLangChainClientConfig, type LangChainTracingClient, type ProviderOptions, type SpanExporter, type TraceContext, createLLMOpsAgentsExporter, createLLMOpsLangChainClient, createLLMOpsMiddleware, createLLMOpsSpanExporter, createLLMOps as llmops };
95
+ export { type AgentsTracingExporter, COST_SUMMARY_GROUP_BY, type CostSummaryGroupBy, type LLMOpsAgentsExporterConfig, type LLMOpsClient, type LLMOpsExporterConfig, type LLMOpsLangChainClientConfig, type LLMRequestInsert, type LangChainTracingClient, type ProviderOptions, type SpanEventInsert, type SpanExporter, type SpanInsert, type TelemetryStore, type TraceContext, type TraceUpsert, createLLMOpsAgentsExporter, createLLMOpsLangChainClient, createLLMOpsSpanExporter, createLLMOps as llmops };
package/dist/index.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { t as createLLMOpsMiddleware } from "./express-DMtc0d_Y.mjs";
2
- import { t as createLLMOpsAgentsExporter } from "./agents-exporter-vcpgCF69.mjs";
1
+ import { t as createLLMOpsAgentsExporter } from "./agents-exporter-CGxTzDeQ.mjs";
2
+ import { t as COST_SUMMARY_GROUP_BY } from "./constants-BvnYU_pl.mjs";
3
3
  import { LLMOPS_INTERNAL_HEADER, LLMOPS_SPAN_NAME_HEADER, LLMOPS_TRACE_ID_HEADER, LLMOPS_TRACE_NAME_HEADER } from "@llmops/core";
4
4
  import { createApp } from "@llmops/app";
5
5
 
@@ -229,4 +229,4 @@ function createLLMOpsSpanExporter(config) {
229
229
  }
230
230
 
231
231
  //#endregion
232
- export { createLLMOpsAgentsExporter, createLLMOpsLangChainClient, createLLMOpsMiddleware, createLLMOpsSpanExporter, createLLMOps as llmops };
232
+ export { COST_SUMMARY_GROUP_BY, createLLMOpsAgentsExporter, createLLMOpsLangChainClient, createLLMOpsSpanExporter, createLLMOps as llmops };