@axiom-lattice/gateway 2.1.72 → 2.1.74
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +8 -8
- package/CHANGELOG.md +21 -0
- package/dist/index.js +624 -44
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +605 -25
- package/dist/index.mjs.map +1 -1
- package/package.json +6 -5
- package/src/controllers/eval.ts +469 -0
- package/src/controllers/workflow-tracking.ts +3 -1
- package/src/routes/index.ts +3 -0
- package/src/services/eval-runner.ts +228 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
import { EventEmitter } from "events";
|
|
2
|
+
import { getStoreLattice, modelLatticeManager } from "@axiom-lattice/core";
|
|
3
|
+
import { LatticeEvalProject } from "@axiom-lattice/agent-eval";
|
|
4
|
+
import type { EvalStore } from "@axiom-lattice/protocols";
|
|
5
|
+
import type {
|
|
6
|
+
CaseRunResult,
|
|
7
|
+
LatticeEvalBatchReport,
|
|
8
|
+
LatticeEvalProjectType,
|
|
9
|
+
LatticeEvalSuiteType,
|
|
10
|
+
LatticeEvalLogEvent,
|
|
11
|
+
OutputType,
|
|
12
|
+
} from "@axiom-lattice/agent-eval";
|
|
13
|
+
import { v4 as uuidv4 } from "uuid";
|
|
14
|
+
|
|
15
|
+
/**
 * Payload published on the runner's event emitter under the `run:<runId>`
 * event name while an evaluation run is executing.
 */
export interface EvalStreamEvent {
  /** Identifier of the run this event belongs to. */
  runId: string;
  /**
   * Event category: `progress` is emitted after each finished case,
   * `completed` and `error` are terminal events for the run.
   */
  type: "progress" | "completed" | "error";
  /** Event-specific details (e.g. per-case scores, or an error `message`). */
  data: Record<string, unknown>;
}
|
|
20
|
+
|
|
21
|
+
/** Bookkeeping record for one evaluation run that is in flight in this process. */
interface RunContext {
  /** Identifier of the run; also the key under which the context is stored. */
  runId: string;
  /** Tenant that owns the run. */
  tenantId: string;
  /** Project being evaluated. */
  projectId: string;
  /** Controller whose signal is flipped when the run is aborted. */
  abortController: AbortController;
  /** Background task that resolves with the batch report of the run. */
  promise: Promise<{ report: LatticeEvalBatchReport }>;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Converts evaluation log events into the record shape stored with a run result.
 *
 * The event's `ts` field is exposed as `timestamp`; `level`, `message` and
 * `data` are copied through unchanged.
 *
 * @param logs - Log events collected while a case was running.
 * @returns One record per input event, in the same order.
 */
function mapLogs(logs: LatticeEvalLogEvent[]): Array<{ timestamp: string; level: string; message: string; data?: unknown }> {
  return logs.map(({ ts, level, message, data }) => {
    const record = { timestamp: ts, level, message, data };
    return record;
  });
}
|
|
37
|
+
|
|
38
|
+
/**
 * Starts evaluation runs in the background, persists their progress through
 * the eval store, and publishes {@link EvalStreamEvent}s on an internal
 * emitter under the event name `run:<runId>`.
 *
 * Only runs started by this process instance are tracked in memory.
 */
class EvalRunner {
  /** In-flight runs started by this instance, keyed by run id. */
  private readonly runs = new Map<string, RunContext>();
  private readonly eventEmitter = new EventEmitter();

  /** Returns the emitter on which `run:<runId>` events are published. */
  getEventEmitter(): EventEmitter {
    return this.eventEmitter;
  }

  /**
   * Creates a run for the given project and executes it in the background.
   *
   * @param tenantId - Tenant that owns the project.
   * @param projectId - Project whose suites and cases are evaluated.
   * @returns The id of the newly created run; the run itself keeps executing
   *   after this method resolves.
   * @throws Error "Project not found" when the project does not exist.
   * @throws Error "A run is already in progress for this project" when the
   *   store already lists a run with status "running" for the project.
   */
  async startRun(tenantId: string, projectId: string): Promise<string> {
    const store = this.getEvalStore();
    const project = await store.getProjectById(tenantId, projectId);
    if (!project) throw new Error("Project not found");

    const existingRuns = await store.getRunsByTenant(tenantId, { projectId, status: "running" });
    if (existingRuns.length > 0) {
      throw new Error("A run is already in progress for this project");
    }

    const suites = await store.getSuitesByProject(tenantId, projectId);
    const evalSuites: LatticeEvalSuiteType[] = [];
    let totalCases = 0;

    for (const suite of suites) {
      const cases = await store.getCasesBySuite(tenantId, suite.id);
      totalCases += cases.length;
      evalSuites.push({
        suiteName: suite.name,
        cases: cases.map((c) => ({
          caseId: c.id,
          input: { message: c.inputMessage, files: c.inputFiles },
          steps: c.steps as Array<{ agent_id: string; override_message?: string }>,
          output: { type: c.outputType } as OutputType,
          eval: {
            content_assertion: c.contentAssertion,
            eval_rubrics: c.rubrics?.map((r) => ({
              dimension: r.name,
              weight: r.weight,
              description: r.description,
            })),
          },
        })),
      });
    }

    const runId = uuidv4();
    // `||` is intentional: a missing or zero concurrency falls back to 3.
    const concurrency = (project as unknown as Record<string, unknown>).concurrency as number || 3;

    // Build the whole run configuration BEFORE the run record is created, and
    // tolerate absent config objects. Otherwise a project without a judge or
    // target-server config would throw after `createRun`, leaving a run stuck
    // in "running" that blocks every later run of the project.
    const judgeCfg: Record<string, unknown> =
      (project.judgeModelConfig as Record<string, unknown> | null | undefined) ?? {};
    const targetCfg: Record<string, string> =
      (project.targetServerConfig as Record<string, string> | null | undefined) ?? {};

    const hasModelKey = Boolean(judgeCfg.modelKey);
    const hasApiKey = Boolean(judgeCfg.apiKeyEnvName || judgeCfg.apiKey);

    let judgeModelConfig: { modelKey?: string; model?: any };
    if (hasModelKey) {
      judgeModelConfig = { modelKey: judgeCfg.modelKey as string };
    } else if (hasApiKey) {
      // Inline credentials were supplied: hand the raw config over as the model.
      judgeModelConfig = { model: judgeCfg as any };
    } else {
      // Neither a model key nor credentials: fall back to the first registered
      // model, or to the raw config when no model is registered.
      const firstModel = modelLatticeManager.getAllLattices()[0];
      judgeModelConfig = firstModel ? { modelKey: firstModel.key } : { model: judgeCfg as any };
    }

    const projectConfig: LatticeEvalProjectType = {
      projectName: project.name,
      version: project.version,
      description: project.description,
      suites: evalSuites,
      judge_agent_config: judgeModelConfig,
      lattice_server_config: {
        base_url: targetCfg.base_url || "",
        api_key: targetCfg.api_key || "",
        tenant_id: tenantId,
      },
      concurrency,
    };

    await store.createRun(tenantId, projectId, runId, { totalCases, concurrency });

    const abortController = new AbortController();
    const stats = { passed: 0, failed: 0, totalScore: 0 };
    const averageScore = (): number => {
      const done = stats.passed + stats.failed;
      return done > 0 ? stats.totalScore / done : 0;
    };

    // Invoked once per finished case: stores the case result, refreshes the
    // aggregate counters on the run, and publishes a progress event.
    const onCaseComplete = async (result: CaseRunResult, suiteName: string): Promise<void> => {
      const passed = result.result?.pass ?? false;
      const score = result.result?.final_score ?? 0;

      if (passed) stats.passed++; else stats.failed++;
      stats.totalScore += score;

      const completedCount = stats.passed + stats.failed;

      await store.createRunResult(tenantId, runId, uuidv4(), {
        suiteName,
        caseId: result.caseId,
        pass: passed,
        score,
        summary: result.result?.summary,
        dimensionResults: result.result?.dimension_results?.map((d) => ({
          name: d.name,
          score: d.score,
          reason: d.reason,
        })),
        durationMs: result.duration_ms,
        messages: result.messages,
        logs: mapLogs(result.logs),
        error: result.error,
      });

      await store.updateRunStatus(tenantId, runId, {
        passedCases: stats.passed,
        failedCases: stats.failed,
        avgScore: averageScore(),
      });

      this.emitRunEvent({
        type: "progress",
        runId,
        data: { suiteName, caseId: result.caseId, pass: passed, score, completed: completedCount, total: totalCases },
      });
    };

    const runPromise = (async () => {
      try {
        const evalProject = new LatticeEvalProject(projectConfig, onCaseComplete);
        const { report } = await evalProject.runAllSuitesBatch(concurrency);

        if (abortController.signal.aborted) {
          // abortRun() already persisted "aborted"; do not overwrite it with
          // "completed". Still publish a terminal event so listeners stop.
          this.emitRunEvent({ type: "error", runId, data: { message: "Run aborted" } });
          return { report };
        }

        await store.updateRunStatus(tenantId, runId, {
          status: "completed",
          completedAt: new Date(),
          passedCases: stats.passed,
          failedCases: stats.failed,
          avgScore: averageScore(),
        });

        this.emitRunEvent({
          type: "completed",
          runId,
          data: { passed: stats.passed, failed: stats.failed, avgScore: averageScore() },
        });

        return { report };
      } catch (err) {
        const errorMsg = err instanceof Error ? err.message : String(err);
        try {
          // Keep the persisted "aborted" status when the run was aborted.
          if (!abortController.signal.aborted) {
            await store.updateRunStatus(tenantId, runId, {
              status: "failed",
              error: errorMsg,
              completedAt: new Date(),
            });
          }
        } finally {
          // Listeners get their terminal event even if the status write fails.
          this.emitRunEvent({ type: "error", runId, data: { message: errorMsg } });
        }

        throw err;
      } finally {
        this.runs.delete(runId);
      }
    })();

    this.runs.set(runId, { runId, projectId, tenantId, abortController, promise: runPromise });
    // The failure was already persisted and published above; this only keeps
    // the background rejection from surfacing as an unhandled rejection.
    runPromise.catch(() => {});

    return runId;
  }

  /**
   * Marks an in-flight run as aborted.
   *
   * Flips the run's abort signal and persists status "aborted". The signal is
   * not handed to the eval project in this file, so cases already executing
   * are not interrupted here; the background task checks the signal when it
   * finishes and leaves the "aborted" status in place.
   *
   * @param runId - Id returned by {@link EvalRunner.startRun}.
   * @returns `false` when the run is not tracked by this instance, else `true`.
   */
  async abortRun(runId: string): Promise<boolean> {
    const ctx = this.runs.get(runId);
    if (!ctx) return false;
    ctx.abortController.abort();
    const store = this.getEvalStore();
    await store.updateRunStatus(ctx.tenantId, runId, { status: "aborted", completedAt: new Date() });
    return true;
  }

  /** Reports whether the run is still tracked as in flight by this instance. */
  isRunning(runId: string): boolean {
    return this.runs.has(runId);
  }

  /** Publishes an event on the `run:<runId>` channel of the emitter. */
  private emitRunEvent(event: EvalStreamEvent): void {
    this.eventEmitter.emit(`run:${event.runId}`, event);
  }

  /** Resolves the eval store registered under the "default" store lattice. */
  private getEvalStore(): EvalStore {
    return getStoreLattice("default", "eval").store as EvalStore;
  }
}

/** Shared runner instance for the process. */
export const evalRunner = new EvalRunner();
|