@axiom-lattice/pg-stores 1.0.53 → 1.0.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,880 @@
1
+ /**
2
+ * PostgreSQL implementation of EvalStore
3
+ */
4
+
5
+ import { Pool } from "pg";
6
+ import type { PoolConfig } from "pg";
7
+ import {
8
+ EvalStore,
9
+ EvalProject,
10
+ CreateEvalProjectRequest,
11
+ EvalSuite,
12
+ CreateEvalSuiteRequest,
13
+ EvalCase,
14
+ CreateEvalCaseRequest,
15
+ EvalRun,
16
+ CreateEvalRunRequest,
17
+ EvalRunResult,
18
+ EvalProjectReport,
19
+ } from "@axiom-lattice/protocols";
20
+ import { MigrationManager } from "../migrations/migration";
21
+ import { evalMigrations } from "../migrations/eval_migrations";
22
+ import { v4 as uuidv4 } from "uuid";
23
+
24
/**
 * Construction options for the PostgreSQL-backed EvalStore.
 */
export interface PostgreSQLEvalStoreOptions {
  /**
   * Pool settings: either a connection string or a `pg` `PoolConfig` object.
   */
  poolConfig: string | PoolConfig;
  /**
   * Migrations are started from the constructor unless this is explicitly `false`.
   * @default true
   */
  autoMigrate?: boolean;
}
31
+
32
+ /**
33
+ * PostgreSQL implementation of EvalStore
34
+ *
35
+ * Features:
36
+ * - Multi-tenant isolation via tenant_id
37
+ * - Full CRUD for projects, suites, cases, runs, run results
38
+ * - Project report aggregation
39
+ */
40
+ export class PostgreSQLEvalStore implements EvalStore {
41
+ private pool: Pool;
42
+ private migrationManager: MigrationManager;
43
+ private initialized: boolean = false;
44
+ private ownsPool: boolean = true;
45
+ private initPromise: Promise<void> | null = null;
46
+
47
/**
 * Build the connection pool, register every eval migration, and (unless
 * `autoMigrate` is `false`) start migrating in the background.
 *
 * @param options - Pool configuration and the auto-migrate switch.
 */
constructor(options: PostgreSQLEvalStoreOptions) {
  // A bare string is treated as a connection string; an object is handed to Pool as-is.
  this.pool =
    typeof options.poolConfig === "string"
      ? new Pool({ connectionString: options.poolConfig })
      : new Pool(options.poolConfig);

  this.migrationManager = new MigrationManager(this.pool);
  for (const migration of evalMigrations) {
    this.migrationManager.register(migration);
  }

  if (options.autoMigrate !== false) {
    // Fire-and-forget: a constructor cannot await. The failure is only logged here.
    // Rethrowing from this handler would reject a promise nobody observes, i.e. an
    // unhandled rejection, which terminates the process by default on current Node.js.
    // `initialized` stays false after a failure, so the next store call goes through
    // ensureInitialized() -> initialize() again and the error reaches that caller.
    void this.initialize().catch((error: unknown) => {
      console.error("Failed to initialize PostgreSQLEvalStore:", error);
    });
  }
}
68
+
69
/**
 * Release resources held by the store.
 * The pool is ended only when this instance owns it (`ownsPool`).
 */
async dispose(): Promise<void> {
  if (!this.ownsPool || !this.pool) {
    return;
  }
  await this.pool.end();
}
75
+
76
/**
 * Run the registered migrations once.
 *
 * Concurrent callers share the in-flight promise instead of starting a second
 * migration. The shared promise is cleared when the attempt settles, so a
 * failed attempt can be retried by a later call.
 */
async initialize(): Promise<void> {
  if (this.initialized) return;
  if (this.initPromise) return this.initPromise;

  const runMigrations = async (): Promise<void> => {
    try {
      await this.migrationManager.migrate();
      this.initialized = true;
    } finally {
      // Cleared on success and on failure alike.
      this.initPromise = null;
    }
  };

  this.initPromise = runMigrations();
  return this.initPromise;
}
100
+
101
/** Await initialization unless it has already completed successfully. */
private async ensureInitialized(): Promise<void> {
  if (this.initialized) {
    return;
  }
  await this.initialize();
}
107
+
108
+ // ---------------------------------------------------------------------------
109
+ // Row mappers
110
+ // ---------------------------------------------------------------------------
111
+
112
/**
 * Convert a `lattice_eval_projects` row into an {@link EvalProject}.
 *
 * JSON columns may arrive as strings or already-parsed values; both are accepted.
 * The two required configs fall back to `{}` and `concurrency` falls back to 3.
 */
private mapRowToProject(row: Record<string, unknown>): EvalProject {
  const judgeModelConfig = this.parseRequiredJson<Record<string, unknown>>(row.judge_model_config, {});
  const targetServerConfig = this.parseRequiredJson<Record<string, unknown>>(row.target_server_config, {});
  const reportConfig = this.parseOptionalJson<Record<string, unknown>>(row.report_config);

  const project: EvalProject = {
    id: row.id as string,
    tenantId: row.tenant_id as string,
    name: row.name as string,
    description: row.description as string | undefined,
    version: row.version as string | undefined,
    judgeModelConfig,
    targetServerConfig,
    concurrency: (row.concurrency as number) ?? 3,
    reportConfig,
    createdAt: new Date(row.created_at as string),
    updatedAt: new Date(row.updated_at as string),
  };
  return project;
}
127
+
128
/**
 * Convert a `lattice_eval_suites` row into an {@link EvalSuite}.
 * `caseCount` is carried over only when the query selected a `case_count` column.
 */
private mapRowToSuite(row: Record<string, unknown>): EvalSuite {
  const suite: EvalSuite = {
    id: row.id as string,
    tenantId: row.tenant_id as string,
    projectId: row.project_id as string,
    name: row.name as string,
    createdAt: new Date(row.created_at as string),
    updatedAt: new Date(row.updated_at as string),
    // A missing column is already `undefined`, so the value passes through unchanged.
    caseCount: row.case_count as number | undefined,
  };
  return suite;
}
139
+
140
/**
 * Convert a `lattice_eval_cases` row into an {@link EvalCase}.
 *
 * Defaults: `steps` -> `[]`, `outputType` -> `"message_content"`,
 * `contentAssertion` -> `""`. `inputFiles` and `rubrics` stay `undefined`
 * when absent or unparsable.
 */
private mapRowToCase(row: Record<string, unknown>): EvalCase {
  const inputFiles = this.parseOptionalJson<Record<string, string>>(row.input_files);
  const steps = this.parseRequiredJson<Array<{ agent_id: string; override_message?: string }>>(row.steps, []);
  const rubrics = this.parseOptionalJson<Array<{ name: string; weight: number; description: string }>>(row.rubrics);

  const evalCase: EvalCase = {
    id: row.id as string,
    tenantId: row.tenant_id as string,
    suiteId: row.suite_id as string,
    inputMessage: row.input_message as string,
    inputFiles,
    steps,
    outputType: (row.output_type as EvalCase["outputType"]) || "message_content",
    contentAssertion: (row.content_assertion as string) || "",
    rubrics,
    createdAt: new Date(row.created_at as string),
    updatedAt: new Date(row.updated_at as string),
  };
  return evalCase;
}
155
+
156
/**
 * Convert a `lattice_eval_runs` row into an {@link EvalRun}.
 *
 * Null/undefined counters default to 0 and `concurrency` to 3; `startedAt` and
 * `completedAt` are `undefined` when the column value is falsy.
 */
private mapRowToRun(row: Record<string, unknown>): EvalRun {
  const toOptionalDate = (value: unknown): Date | undefined =>
    value ? new Date(value as string) : undefined;

  const run: EvalRun = {
    id: row.id as string,
    projectId: row.project_id as string,
    tenantId: row.tenant_id as string,
    status: row.status as EvalRun["status"],
    concurrency: (row.concurrency as number) ?? 3,
    totalCases: (row.total_cases as number) ?? 0,
    passedCases: (row.passed_cases as number) ?? 0,
    failedCases: (row.failed_cases as number) ?? 0,
    avgScore: (row.avg_score as number) ?? 0,
    error: row.error as string | undefined,
    createdAt: new Date(row.created_at as string),
    startedAt: toOptionalDate(row.started_at),
    completedAt: toOptionalDate(row.completed_at),
  };
  return run;
}
173
+
174
/**
 * Convert a `lattice_eval_run_results` row into an {@link EvalRunResult}.
 *
 * `pass` defaults to `false` and `score` to 0 when null/undefined; the JSON
 * columns (`dimension_results`, `messages`, `logs`) are `undefined` when absent
 * or unparsable.
 */
private mapRowToRunResult(row: Record<string, unknown>): EvalRunResult {
  const dimensionResults = this.parseOptionalJson<Array<{ name: string; score: number; reason: string }>>(
    row.dimension_results
  );
  const messages = this.parseOptionalJson<Array<{ role: string; content: string; id?: string }>>(row.messages);
  const logs = this.parseOptionalJson<Array<{ timestamp: string; level: string; message: string; data?: unknown }>>(
    row.logs
  );

  const runResult: EvalRunResult = {
    id: row.id as string,
    runId: row.run_id as string,
    suiteName: row.suite_name as string,
    caseId: row.case_id as string | undefined,
    pass: (row.pass as boolean) ?? false,
    score: (row.score as number) ?? 0,
    summary: row.summary as string | undefined,
    dimensionResults,
    durationMs: row.duration_ms as number | undefined,
    messages,
    logs,
    error: row.error as string | undefined,
    createdAt: new Date(row.created_at as string),
  };
  return runResult;
}
191
+
192
/**
 * Read a JSON column that must yield a value.
 *
 * @param val - Raw column value: a JSON string, an already-parsed value, or null/undefined.
 * @param fallback - Returned when `val` is null/undefined or is a string that fails to parse.
 * @returns The parsed string, the non-string value unchanged, or `fallback`.
 */
private parseRequiredJson<T>(val: unknown, fallback: T): T {
  if (val === null || val === undefined) {
    return fallback;
  }
  if (typeof val !== "string") {
    // Non-string values are assumed to be parsed already and are returned as-is.
    return val as T;
  }
  try {
    return JSON.parse(val) as T;
  } catch {
    return fallback;
  }
}
203
+
204
/**
 * Read a JSON column that may be absent.
 *
 * @param val - Raw column value: a JSON string, an already-parsed value, or null/undefined.
 * @returns The parsed string, the non-string value unchanged, or `undefined`
 *          when `val` is null/undefined or is a string that fails to parse.
 */
private parseOptionalJson<T>(val: unknown): T | undefined {
  if (val === null || val === undefined) {
    return undefined;
  }
  if (typeof val !== "string") {
    // Non-string values are assumed to be parsed already and are returned as-is.
    return val as T;
  }
  try {
    return JSON.parse(val) as T;
  } catch {
    return undefined;
  }
}
215
+
216
+ // ---------------------------------------------------------------------------
217
+ // Projects
218
+ // ---------------------------------------------------------------------------
219
+
220
/**
 * List every eval project owned by a tenant, newest first.
 *
 * @param tenantId - Tenant whose projects are returned.
 * @returns The tenant's projects ordered by `created_at` descending.
 */
async getProjectsByTenant(tenantId: string): Promise<EvalProject[]> {
  await this.ensureInitialized();
  const sql = `SELECT id, tenant_id, name, description, version,
    judge_model_config, target_server_config, concurrency,
    report_config, created_at, updated_at
    FROM lattice_eval_projects
    WHERE tenant_id = $1
    ORDER BY created_at DESC`;
  const result = await this.pool.query<Record<string, unknown>>(sql, [tenantId]);
  return result.rows.map((row) => this.mapRowToProject(row));
}
238
+
239
/**
 * Fetch one eval project by id within a tenant.
 *
 * @param tenantId - Tenant that must own the project.
 * @param id - Project id.
 * @returns The project, or `null` when no row matches both id and tenant.
 */
async getProjectById(tenantId: string, id: string): Promise<EvalProject | null> {
  await this.ensureInitialized();
  const sql = `SELECT id, tenant_id, name, description, version,
    judge_model_config, target_server_config, concurrency,
    report_config, created_at, updated_at
    FROM lattice_eval_projects
    WHERE id = $1 AND tenant_id = $2`;
  const result = await this.pool.query<Record<string, unknown>>(sql, [id, tenantId]);
  const [found] = result.rows;
  return found === undefined ? null : this.mapRowToProject(found);
}
256
+
257
/**
 * Insert a new eval project and return the stored row.
 *
 * @param tenantId - Owning tenant.
 * @param id - Project id; a UUID v4 is generated when this is falsy.
 * @param data - Project fields. `concurrency` defaults to 3; empty `description`/`version` are stored as NULL.
 * @returns The project as persisted.
 */
async createProject(
  tenantId: string,
  id: string,
  data: CreateEvalProjectRequest
): Promise<EvalProject> {
  await this.ensureInitialized();
  const projectId = id || uuidv4();
  // An undefined report config is stored as SQL NULL rather than a JSON value.
  const reportConfig = data.reportConfig === undefined ? null : JSON.stringify(data.reportConfig);
  const params: unknown[] = [
    projectId,
    tenantId,
    data.name,
    data.description || null,
    data.version || null,
    JSON.stringify(data.judgeModelConfig),
    JSON.stringify(data.targetServerConfig),
    data.concurrency ?? 3,
    reportConfig,
  ];
  const sql = `INSERT INTO lattice_eval_projects
    (id, tenant_id, name, description, version, judge_model_config, target_server_config, concurrency, report_config)
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
    RETURNING id, tenant_id, name, description, version,
    judge_model_config, target_server_config, concurrency,
    report_config, created_at, updated_at`;
  const result = await this.pool.query<Record<string, unknown>>(sql, params);
  return this.mapRowToProject(result.rows[0]);
}
290
+
291
/**
 * Apply a partial update to an eval project.
 *
 * Only fields that are not `undefined` are written. With no such fields the
 * current project is returned unchanged; otherwise `updated_at` is set to NOW().
 *
 * @param tenantId - Tenant that must own the project.
 * @param id - Project id.
 * @param updates - Fields to change.
 * @returns The updated project, or `null` when no row matches id and tenant.
 */
async updateProject(
  tenantId: string,
  id: string,
  updates: Partial<CreateEvalProjectRequest>
): Promise<EvalProject | null> {
  await this.ensureInitialized();
  const assignments: string[] = [];
  const params: unknown[] = [];
  // Appends the value and a `column = $n` fragment whose placeholder matches its position.
  const assign = (column: string, value: unknown): void => {
    params.push(value);
    assignments.push(`${column} = $${params.length}`);
  };

  if (updates.name !== undefined) assign("name", updates.name);
  if (updates.description !== undefined) assign("description", updates.description);
  if (updates.version !== undefined) assign("version", updates.version);
  if (updates.judgeModelConfig !== undefined) assign("judge_model_config", JSON.stringify(updates.judgeModelConfig));
  if (updates.targetServerConfig !== undefined) assign("target_server_config", JSON.stringify(updates.targetServerConfig));
  if (updates.concurrency !== undefined) assign("concurrency", updates.concurrency);
  if (updates.reportConfig !== undefined) assign("report_config", JSON.stringify(updates.reportConfig));

  if (assignments.length === 0) {
    return this.getProjectById(tenantId, id);
  }

  assignments.push(`updated_at = NOW()`);
  params.push(id, tenantId);
  const tenantSlot = params.length;
  const idSlot = tenantSlot - 1;

  const result = await this.pool.query<Record<string, unknown>>(
    `UPDATE lattice_eval_projects SET ${assignments.join(", ")}
    WHERE id = $${idSlot} AND tenant_id = $${tenantSlot}
    RETURNING id, tenant_id, name, description, version,
    judge_model_config, target_server_config, concurrency,
    report_config, created_at, updated_at`,
    params
  );
  const [updated] = result.rows;
  return updated === undefined ? null : this.mapRowToProject(updated);
}
325
+
326
/**
 * Delete an eval project belonging to a tenant.
 *
 * @param tenantId - Tenant that must own the project.
 * @param id - Project id.
 * @returns `true` when at least one row was deleted.
 */
async deleteProject(tenantId: string, id: string): Promise<boolean> {
  await this.ensureInitialized();
  const sql = `DELETE FROM lattice_eval_projects WHERE id = $1 AND tenant_id = $2`;
  const { rowCount } = await this.pool.query(sql, [id, tenantId]);
  const deleted = rowCount ?? 0;
  return deleted > 0;
}
335
+
336
+ // ---------------------------------------------------------------------------
337
+ // Suites
338
+ // ---------------------------------------------------------------------------
339
+
340
/**
 * List the suites of a project, newest first, each with its number of cases.
 *
 * @param tenantId - Tenant that must own the suites.
 * @param projectId - Project whose suites are listed.
 * @returns Suites ordered by `created_at` descending, with `caseCount` populated.
 */
async getSuitesByProject(tenantId: string, projectId: string): Promise<EvalSuite[]> {
  await this.ensureInitialized();
  // LEFT JOIN keeps suites that have no cases; COUNT(c.id) is 0 for them.
  const sql = `SELECT s.id, s.tenant_id, s.project_id, s.name, s.created_at, s.updated_at,
    COUNT(c.id)::int AS case_count
    FROM lattice_eval_suites s
    LEFT JOIN lattice_eval_cases c ON c.suite_id = s.id
    WHERE s.tenant_id = $1 AND s.project_id = $2
    GROUP BY s.id
    ORDER BY s.created_at DESC`;
  const result = await this.pool.query<Record<string, unknown>>(sql, [tenantId, projectId]);
  return result.rows.map((row) => this.mapRowToSuite(row));
}
358
+
359
/**
 * Fetch one suite, including its case count, within a tenant.
 *
 * @param tenantId - Tenant that must own the suite.
 * @param id - Suite id.
 * @returns The suite, or `null` when no row matches both id and tenant.
 */
async getSuiteById(tenantId: string, id: string): Promise<EvalSuite | null> {
  await this.ensureInitialized();
  const sql = `SELECT s.id, s.tenant_id, s.project_id, s.name, s.created_at, s.updated_at,
    COUNT(c.id)::int AS case_count
    FROM lattice_eval_suites s
    LEFT JOIN lattice_eval_cases c ON c.suite_id = s.id
    WHERE s.tenant_id = $1 AND s.id = $2
    GROUP BY s.id`;
  const result = await this.pool.query<Record<string, unknown>>(sql, [tenantId, id]);
  const [found] = result.rows;
  return found === undefined ? null : this.mapRowToSuite(found);
}
376
+
377
/**
 * Insert a new suite under a project.
 *
 * @param tenantId - Owning tenant.
 * @param projectId - Project the suite belongs to.
 * @param id - Suite id; a UUID v4 is generated when this is falsy.
 * @param data - Suite fields (only `name` is stored).
 * @returns The suite as persisted (the INSERT returns no case count, so `caseCount` is undefined).
 */
async createSuite(
  tenantId: string,
  projectId: string,
  id: string,
  data: CreateEvalSuiteRequest
): Promise<EvalSuite> {
  await this.ensureInitialized();
  const suiteId = id || uuidv4();
  const sql = `INSERT INTO lattice_eval_suites (id, tenant_id, project_id, name)
    VALUES ($1, $2, $3, $4)
    RETURNING id, tenant_id, project_id, name, created_at, updated_at`;
  const params: unknown[] = [suiteId, tenantId, projectId, data.name];
  const result = await this.pool.query<Record<string, unknown>>(sql, params);
  return this.mapRowToSuite(result.rows[0]);
}
397
+
398
/**
 * Rename a suite.
 *
 * When `updates.name` is `undefined` nothing is written and the current suite
 * is returned; otherwise `updated_at` is also set to NOW().
 *
 * @param tenantId - Tenant that must own the suite.
 * @param id - Suite id.
 * @param updates - Fields to change (only `name` is supported).
 * @returns The updated suite, or `null` when no row matches id and tenant.
 */
async updateSuite(
  tenantId: string,
  id: string,
  updates: Partial<CreateEvalSuiteRequest>
): Promise<EvalSuite | null> {
  await this.ensureInitialized();
  const assignments: string[] = [];
  const params: unknown[] = [];

  if (updates.name !== undefined) {
    params.push(updates.name);
    assignments.push(`name = $${params.length}`);
  }
  if (assignments.length === 0) {
    return this.getSuiteById(tenantId, id);
  }

  assignments.push(`updated_at = NOW()`);
  params.push(id, tenantId);
  const tenantSlot = params.length;
  const idSlot = tenantSlot - 1;

  const result = await this.pool.query<Record<string, unknown>>(
    `UPDATE lattice_eval_suites SET ${assignments.join(", ")}
    WHERE id = $${idSlot} AND tenant_id = $${tenantSlot}
    RETURNING id, tenant_id, project_id, name, created_at, updated_at`,
    params
  );
  const [updated] = result.rows;
  return updated === undefined ? null : this.mapRowToSuite(updated);
}
423
+
424
/**
 * Delete a suite belonging to a tenant.
 *
 * @param tenantId - Tenant that must own the suite.
 * @param id - Suite id.
 * @returns `true` when at least one row was deleted.
 */
async deleteSuite(tenantId: string, id: string): Promise<boolean> {
  await this.ensureInitialized();
  const sql = `DELETE FROM lattice_eval_suites WHERE id = $1 AND tenant_id = $2`;
  const { rowCount } = await this.pool.query(sql, [id, tenantId]);
  const deleted = rowCount ?? 0;
  return deleted > 0;
}
433
+
434
+ // ---------------------------------------------------------------------------
435
+ // Cases
436
+ // ---------------------------------------------------------------------------
437
+
438
/**
 * List the test cases of a suite in creation order.
 *
 * @param tenantId - Tenant that must own the cases.
 * @param suiteId - Suite whose cases are listed.
 * @returns Cases ordered by `created_at` ascending.
 */
async getCasesBySuite(tenantId: string, suiteId: string): Promise<EvalCase[]> {
  await this.ensureInitialized();
  const sql = `SELECT id, tenant_id, suite_id,
    input_message, input_files, steps,
    output_type, content_assertion, rubrics,
    created_at, updated_at
    FROM lattice_eval_cases
    WHERE tenant_id = $1 AND suite_id = $2
    ORDER BY created_at`;
  const result = await this.pool.query<Record<string, unknown>>(sql, [tenantId, suiteId]);
  return result.rows.map((row) => this.mapRowToCase(row));
}
458
+
459
/**
 * Fetch one test case by id within a tenant.
 *
 * @param tenantId - Tenant that must own the case.
 * @param id - Case id.
 * @returns The case, or `null` when no row matches both id and tenant.
 */
async getCaseById(tenantId: string, id: string): Promise<EvalCase | null> {
  await this.ensureInitialized();
  const sql = `SELECT id, tenant_id, suite_id,
    input_message, input_files, steps,
    output_type, content_assertion, rubrics,
    created_at, updated_at
    FROM lattice_eval_cases
    WHERE id = $1 AND tenant_id = $2`;
  const result = await this.pool.query<Record<string, unknown>>(sql, [id, tenantId]);
  const [found] = result.rows;
  return found === undefined ? null : this.mapRowToCase(found);
}
478
+
479
/**
 * Insert a new test case into a suite.
 *
 * @param tenantId - Owning tenant.
 * @param suiteId - Suite the case belongs to.
 * @param id - Case id; a UUID v4 is generated when this is falsy.
 * @param data - Case fields. Missing `inputFiles` is stored as `{}`, missing
 *               `rubrics` as `[]`, and an empty `contentAssertion` as NULL.
 * @returns The case as persisted.
 */
async createCase(
  tenantId: string,
  suiteId: string,
  id: string,
  data: CreateEvalCaseRequest
): Promise<EvalCase> {
  await this.ensureInitialized();
  const caseId = id || uuidv4();
  const params: unknown[] = [
    caseId,
    tenantId,
    suiteId,
    data.inputMessage,
    JSON.stringify(data.inputFiles || {}),
    JSON.stringify(data.steps),
    data.outputType,
    data.contentAssertion || null,
    JSON.stringify(data.rubrics || []),
  ];
  const sql = `INSERT INTO lattice_eval_cases
    (id, tenant_id, suite_id, input_message, input_files, steps, output_type, content_assertion, rubrics)
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
    RETURNING id, tenant_id, suite_id,
    input_message, input_files, steps,
    output_type, content_assertion, rubrics,
    created_at, updated_at`;
  const result = await this.pool.query<Record<string, unknown>>(sql, params);
  return this.mapRowToCase(result.rows[0]);
}
515
+
516
/**
 * Apply a partial update to a test case.
 *
 * Only fields that are not `undefined` are written. With no such fields the
 * current case is returned unchanged; otherwise `updated_at` is set to NOW().
 *
 * @param tenantId - Tenant that must own the case.
 * @param id - Case id.
 * @param updates - Fields to change.
 * @returns The updated case, or `null` when no row matches id and tenant.
 */
async updateCase(
  tenantId: string,
  id: string,
  updates: Partial<CreateEvalCaseRequest>
): Promise<EvalCase | null> {
  await this.ensureInitialized();
  const assignments: string[] = [];
  const params: unknown[] = [];
  // Appends the value and a `column = $n` fragment whose placeholder matches its position.
  const assign = (column: string, value: unknown): void => {
    params.push(value);
    assignments.push(`${column} = $${params.length}`);
  };

  if (updates.inputMessage !== undefined) assign("input_message", updates.inputMessage);
  if (updates.inputFiles !== undefined) assign("input_files", JSON.stringify(updates.inputFiles));
  if (updates.steps !== undefined) assign("steps", JSON.stringify(updates.steps));
  if (updates.outputType !== undefined) assign("output_type", updates.outputType);
  if (updates.contentAssertion !== undefined) assign("content_assertion", updates.contentAssertion);
  if (updates.rubrics !== undefined) assign("rubrics", JSON.stringify(updates.rubrics));

  if (assignments.length === 0) {
    return this.getCaseById(tenantId, id);
  }

  assignments.push(`updated_at = NOW()`);
  params.push(id, tenantId);
  const tenantSlot = params.length;
  const idSlot = tenantSlot - 1;

  const result = await this.pool.query<Record<string, unknown>>(
    `UPDATE lattice_eval_cases SET ${assignments.join(", ")}
    WHERE id = $${idSlot} AND tenant_id = $${tenantSlot}
    RETURNING id, tenant_id, suite_id,
    input_message, input_files, steps,
    output_type, content_assertion, rubrics,
    created_at, updated_at`,
    params
  );
  const [updated] = result.rows;
  return updated === undefined ? null : this.mapRowToCase(updated);
}
551
+
552
/**
 * Delete a test case belonging to a tenant.
 *
 * @param tenantId - Tenant that must own the case.
 * @param id - Case id.
 * @returns `true` when at least one row was deleted.
 */
async deleteCase(tenantId: string, id: string): Promise<boolean> {
  await this.ensureInitialized();
  const sql = `DELETE FROM lattice_eval_cases WHERE id = $1 AND tenant_id = $2`;
  const { rowCount } = await this.pool.query(sql, [id, tenantId]);
  const deleted = rowCount ?? 0;
  return deleted > 0;
}
561
+
562
+ // ---------------------------------------------------------------------------
563
+ // Runs
564
+ // ---------------------------------------------------------------------------
565
+
566
/**
 * List a tenant's runs, newest first, with optional filters.
 *
 * @param tenantId - Tenant whose runs are returned.
 * @param opts - Optional filters; a falsy `projectId` or `status` is ignored.
 * @returns Matching runs ordered by `created_at` descending.
 */
async getRunsByTenant(
  tenantId: string,
  opts?: { projectId?: string; status?: string }
): Promise<EvalRun[]> {
  await this.ensureInitialized();
  const params: unknown[] = [tenantId];
  const filters: string[] = ["tenant_id = $1"];
  // Adds `column = $n` only for filters that were actually supplied.
  const addFilter = (column: string, value: string | undefined): void => {
    if (!value) return;
    params.push(value);
    filters.push(`${column} = $${params.length}`);
  };
  addFilter("project_id", opts?.projectId);
  addFilter("status", opts?.status);

  const result = await this.pool.query<Record<string, unknown>>(
    `SELECT id, project_id, tenant_id,
    status, concurrency, total_cases,
    passed_cases, failed_cases, avg_score,
    error, created_at, started_at, completed_at
    FROM lattice_eval_runs
    WHERE ${filters.join(" AND ")}
    ORDER BY created_at DESC`,
    params
  );
  return result.rows.map((row) => this.mapRowToRun(row));
}
595
+
596
/**
 * Fetch one run by id within a tenant.
 *
 * @param tenantId - Tenant that must own the run.
 * @param id - Run id.
 * @returns The run, or `null` when no row matches both id and tenant.
 */
async getRunById(tenantId: string, id: string): Promise<EvalRun | null> {
  await this.ensureInitialized();
  const sql = `SELECT id, project_id, tenant_id,
    status, concurrency, total_cases,
    passed_cases, failed_cases, avg_score,
    error, created_at, started_at, completed_at
    FROM lattice_eval_runs
    WHERE id = $1 AND tenant_id = $2`;
  const result = await this.pool.query<Record<string, unknown>>(sql, [id, tenantId]);
  const [found] = result.rows;
  return found === undefined ? null : this.mapRowToRun(found);
}
616
+
617
/**
 * Insert a new run for a project.
 *
 * The run is stored with status `"running"`, zeroed pass/fail/score
 * aggregates, and `started_at` set to the current time.
 *
 * @param tenantId - Owning tenant.
 * @param projectId - Project being evaluated.
 * @param id - Run id; a UUID v4 is generated when this is falsy.
 * @param data - Supplies `concurrency` and `totalCases`.
 * @returns The run as persisted.
 */
async createRun(
  tenantId: string,
  projectId: string,
  id: string,
  data: CreateEvalRunRequest
): Promise<EvalRun> {
  await this.ensureInitialized();
  const runId = id || uuidv4();
  const startedAt = new Date();
  const params: unknown[] = [
    runId,
    projectId,
    tenantId,
    "running",
    data.concurrency,
    data.totalCases,
    0, // passed_cases
    0, // failed_cases
    0, // avg_score
    startedAt,
  ];
  const sql = `INSERT INTO lattice_eval_runs
    (id, project_id, tenant_id, status, concurrency, total_cases, passed_cases, failed_cases, avg_score, started_at)
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
    RETURNING id, project_id, tenant_id,
    status, concurrency, total_cases,
    passed_cases, failed_cases, avg_score,
    error, created_at, started_at, completed_at`;
  const result = await this.pool.query<Record<string, unknown>>(sql, params);
  return this.mapRowToRun(result.rows[0]);
}
655
+
656
/**
 * Update a run's status and aggregate fields.
 *
 * Only fields that are not `undefined` are written; with no such fields the
 * current run is returned unchanged.
 *
 * @param tenantId - Tenant that must own the run.
 * @param id - Run id.
 * @param updates - Status, counters, score, error text, and completion time to store.
 * @returns The updated run, or `null` when no row matches id and tenant.
 */
async updateRunStatus(
  tenantId: string,
  id: string,
  updates: {
    status?: EvalRun["status"];
    passedCases?: number;
    failedCases?: number;
    avgScore?: number;
    error?: string;
    completedAt?: Date;
  }
): Promise<EvalRun | null> {
  await this.ensureInitialized();
  const assignments: string[] = [];
  const params: unknown[] = [];
  // Appends the value and a `column = $n` fragment whose placeholder matches its position.
  const assign = (column: string, value: unknown): void => {
    params.push(value);
    assignments.push(`${column} = $${params.length}`);
  };

  if (updates.status !== undefined) assign("status", updates.status);
  if (updates.passedCases !== undefined) assign("passed_cases", updates.passedCases);
  if (updates.failedCases !== undefined) assign("failed_cases", updates.failedCases);
  if (updates.avgScore !== undefined) assign("avg_score", updates.avgScore);
  if (updates.completedAt !== undefined) assign("completed_at", updates.completedAt);
  if (updates.error !== undefined) assign("error", updates.error);

  if (assignments.length === 0) {
    return this.getRunById(tenantId, id);
  }

  params.push(id, tenantId);
  const tenantSlot = params.length;
  const idSlot = tenantSlot - 1;

  const result = await this.pool.query<Record<string, unknown>>(
    `UPDATE lattice_eval_runs SET ${assignments.join(", ")}
    WHERE id = $${idSlot} AND tenant_id = $${tenantSlot}
    RETURNING id, project_id, tenant_id,
    status, concurrency, total_cases,
    passed_cases, failed_cases, avg_score,
    error, created_at, started_at, completed_at`,
    params
  );
  const [updated] = result.rows;
  return updated === undefined ? null : this.mapRowToRun(updated);
}
698
+
699
/**
 * Delete a run together with its results, atomically.
 *
 * Results are scoped to the tenant through their parent run, the same way
 * getResultsByRun does: nothing else in this store reads or writes a
 * `tenant_id` column on `lattice_eval_run_results`, so filtering on it directly
 * would either fail or match nothing. Both deletes run in one transaction so a
 * failure cannot leave a run without its results (or the reverse).
 *
 * @param tenantId - Tenant that must own the run.
 * @param id - Run id.
 * @returns `true` when the run row was deleted.
 * @throws Re-throws any database error after rolling the transaction back.
 */
async deleteRun(tenantId: string, id: string): Promise<boolean> {
  await this.ensureInitialized();
  // A transaction must stay on a single connection, so check one out of the pool.
  const client = await this.pool.connect();
  try {
    await client.query("BEGIN");
    // Children first, restricted to runs this tenant owns.
    await client.query(
      `DELETE FROM lattice_eval_run_results
      WHERE run_id IN (SELECT id FROM lattice_eval_runs WHERE id = $1 AND tenant_id = $2)`,
      [id, tenantId]
    );
    const { rowCount } = await client.query(
      `DELETE FROM lattice_eval_runs WHERE id = $1 AND tenant_id = $2`,
      [id, tenantId]
    );
    await client.query("COMMIT");
    return (rowCount ?? 0) > 0;
  } catch (error) {
    // Best-effort rollback; the original failure is the one worth reporting.
    await client.query("ROLLBACK").catch(() => undefined);
    throw error;
  } finally {
    client.release();
  }
}
709
+
710
+ // ---------------------------------------------------------------------------
711
+ // Run Results
712
+ // ---------------------------------------------------------------------------
713
+
714
/**
 * List the results recorded for a run, in creation order.
 *
 * Tenant isolation is enforced by joining to the parent run and filtering on
 * the run's `tenant_id`.
 *
 * @param tenantId - Tenant that must own the parent run.
 * @param runId - Run whose results are listed.
 * @returns Results ordered by `created_at` ascending.
 */
async getResultsByRun(tenantId: string, runId: string): Promise<EvalRunResult[]> {
  await this.ensureInitialized();
  const sql = `SELECT rr.id, rr.run_id, rr.suite_name, rr.case_id,
    rr.pass, rr.score, rr.summary, rr.dimension_results,
    rr.duration_ms, rr.messages, rr.logs, rr.error, rr.created_at
    FROM lattice_eval_run_results rr
    INNER JOIN lattice_eval_runs r ON r.id = rr.run_id
    WHERE r.tenant_id = $1 AND rr.run_id = $2
    ORDER BY rr.created_at`;
  const result = await this.pool.query<Record<string, unknown>>(sql, [tenantId, runId]);
  return result.rows.map((row) => this.mapRowToRunResult(row));
}
735
+
736
/**
 * Record the result of one case within a run.
 *
 * The parent run must belong to `tenantId`. The results table is scoped to a
 * tenant only through its run (see getResultsByRun), so without this check a
 * caller could attach results to another tenant's run.
 *
 * @param tenantId - Tenant that must own the parent run.
 * @param runId - Run the result belongs to.
 * @param id - Result id; a UUID v4 is generated when this is falsy.
 * @param data - Result fields. Missing `dimensionResults`, `messages` and `logs`
 *               are stored as `[]`; empty `caseId`, `summary` and `error` as NULL.
 * @returns The result as persisted.
 * @throws Error when no run with `runId` exists for `tenantId`.
 */
async createRunResult(
  tenantId: string,
  runId: string,
  id: string,
  data: Omit<EvalRunResult, "id" | "runId" | "createdAt">
): Promise<EvalRunResult> {
  await this.ensureInitialized();

  // Ownership check: the insert below carries no tenant information of its own.
  const owner = await this.pool.query(
    `SELECT 1 FROM lattice_eval_runs WHERE id = $1 AND tenant_id = $2`,
    [runId, tenantId]
  );
  if (owner.rows.length === 0) {
    throw new Error(`Run ${runId} not found for tenant ${tenantId}`);
  }

  const resultId = id || uuidv4();
  const params: unknown[] = [
    resultId,
    runId,
    data.suiteName,
    data.caseId || null,
    data.pass,
    data.score,
    data.summary || null,
    JSON.stringify(data.dimensionResults || []),
    data.durationMs ?? null,
    JSON.stringify(data.messages || []),
    JSON.stringify(data.logs || []),
    data.error || null,
  ];
  const inserted = await this.pool.query<Record<string, unknown>>(
    `INSERT INTO lattice_eval_run_results
    (id, run_id, suite_name, case_id, pass, score, summary, dimension_results, duration_ms, messages, logs, error)
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
    RETURNING id, run_id, suite_name, case_id,
    pass, score, summary, dimension_results,
    duration_ms, messages, logs, error, created_at`,
    params
  );
  return this.mapRowToRunResult(inserted.rows[0]);
}
775
+
776
/**
 * Apply a partial update to a run result.
 *
 * Tenant isolation goes through the parent run: the row is only updated when
 * its `run_id` refers to a run owned by `tenantId`. Only fields that are not
 * `undefined` in `updates` are written; when none are, the current row is
 * returned unchanged.
 *
 * @param tenantId - Tenant that must own the parent run.
 * @param id - ID of the result to update.
 * @param updates - Fields to change. `runId` must not be set.
 * @returns The updated result, or `null` when no matching row exists.
 * @throws Error when `updates.runId` is provided.
 */
async updateRunResult(
  tenantId: string,
  id: string,
  updates: Partial<EvalRunResult>
): Promise<EvalRunResult | null> {
  await this.ensureInitialized();
  if (updates.runId !== undefined) {
    throw new Error("runId cannot be updated on an existing result");
  }

  // [column, new value, serialize as JSON?] — listed in SET-clause order.
  const candidates: Array<[string, unknown, boolean]> = [
    ["suite_name", updates.suiteName, false],
    ["case_id", updates.caseId, false],
    ["pass", updates.pass, false],
    ["score", updates.score, false],
    ["summary", updates.summary, false],
    ["dimension_results", updates.dimensionResults, true],
    ["duration_ms", updates.durationMs, false],
    ["messages", updates.messages, true],
    ["logs", updates.logs, true],
    ["error", updates.error, false],
  ];

  const assignments: string[] = [];
  const params: unknown[] = [];
  for (const [column, value, asJson] of candidates) {
    if (value === undefined) {
      continue;
    }
    params.push(asJson ? JSON.stringify(value) : value);
    // After the push, params.length is this value's 1-based placeholder.
    assignments.push(`${column} = $${params.length}`);
  }

  if (assignments.length === 0) {
    return this.getRunResultById(tenantId, id);
  }

  const idPlaceholder = params.length + 1;
  const tenantPlaceholder = idPlaceholder + 1;
  params.push(id, tenantId);

  const result = await this.pool.query<{
    id: string; run_id: string; suite_name: string;
    case_id: string | null; pass: boolean; score: number;
    summary: string | null; dimension_results: unknown;
    duration_ms: number | null; messages: unknown; logs: unknown;
    error: string | null; created_at: string;
  }>(
    `UPDATE lattice_eval_run_results SET ${assignments.join(", ")}
     WHERE id = $${idPlaceholder}
       AND run_id IN (SELECT id FROM lattice_eval_runs WHERE tenant_id = $${tenantPlaceholder})
     RETURNING id, run_id, suite_name, case_id,
               pass, score, summary, dimension_results,
               duration_ms, messages, logs, error, created_at`,
    params
  );

  if (result.rows.length === 0) {
    return null;
  }
  return this.mapRowToRunResult(result.rows[0] as unknown as Record<string, unknown>);
}
818
+
819
/**
 * Fetch one run result by ID, scoped to a tenant.
 *
 * The result is joined to its parent run and only returned when that run's
 * `tenant_id` matches `tenantId`.
 *
 * @param tenantId - Tenant that must own the parent run.
 * @param id - ID of the result to fetch.
 * @returns The mapped result, or `null` when no matching row exists.
 */
private async getRunResultById(tenantId: string, id: string): Promise<EvalRunResult | null> {
  await this.ensureInitialized();

  const sql = `SELECT rr.id, rr.run_id, rr.suite_name, rr.case_id,
            rr.pass, rr.score, rr.summary, rr.dimension_results,
            rr.duration_ms, rr.messages, rr.logs, rr.error, rr.created_at
     FROM lattice_eval_run_results rr
     INNER JOIN lattice_eval_runs r ON r.id = rr.run_id
     WHERE rr.id = $1 AND r.tenant_id = $2`;

  const result = await this.pool.query<{
    id: string; run_id: string; suite_name: string;
    case_id: string | null; pass: boolean; score: number;
    summary: string | null; dimension_results: unknown;
    duration_ms: number | null; messages: unknown; logs: unknown;
    error: string | null; created_at: string;
  }>(sql, [id, tenantId]);

  if (result.rows.length === 0) {
    return null;
  }
  return this.mapRowToRunResult(result.rows[0] as unknown as Record<string, unknown>);
}
839
+
840
+ // ---------------------------------------------------------------------------
841
+ // Reports
842
+ // ---------------------------------------------------------------------------
843
+
844
/**
 * Build the aggregate report for a project, including all of its runs.
 *
 * The project is loaded under the tenant first. Its runs are then fetched
 * through `getRunsByTenant` and summarized:
 * - `latestPassRate` is `passedCases / totalCases` of the first run returned
 *   (0 when there are no runs or that run has no cases);
 * - `avgScore` is the mean of every run's `avgScore` (0 when there are no runs).
 *
 * @param tenantId - Tenant that must own the project.
 * @param projectId - ID of the project to report on.
 * @returns The report, or `null` when the project is not found for the tenant.
 */
async getProjectReport(tenantId: string, projectId: string): Promise<EvalProjectReport | null> {
  await this.ensureInitialized();

  const lookup = await this.pool.query<{
    id: string; tenant_id: string; name: string; description: string | null;
    version: string | null; judge_model_config: unknown; target_server_config: unknown;
    concurrency: number; report_config: unknown; created_at: string; updated_at: string;
  }>(
    `SELECT id, tenant_id, name, description, version,
            judge_model_config, target_server_config, concurrency,
            report_config, created_at, updated_at
     FROM lattice_eval_projects
     WHERE id = $1 AND tenant_id = $2`,
    [projectId, tenantId]
  );
  if (lookup.rows.length === 0) {
    return null;
  }

  const project = this.mapRowToProject(lookup.rows[0] as unknown as Record<string, unknown>);
  const runs = await this.getRunsByTenant(tenantId, { projectId });
  const totalRuns = runs.length;

  // Pass rate of the first returned run, guarded against division by zero.
  let latestPassRate = 0;
  if (totalRuns > 0 && runs[0].totalCases > 0) {
    latestPassRate = runs[0].passedCases / runs[0].totalCases;
  }

  // Unweighted mean of the per-run average scores.
  let avgScore = 0;
  if (totalRuns > 0) {
    let scoreSum = 0;
    for (const run of runs) {
      scoreSum = scoreSum + run.avgScore;
    }
    avgScore = scoreSum / totalRuns;
  }

  return {
    projectId: project.id,
    projectName: project.name,
    totalRuns,
    latestPassRate,
    avgScore,
    runs,
  };
}
880
+ }