promptfoo 0.91.3 → 0.92.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/drizzle/0006_harsh_caretaker.sql +42 -0
- package/dist/drizzle/0007_cloudy_wong.sql +1 -0
- package/dist/drizzle/meta/0006_snapshot.json +721 -0
- package/dist/drizzle/meta/0007_snapshot.json +723 -0
- package/dist/drizzle/meta/_journal.json +14 -0
- package/dist/package.json +10 -8
- package/dist/src/app/assets/{index-C6z1nbLN.js → index-CMDD1oSm.js} +233 -231
- package/dist/src/app/assets/{index.es-oqbvfIxR.js → index.es-D8cSwMq4.js} +1 -1
- package/dist/src/app/assets/{sync-D2s75VlC.js → sync-DJZvzYiS.js} +1 -1
- package/dist/src/app/index.html +1 -1
- package/dist/src/assertions.js +2 -2
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/commands/cache.d.ts.map +1 -1
- package/dist/src/commands/cache.js +0 -2
- package/dist/src/commands/cache.js.map +1 -1
- package/dist/src/commands/eval.d.ts.map +1 -1
- package/dist/src/commands/eval.js +19 -16
- package/dist/src/commands/eval.js.map +1 -1
- package/dist/src/commands/export.d.ts.map +1 -1
- package/dist/src/commands/export.js +8 -31
- package/dist/src/commands/export.js.map +1 -1
- package/dist/src/commands/import.d.ts.map +1 -1
- package/dist/src/commands/import.js +52 -13
- package/dist/src/commands/import.js.map +1 -1
- package/dist/src/commands/list.d.ts.map +1 -1
- package/dist/src/commands/list.js +35 -7
- package/dist/src/commands/list.js.map +1 -1
- package/dist/src/commands/share.d.ts +2 -2
- package/dist/src/commands/share.d.ts.map +1 -1
- package/dist/src/commands/share.js +12 -13
- package/dist/src/commands/share.js.map +1 -1
- package/dist/src/commands/show.d.ts.map +1 -1
- package/dist/src/commands/show.js +10 -6
- package/dist/src/commands/show.js.map +1 -1
- package/dist/src/constants.d.ts +1 -0
- package/dist/src/constants.d.ts.map +1 -1
- package/dist/src/constants.js +2 -1
- package/dist/src/constants.js.map +1 -1
- package/dist/src/database/index.js +1 -1
- package/dist/src/database/index.js.map +1 -1
- package/dist/src/database/tables.d.ts +602 -4
- package/dist/src/database/tables.d.ts.map +1 -1
- package/dist/src/database/tables.js +67 -8
- package/dist/src/database/tables.js.map +1 -1
- package/dist/src/database/types.d.ts +3 -3
- package/dist/src/database/types.d.ts.map +1 -1
- package/dist/src/evaluator.d.ts +3 -2
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +75 -104
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/evaluatorHelpers.d.ts.map +1 -1
- package/dist/src/evaluatorHelpers.js +2 -1
- package/dist/src/evaluatorHelpers.js.map +1 -1
- package/dist/src/index.d.ts +2 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +12 -9
- package/dist/src/index.js.map +1 -1
- package/dist/src/models/eval.d.ts +95 -0
- package/dist/src/models/eval.d.ts.map +1 -0
- package/dist/src/models/eval.js +390 -0
- package/dist/src/models/eval.js.map +1 -0
- package/dist/src/models/evalResult.d.ts +50 -0
- package/dist/src/models/evalResult.d.ts.map +1 -0
- package/dist/src/models/evalResult.js +122 -0
- package/dist/src/models/evalResult.js.map +1 -0
- package/dist/src/models/provider.d.ts +9 -0
- package/dist/src/models/provider.d.ts.map +1 -0
- package/dist/src/models/provider.js +47 -0
- package/dist/src/models/provider.js.map +1 -0
- package/dist/src/prompts/index.d.ts.map +1 -1
- package/dist/src/prompts/index.js +2 -1
- package/dist/src/prompts/index.js.map +1 -1
- package/dist/src/prompts/utils.d.ts +1 -0
- package/dist/src/prompts/utils.d.ts.map +1 -1
- package/dist/src/prompts/utils.js +7 -0
- package/dist/src/prompts/utils.js.map +1 -1
- package/dist/src/providers/http.js +2 -2
- package/dist/src/providers/http.js.map +1 -1
- package/dist/src/providers.js +5 -5
- package/dist/src/providers.js.map +1 -1
- package/dist/src/redteam/eval/excessive-agency/llm_rubric-20240617.json +10 -0
- package/dist/src/redteam/eval/excessive-agency/llm_rubric-20240618.json +10 -0
- package/dist/src/redteam/eval/harmful/llm_rubric-20240723.json +10 -0
- package/dist/src/redteam/eval/harmful/llm_rubric-20240724.json +10 -0
- package/dist/src/server/routes/evalRoutes.d.ts +1 -0
- package/dist/src/server/routes/evalRoutes.d.ts.map +1 -0
- package/dist/src/server/routes/evalRoutes.js +2 -0
- package/dist/src/server/routes/evalRoutes.js.map +1 -0
- package/dist/src/server/server.d.ts +1 -0
- package/dist/src/server/server.d.ts.map +1 -1
- package/dist/src/server/server.js +70 -31
- package/dist/src/server/server.js.map +1 -1
- package/dist/src/share.d.ts +2 -2
- package/dist/src/share.d.ts.map +1 -1
- package/dist/src/share.js +93 -34
- package/dist/src/share.js.map +1 -1
- package/dist/src/table.d.ts +2 -2
- package/dist/src/table.d.ts.map +1 -1
- package/dist/src/table.js +3 -3
- package/dist/src/table.js.map +1 -1
- package/dist/src/types/index.d.ts +163 -11
- package/dist/src/types/index.d.ts.map +1 -1
- package/dist/src/types/index.js +21 -1
- package/dist/src/types/index.js.map +1 -1
- package/dist/src/util/config/load.d.ts.map +1 -1
- package/dist/src/util/config/load.js +2 -1
- package/dist/src/util/config/load.js.map +1 -1
- package/dist/src/util/config/manage.d.ts.map +1 -1
- package/dist/src/util/config/manage.js.map +1 -1
- package/dist/src/util/convertEvalResultsToTable.d.ts +16 -0
- package/dist/src/util/convertEvalResultsToTable.d.ts.map +1 -0
- package/dist/src/util/convertEvalResultsToTable.js +137 -0
- package/dist/src/util/convertEvalResultsToTable.js.map +1 -0
- package/dist/src/util/createHash.d.ts +1 -0
- package/dist/src/util/createHash.d.ts.map +1 -1
- package/dist/src/util/createHash.js +9 -0
- package/dist/src/util/createHash.js.map +1 -1
- package/dist/src/util/file.d.ts +8 -0
- package/dist/src/util/file.d.ts.map +1 -0
- package/dist/src/util/file.js +13 -0
- package/dist/src/util/file.js.map +1 -0
- package/dist/src/util/index.d.ts +9 -14
- package/dist/src/util/index.d.ts.map +1 -1
- package/dist/src/util/index.js +87 -223
- package/dist/src/util/index.js.map +1 -1
- package/dist/src/util/time.d.ts +2 -0
- package/dist/src/util/time.d.ts.map +1 -0
- package/dist/src/util/time.js +7 -0
- package/dist/src/util/time.js.map +1 -0
- package/dist/src/util/transform.js +2 -2
- package/dist/src/util/transform.js.map +1 -1
- package/dist/src/validators/providers.d.ts +6 -0
- package/dist/src/validators/providers.d.ts.map +1 -1
- package/dist/src/validators/providers.js +1 -0
- package/dist/src/validators/providers.js.map +1 -1
- package/dist/src/validators/redteam.d.ts +6 -0
- package/dist/src/validators/redteam.d.ts.map +1 -1
- package/dist/test/commands/eval/filterFailingTests.test.js +24 -2
- package/dist/test/commands/eval/filterFailingTests.test.js.map +1 -1
- package/dist/test/evaluator.test.js +153 -74
- package/dist/test/evaluator.test.js.map +1 -1
- package/dist/test/factories/data/eval/database_records.d.ts +142 -0
- package/dist/test/factories/data/eval/database_records.d.ts.map +1 -0
- package/dist/test/factories/data/eval/database_records.js +251 -0
- package/dist/test/factories/data/eval/database_records.js.map +1 -0
- package/dist/test/factories/evalFactory.d.ts +768 -0
- package/dist/test/factories/evalFactory.d.ts.map +1 -0
- package/dist/test/factories/evalFactory.js +121 -0
- package/dist/test/factories/evalFactory.js.map +1 -0
- package/dist/test/factories/index.d.ts +1 -0
- package/dist/test/factories/index.d.ts.map +1 -0
- package/dist/test/factories/index.js +2 -0
- package/dist/test/factories/index.js.map +1 -0
- package/dist/test/index.test.js +17 -33
- package/dist/test/index.test.js.map +1 -1
- package/dist/test/models/eval.test.d.ts +2 -0
- package/dist/test/models/eval.test.d.ts.map +1 -0
- package/dist/test/models/eval.test.js +34 -0
- package/dist/test/models/eval.test.js.map +1 -0
- package/dist/test/providers.test.js +3 -3
- package/dist/test/providers.test.js.map +1 -1
- package/dist/test/server/share.test.d.ts +2 -0
- package/dist/test/server/share.test.d.ts.map +1 -0
- package/dist/test/server/share.test.js +36 -0
- package/dist/test/server/share.test.js.map +1 -0
- package/dist/test/server/v3evalToShare.json +507 -0
- package/dist/test/server/v4evalToShare.json +421 -0
- package/dist/test/types.test.js +58 -0
- package/dist/test/types.test.js.map +1 -1
- package/dist/test/util.file.test.d.ts +2 -0
- package/dist/test/util.file.test.d.ts.map +1 -0
- package/dist/test/util.file.test.js +32 -0
- package/dist/test/util.file.test.js.map +1 -0
- package/dist/test/util.listPrevious.test.d.ts +2 -0
- package/dist/test/util.listPrevious.test.d.ts.map +1 -0
- package/dist/test/util.listPrevious.test.js +37 -0
- package/dist/test/util.listPrevious.test.js.map +1 -0
- package/dist/test/util.test.js +38 -311
- package/dist/test/util.test.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/package.json +10 -8
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Interop shim for default imports: a module object flagged with `__esModule`
// is handed back untouched; anything else is wrapped so that the value is
// reachable through a `default` property. A shim already present on `this`
// takes precedence over the local definition.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.EvalQueries = void 0;
|
|
7
|
+
exports.createEvalId = createEvalId;
|
|
8
|
+
exports.getSummaryofLatestEvals = getSummaryofLatestEvals;
|
|
9
|
+
const crypto_1 = require("crypto");
|
|
10
|
+
const drizzle_orm_1 = require("drizzle-orm");
|
|
11
|
+
const tiny_invariant_1 = __importDefault(require("tiny-invariant"));
|
|
12
|
+
const constants_1 = require("../constants");
|
|
13
|
+
const database_1 = require("../database");
|
|
14
|
+
const tables_1 = require("../database/tables");
|
|
15
|
+
const logger_1 = __importDefault(require("../logger"));
|
|
16
|
+
const utils_1 = require("../prompts/utils");
|
|
17
|
+
const convertEvalResultsToTable_1 = require("../util/convertEvalResultsToTable");
|
|
18
|
+
const createHash_1 = require("../util/createHash");
|
|
19
|
+
const time_1 = require("../util/time");
|
|
20
|
+
const evalResult_1 = __importDefault(require("./evalResult"));
|
|
21
|
+
/**
 * Builds an eval identifier of the form `eval-<random>-<timestamp>`.
 *
 * The random part is a 3-character sequence from `randomSequence`; the
 * timestamp part is the first 19 characters of the ISO-8601 rendering of
 * `createdAt` (i.e. `YYYY-MM-DDTHH:mm:ss`, without milliseconds or zone).
 *
 * @param {Date} [createdAt] - Creation time; defaults to the current time.
 * @returns {string} The generated identifier.
 */
function createEvalId(createdAt = new Date()) {
    const randomPart = (0, createHash_1.randomSequence)(3);
    const isoSeconds = createdAt.toISOString().slice(0, 19);
    return `eval-${randomPart}-${isoSeconds}`;
}
|
|
24
|
+
/**
 * Ad-hoc queries that span several evals at once.
 */
class EvalQueries {
    /**
     * Collects the distinct variable names used by the test cases of each
     * of the given evals.
     *
     * The names are the keys of the `$.vars` object inside the `test_case`
     * JSON column of `eval_results`.
     *
     * @param {Array<{id: string}>} evals - Evals to inspect; only `id` is read.
     * @returns {Promise<Object<string, string[]>>} Map from eval id to the
     *   variable names found for it. Evals without any rows are absent.
     */
    static async getVarsFromEvals(evals) {
        // Nothing to look up: skip the round-trip instead of issuing `IN ()`.
        if (evals.length === 0) {
            return {};
        }
        const db = (0, database_1.getDb)();
        // Bind every id as a parameter rather than splicing it into the SQL
        // text, so an id containing a quote can neither break nor alter the
        // statement.
        const idParams = drizzle_orm_1.sql.join(evals.map((e) => (0, drizzle_orm_1.sql) `${e.id}`), (0, drizzle_orm_1.sql) `, `);
        const query = (0, drizzle_orm_1.sql) `SELECT DISTINCT j.key, eval_id from (SELECT eval_id, json_extract(eval_results.test_case, '$.vars') as vars
FROM eval_results where eval_id IN (${idParams})) t, json_each(t.vars) j;`;
        // @ts-ignore
        const results = await db.all(query);
        // Group the flat (key, eval_id) rows by eval id.
        const vars = results.reduce((acc, r) => {
            acc[r.eval_id] = acc[r.eval_id] || [];
            acc[r.eval_id].push(r.key);
            return acc;
        }, {});
        return vars;
    }
}
|
|
39
|
+
exports.EvalQueries = EvalQueries;
|
|
40
|
+
/**
 * Model of a single evaluation run backed by the `evals` table.
 *
 * Two storage layouts are supported (see `version()`): legacy evals carry
 * their results and table on the eval row itself (`oldResults`), newer evals
 * keep individual results in a separate results table.
 */
class Eval {
    /**
     * Loads the eval with the greatest `createdAt`.
     *
     * @returns {Promise<Eval|undefined>} The newest eval, or `undefined`
     *   when the table is empty.
     */
    static async latest() {
        const db = (0, database_1.getDb)();
        const db_results = await db
            .select({
                id: tables_1.evals.id,
            })
            .from(tables_1.evals)
            .orderBy((0, drizzle_orm_1.desc)(tables_1.evals.createdAt))
            .limit(1);
        if (db_results.length === 0) {
            return undefined;
        }
        return await Eval.findById(db_results[0].id);
    }
    /**
     * Loads one eval together with the id of its linked dataset.
     *
     * @param {string} id - Eval id to look up.
     * @returns {Promise<Eval|undefined>} The eval, or `undefined` when no
     *   row has that id.
     */
    static async findById(id) {
        const db = (0, database_1.getDb)();
        // Both reads happen inside one transaction.
        const { evals, datasetResults } = await db.transaction(async (tx) => {
            const evals = await tx.select().from(tables_1.evals).where((0, drizzle_orm_1.eq)(tables_1.evals.id, id));
            const datasetResults = await tx
                .select({
                    datasetId: tables_1.evalsToDatasets.datasetId,
                })
                .from(tables_1.evalsToDatasets)
                .where((0, drizzle_orm_1.eq)(tables_1.evalsToDatasets.evalId, id))
                .limit(1);
            return { evals, datasetResults };
        });
        if (evals.length === 0) {
            return undefined;
        }
        const eval_ = evals[0];
        const datasetId = datasetResults[0]?.datasetId;
        const evalInstance = new Eval(eval_.config, {
            id: eval_.id,
            createdAt: new Date(eval_.createdAt),
            author: eval_.author || undefined,
            description: eval_.description || undefined,
            prompts: eval_.prompts || [],
            datasetId,
            persisted: true,
        });
        // A `table` key in the stored results marks the legacy layout.
        if (eval_.results && 'table' in eval_.results) {
            evalInstance.oldResults = eval_.results;
        }
        return evalInstance;
    }
    /**
     * Loads the newest evals, most recent first.
     *
     * The returned instances carry neither a dataset id nor legacy results;
     * only the columns of the `evals` row are mapped.
     *
     * @param {number} [limit] - Maximum number of evals to return.
     * @returns {Promise<Eval[]>} The loaded evals.
     */
    static async getMany(limit = constants_1.DEFAULT_QUERY_LIMIT) {
        const db = (0, database_1.getDb)();
        const evals = await db
            .select()
            .from(tables_1.evals)
            .limit(limit)
            .orderBy((0, drizzle_orm_1.desc)(tables_1.evals.createdAt))
            .all();
        return evals.map((e) => new Eval(e.config, {
            id: e.id,
            createdAt: new Date(e.createdAt),
            author: e.author || undefined,
            description: e.description || undefined,
            prompts: e.prompts || [],
            persisted: true,
        }));
    }
    /**
     * Inserts a new eval and its prompt, dataset and tag relations in one
     * transaction.
     *
     * @param {object} config - Eval configuration; `description`, `tests`
     *   and `tags` are read from it.
     * @param {Array<object>} renderedPrompts - The config doesn't contain
     *   the actual prompts, so they are passed in separately.
     * @param {object} [opts] - Optional `id`, `createdAt`, `author` and
     *   initial `results` rows.
     * @returns {Promise<Eval>} The persisted eval instance.
     */
    static async create(config, renderedPrompts, opts) {
        const createdAt = opts?.createdAt || new Date();
        const evalId = opts?.id || createEvalId(createdAt);
        const db = (0, database_1.getDb)();
        await db.transaction((tx) => {
            tx.insert(tables_1.evals)
                .values({
                    id: evalId,
                    createdAt: createdAt.getTime(),
                    author: opts?.author,
                    description: config.description,
                    config,
                    results: {},
                })
                .run();
            // An empty array is truthy; check the length so that no insert
            // is attempted with zero rows.
            const initialResults = opts?.results;
            if (initialResults && initialResults.length > 0) {
                const res = tx
                    .insert(tables_1.evalResultsTable)
                    .values(initialResults.map((r) => ({ ...r, evalId, id: (0, crypto_1.randomUUID)() })))
                    .run();
                logger_1.default.debug(`Inserted ${res.changes} eval results`);
            }
            for (const prompt of renderedPrompts) {
                const label = prompt.label || prompt.display || prompt.raw;
                const promptId = (0, utils_1.hashPrompt)(prompt);
                tx.insert(tables_1.prompts)
                    .values({
                        id: promptId,
                        prompt: label,
                    })
                    .onConflictDoNothing()
                    .run();
                tx.insert(tables_1.evalsToPrompts)
                    .values({
                        evalId,
                        promptId,
                    })
                    .onConflictDoNothing()
                    .run();
                logger_1.default.debug(`Inserting prompt ${promptId}`);
            }
            // Record dataset relation; the dataset id is the hash of the tests.
            const datasetId = (0, createHash_1.sha256)(JSON.stringify(config.tests || []));
            tx.insert(tables_1.datasets)
                .values({
                    id: datasetId,
                    tests: config.tests,
                })
                .onConflictDoNothing()
                .run();
            tx.insert(tables_1.evalsToDatasets)
                .values({
                    evalId,
                    datasetId,
                })
                .onConflictDoNothing()
                .run();
            logger_1.default.debug(`Inserting dataset ${datasetId}`);
            // Record tags; the tag id is the hash of `key:value`.
            if (config.tags) {
                for (const [tagKey, tagValue] of Object.entries(config.tags)) {
                    const tagId = (0, createHash_1.sha256)(`${tagKey}:${tagValue}`);
                    tx.insert(tables_1.tags)
                        .values({
                            id: tagId,
                            name: tagKey,
                            value: tagValue,
                        })
                        .onConflictDoNothing()
                        .run();
                    tx.insert(tables_1.evalsToTags)
                        .values({
                            evalId,
                            tagId,
                        })
                        .onConflictDoNothing()
                        .run();
                    logger_1.default.debug(`Inserting tag ${tagId}`);
                }
            }
        });
        return new Eval(config, { id: evalId, author: opts?.author, createdAt, persisted: true });
    }
    /**
     * Creates an in-memory eval; nothing is written to the database.
     *
     * @param {object} config - Eval configuration.
     * @param {object} [opts] - Optional `id`, `createdAt`, `author`,
     *   `prompts`, `datasetId` and `persisted`. Other keys are ignored.
     */
    constructor(config, opts) {
        const createdAt = opts?.createdAt || new Date();
        // Stored as epoch milliseconds.
        this.createdAt = createdAt.getTime();
        this.id = opts?.id || createEvalId(createdAt);
        this.author = opts?.author;
        this.config = config;
        this.results = [];
        this.prompts = opts?.prompts || [];
        this.datasetId = opts?.datasetId;
        this.persisted = opts?.persisted || false;
    }
    /**
     * Reports which storage layout this eval uses.
     *
     * Version 3 is the denormalized version where the table and results are
     * stored on the eval object.
     * Version 4 is the normalized version where the results are stored in
     * another database table and the table for visualization is generated by
     * the app.
     *
     * @returns {number} `3` when legacy results with a `table` are attached,
     *   otherwise `4`.
     */
    version() {
        return this.oldResults && 'table' in this.oldResults ? 3 : 4;
    }
    /**
     * @returns {boolean} `true` when this eval uses the legacy layout.
     */
    useOldResults() {
        return this.version() < 4;
    }
    /**
     * Replaces the table stored on a legacy eval.
     *
     * @param {object} table - New table value.
     * @throws When the eval is not a legacy (version 3) eval.
     */
    setTable(table) {
        (0, tiny_invariant_1.default)(this.version() < 4, 'Eval is not version 3');
        (0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
        this.oldResults.table = table;
    }
    /**
     * Writes config, prompts, description, author and a fresh `updatedAt` to
     * the eval's row (legacy evals also write their results) and marks the
     * instance as persisted. This issues an UPDATE only.
     *
     * @returns {Promise<void>}
     */
    async save() {
        const db = (0, database_1.getDb)();
        const updateObj = {
            config: this.config,
            prompts: this.prompts,
            description: this.config.description,
            author: this.author,
            updatedAt: (0, time_1.getCurrentTimestamp)(),
        };
        if (this.useOldResults()) {
            (0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
            updateObj.results = this.oldResults;
        }
        await db.update(tables_1.evals).set(updateObj).where((0, drizzle_orm_1.eq)(tables_1.evals.id, this.id)).run();
        this.persisted = true;
    }
    /**
     * Lists the variable names used by this eval's test cases.
     *
     * @returns {Promise<string[]>} Variable names; for legacy evals they come
     *   from the stored table head.
     */
    async getVars() {
        if (this.useOldResults()) {
            (0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
            return this.oldResults.table?.head.vars || [];
        }
        const db = (0, database_1.getDb)();
        // Reads `eval_results`, the same table and columns that
        // EvalQueries.getVarsFromEvals queries for this data.
        const query = (0, drizzle_orm_1.sql) `SELECT DISTINCT j.key from (SELECT json_extract(eval_results.test_case, '$.vars') as vars
FROM eval_results where eval_results.eval_id = ${this.id}) t, json_each(t.vars) j;`;
        // @ts-ignore
        const results = await db.all(query);
        return results.map((r) => r.key);
    }
    /**
     * @returns {Array<object>} The prompts of this eval; for legacy evals
     *   they come from the stored table head.
     */
    getPrompts() {
        if (this.useOldResults()) {
            (0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
            return this.oldResults.table?.head.prompts || [];
        }
        return this.prompts;
    }
    /**
     * Returns the results table: the stored one for legacy evals (or an
     * empty table when none is stored), otherwise one converted from the
     * results file representation.
     *
     * @returns {Promise<object>} The table.
     */
    async getTable() {
        if (this.useOldResults()) {
            return this.oldResults?.table || { head: { prompts: [], vars: [] }, body: [] };
        }
        return (0, convertEvalResultsToTable_1.convertResultsToTable)(await this.toResultsFile());
    }
    /**
     * Wraps an evaluate result in a result model and appends it to
     * `this.results`. The `persist` flag handed to the result model mirrors
     * this eval's `persisted` state.
     *
     * @param {object} result - Evaluate result to record.
     * @param {object} test - Test case the result belongs to.
     * @returns {Promise<void>}
     */
    async addResult(result, test) {
        const newResult = await evalResult_1.default.createFromEvaluateResult(this.id, result, test, {
            persist: this.persisted,
        });
        this.results.push(newResult);
    }
    /**
     * Replaces the prompts on the instance and, for persisted evals, on the
     * eval's row as well.
     *
     * @param {Array<object>} prompts - New prompts.
     * @returns {Promise<void>}
     */
    async addPrompts(prompts) {
        this.prompts = prompts;
        if (this.persisted) {
            const db = (0, database_1.getDb)();
            await db.update(tables_1.evals).set({ prompts }).where((0, drizzle_orm_1.eq)(tables_1.evals.id, this.id)).run();
        }
    }
    /**
     * Links providers to this eval. Does nothing for evals that are not
     * persisted.
     *
     * @param {Array<{id: string}>} providers - Providers; only `id` is read.
     * @returns {Promise<void>}
     */
    async addProviders(providers) {
        if (this.persisted) {
            const db = (0, database_1.getDb)();
            // Synchronous callback, matching the write transactions in
            // create() and delete(); nothing in the body is awaited.
            await db.transaction((tx) => {
                for (const provider of providers) {
                    const id = provider.id;
                    tx.insert(tables_1.evalsToProviders)
                        .values({
                            evalId: this.id,
                            providerId: id,
                        })
                        .onConflictDoNothing()
                        .run();
                }
            });
        }
    }
    /**
     * Replaces `this.results` with the results looked up by eval id.
     *
     * @returns {Promise<void>}
     */
    async loadResults() {
        this.results = await evalResult_1.default.findManyByEvalId(this.id);
    }
    /**
     * @returns {Promise<Array<object>>} The stored results for legacy evals,
     *   otherwise the freshly reloaded result models.
     */
    async getResults() {
        if (this.useOldResults()) {
            (0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
            return this.oldResults.results;
        }
        await this.loadResults();
        return this.results;
    }
    /**
     * Builds the evaluate summary.
     *
     * Legacy evals yield a `version: 2` summary taken from the stored
     * results. Other evals yield a `version: 3` summary whose stats are
     * summed from the per-prompt metrics; results are loaded first when none
     * are held in memory.
     *
     * @returns {Promise<object>} The summary.
     */
    async toEvaluateSummary() {
        if (this.useOldResults()) {
            (0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
            return {
                version: 2,
                timestamp: new Date(this.createdAt).toISOString(),
                results: this.oldResults.results,
                table: this.oldResults.table,
                stats: this.oldResults.stats,
            };
        }
        if (this.results.length === 0) {
            await this.loadResults();
        }
        const stats = {
            successes: 0,
            failures: 0,
            tokenUsage: {
                cached: 0,
                completion: 0,
                prompt: 0,
                total: 0,
            },
        };
        for (const prompt of this.prompts) {
            const metrics = prompt.metrics;
            // Metrics may lack token usage; count missing values as 0
            // instead of throwing.
            const usage = metrics?.tokenUsage;
            stats.successes += metrics?.testPassCount || 0;
            stats.failures += metrics?.testFailCount || 0;
            stats.tokenUsage.prompt += usage?.prompt || 0;
            stats.tokenUsage.cached += usage?.cached || 0;
            stats.tokenUsage.completion += usage?.completion || 0;
            stats.tokenUsage.total += usage?.total || 0;
        }
        return {
            version: 3,
            timestamp: new Date(this.createdAt).toISOString(),
            prompts: this.prompts,
            results: this.results.map((r) => r.toEvaluateResult()),
            stats,
        };
    }
    /**
     * Builds the results-file representation of this eval.
     *
     * @returns {Promise<object>} Object with `version`, `createdAt`,
     *   `results` (the evaluate summary), `config`, `author`, `prompts` and
     *   `datasetId`; a missing author or dataset id is reported as `null`.
     */
    async toResultsFile() {
        const results = {
            version: this.version(),
            createdAt: new Date(this.createdAt).toISOString(),
            results: await this.toEvaluateSummary(),
            config: this.config,
            author: this.author || null,
            prompts: this.getPrompts(),
            datasetId: this.datasetId || null,
        };
        return results;
    }
    /**
     * Deletes this eval, its relation rows and its results in one
     * transaction. The eval row itself is removed last.
     *
     * @returns {Promise<void>}
     */
    async delete() {
        const db = (0, database_1.getDb)();
        await db.transaction((tx) => {
            // Issue every statement on the transaction handle so that all
            // deletes belong to the transaction opened here.
            tx.delete(tables_1.evalsToDatasets).where((0, drizzle_orm_1.eq)(tables_1.evalsToDatasets.evalId, this.id)).run();
            tx.delete(tables_1.evalsToPrompts).where((0, drizzle_orm_1.eq)(tables_1.evalsToPrompts.evalId, this.id)).run();
            tx.delete(tables_1.evalsToTags).where((0, drizzle_orm_1.eq)(tables_1.evalsToTags.evalId, this.id)).run();
            tx.delete(tables_1.evalsToProviders).where((0, drizzle_orm_1.eq)(tables_1.evalsToProviders.evalId, this.id)).run();
            tx.delete(tables_1.evalResultsTable).where((0, drizzle_orm_1.eq)(tables_1.evalResultsTable.evalId, this.id)).run();
            tx.delete(tables_1.evals).where((0, drizzle_orm_1.eq)(tables_1.evals.id, this.id)).run();
        });
    }
}
|
|
360
|
+
exports.default = Eval;
|
|
361
|
+
/**
 * Lists summary rows for the newest evals, most recent first.
 *
 * Only evals whose `results` column equals `{}` are matched. The test count
 * is derived as the highest `testIdx` among the eval's result rows plus one,
 * and reported as 0 when that value is falsy.
 *
 * @param {number} [limit] - Maximum number of rows to return.
 * @param {string} [filterDescription] - When given, only evals whose
 *   description contains this text (SQL `LIKE %text%`) are returned.
 * @param {string} [datasetId] - When given, only evals linked to this
 *   dataset are returned.
 * @returns {Promise<Array<{evalId, createdAt, description, numTests, datasetId}>>}
 *   One summary object per eval.
 */
async function getSummaryofLatestEvals(limit = constants_1.DEFAULT_QUERY_LIMIT, filterDescription, datasetId) {
    const db = (0, database_1.getDb)();
    const startTime = performance.now();
    const selectedColumns = {
        evalId: tables_1.evals.id,
        createdAt: tables_1.evals.createdAt,
        description: tables_1.evals.description,
        numTests: (0, drizzle_orm_1.sql) `MAX(${tables_1.evalResultsTable.testIdx} + 1)`.as('numTests'),
        datasetId: tables_1.evalsToDatasets.datasetId,
    };
    // Optional filters stay `undefined` when not requested.
    const byDataset = datasetId
        ? (0, drizzle_orm_1.eq)(tables_1.evalsToDatasets.datasetId, datasetId)
        : undefined;
    const byDescription = filterDescription
        ? (0, drizzle_orm_1.like)(tables_1.evals.description, `%${filterDescription}%`)
        : undefined;
    const byEmptyResults = (0, drizzle_orm_1.eq)(tables_1.evals.results, {});
    const rows = db
        .select(selectedColumns)
        .from(tables_1.evals)
        .leftJoin(tables_1.evalsToDatasets, (0, drizzle_orm_1.eq)(tables_1.evals.id, tables_1.evalsToDatasets.evalId))
        .leftJoin(tables_1.evalResultsTable, (0, drizzle_orm_1.eq)(tables_1.evals.id, tables_1.evalResultsTable.evalId))
        .where((0, drizzle_orm_1.and)(byDataset, byDescription, byEmptyResults))
        .groupBy(tables_1.evals.id)
        .orderBy((0, drizzle_orm_1.desc)(tables_1.evals.createdAt))
        .limit(limit)
        .all();
    const mappedResults = rows.map(({ evalId, createdAt, description, numTests, datasetId: linkedDatasetId }) => ({
        evalId,
        createdAt,
        description,
        numTests: numTests || 0,
        datasetId: linkedDatasetId,
    }));
    const elapsedMs = performance.now() - startTime;
    logger_1.default.debug(`listPreviousResults execution time: ${elapsedMs.toFixed(2)}ms`);
    return mappedResults;
}
|
|
390
|
+
//# sourceMappingURL=eval.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/models/eval.ts"],"names":[],"mappings":";;;;;;AAoCA,oCAEC;AAkaD,0DA0CC;AAlfD,mCAAoC;AACpC,6CAAuD;AACvD,oEAAuC;AACvC,4CAAmD;AACnD,0CAAoC;AACpC,+CAU4B;AAC5B,uDAA+B;AAC/B,4CAA8C;AAa9C,iFAA0E;AAC1E,mDAA4D;AAC5D,uCAAmD;AACnD,8DAAsC;AAGtC,SAAgB,YAAY,CAAC,YAAkB,IAAI,IAAI,EAAE;IACvD,OAAO,QAAQ,IAAA,2BAAc,EAAC,CAAC,CAAC,IAAI,SAAS,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;AAC7E,CAAC;AAED,MAAa,WAAW;IACtB,MAAM,CAAC,KAAK,CAAC,gBAAgB,CAAC,KAAa;QACzC,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,KAAK,GAAG,iBAAG,CAAC,GAAG,CACnB;sCACgC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,4BAA4B,CACpG,CAAC;QACF,aAAa;QACb,MAAM,OAAO,GAAuC,MAAM,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QACxE,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAA6B,EAAE,CAAC,EAAE,EAAE;YAC/D,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YACtC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC3B,OAAO,GAAG,CAAC;QACb,CAAC,EAAE,EAAE,CAAC,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AAhBD,kCAgBC;AAED,MAAqB,IAAI;IAavB,MAAM,CAAC,KAAK,CAAC,MAAM;QACjB,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,UAAU,GAAG,MAAM,EAAE;aACxB,MAAM,CAAC;YACN,EAAE,EAAE,cAAU,CAAC,EAAE;SAClB,CAAC;aACD,IAAI,CAAC,cAAU,CAAC;aAChB,OAAO,CAAC,IAAA,kBAAI,EAAC,cAAU,CAAC,SAAS,CAAC,CAAC;aACnC,KAAK,CAAC,CAAC,CAAC,CAAC;QAEZ,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,OAAO,MAAM,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAU;QAC9B,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QAEnB,MAAM,EAAE,KAAK,EAAE,cAAc,EAAE,GAAG,MAAM,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;YAClE,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,cAAU,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,cAAU,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;YAC9E,MAAM,cAAc,GAAG,MAAM,EAAE;iBAC5B,MAAM,CAAC;gBACN,SAAS,EAAE,wBAAe,CAAC,SAAS;aACrC,CAAC;iBACD
,IAAI,CAAC,wBAAe,CAAC;iBACrB,KAAK,CAAC,IAAA,gBAAE,EAAC,wBAAe,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;iBACrC,KAAK,CAAC,CAAC,CAAC,CAAC;YAEZ,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC;QACnC,CAAC,CAAC,CAAC;QAEH,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,SAAS,CAAC;QACnB,CAAC;QACD,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEvB,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC;QAE/C,MAAM,YAAY,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAC1C,EAAE,EAAE,KAAK,CAAC,EAAE;YACZ,SAAS,EAAE,IAAI,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC;YACpC,MAAM,EAAE,KAAK,CAAC,MAAM,IAAI,SAAS;YACjC,WAAW,EAAE,KAAK,CAAC,WAAW,IAAI,SAAS;YAC3C,OAAO,EAAE,KAAK,CAAC,OAAO,IAAI,EAAE;YAC5B,SAAS;YACT,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,IAAI,KAAK,CAAC,OAAO,IAAI,OAAO,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;YAC9C,YAAY,CAAC,UAAU,GAAG,KAAK,CAAC,OAA4B,CAAC;QAC/D,CAAC;QAED,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,QAAgB,+BAAmB;QACtD,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,KAAK,GAAG,MAAM,EAAE;aACnB,MAAM,EAAE;aACR,IAAI,CAAC,cAAU,CAAC;aAChB,KAAK,CAAC,KAAK,CAAC;aACZ,OAAO,CAAC,IAAA,kBAAI,EAAC,cAAU,CAAC,SAAS,CAAC,CAAC;aACnC,GAAG,EAAE,CAAC;QACT,OAAO,KAAK,CAAC,GAAG,CACd,CAAC,CAAC,EAAE,EAAE,CACJ,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE;YACjB,EAAE,EAAE,CAAC,CAAC,EAAE;YACR,SAAS,EAAE,IAAI,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;YAChC,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,SAAS;YAC7B,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,SAAS;YACvC,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE;YACxB,SAAS,EAAE,IAAI;SAChB,CAAC,CACL,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,MAAM,CACjB,MAA8B,EAC9B,eAAyB,EAAE,uFAAuF;IAClH,IAMC;QAED,MAAM,SAAS,GAAG,IAAI,EAAE,SAAS,IAAI,IAAI,IAAI,EAAE,CAAC;QAChD,MAAM,MAAM,GAAG,IAAI,EAAE,EAAE,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC;QACnD,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,EAAE,CAAC,WAAW,CAAC,CAAC,EAAE,EAAE,EAAE;YAC1B,EAAE,CAAC,MAAM,CAAC,cAAU,CAAC;iBAClB,MAAM,CAAC;gBACN,EAAE,EAAE,MAAM;gBACV,SAAS,EAAE,SAAS,CAAC,OAAO,EAAE;gBAC9B,MAAM,EAAE,IAAI,EAAE,MAAM;gBACpB,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,MAAM;gBACN,OAAO,EAAE,EAAE;aACZ,CAAC;iBACD,GAAG,EAAE,CAAC;YACT,IAAI,IAAI,EAAE,OAAO,EAAE,CA
AC;gBAClB,MAAM,GAAG,GAAG,EAAE;qBACX,MAAM,CAAC,yBAAgB,CAAC;qBACxB,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,IAAA,mBAAU,GAAE,EAAE,CAAC,CAAC,CAAC;qBACtE,GAAG,EAAE,CAAC;gBACT,gBAAM,CAAC,KAAK,CAAC,YAAY,GAAG,CAAC,OAAO,eAAe,CAAC,CAAC;YACvD,CAAC;YAED,KAAK,MAAM,MAAM,IAAI,eAAe,EAAE,CAAC;gBACrC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC;gBAC3D,MAAM,QAAQ,GAAG,IAAA,kBAAU,EAAC,MAAM,CAAC,CAAC;gBAEpC,EAAE,CAAC,MAAM,CAAC,gBAAY,CAAC;qBACpB,MAAM,CAAC;oBACN,EAAE,EAAE,QAAQ;oBACZ,MAAM,EAAE,KAAK;iBACd,CAAC;qBACD,mBAAmB,EAAE;qBACrB,GAAG,EAAE,CAAC;gBAET,EAAE,CAAC,MAAM,CAAC,uBAAc,CAAC;qBACtB,MAAM,CAAC;oBACN,MAAM;oBACN,QAAQ;iBACT,CAAC;qBACD,mBAAmB,EAAE;qBACrB,GAAG,EAAE,CAAC;gBAET,gBAAM,CAAC,KAAK,CAAC,oBAAoB,QAAQ,EAAE,CAAC,CAAC;YAC/C,CAAC;YAED,0BAA0B;YAC1B,MAAM,SAAS,GAAG,IAAA,mBAAM,EAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,CAAC;YAC7D,EAAE,CAAC,MAAM,CAAC,iBAAQ,CAAC;iBAChB,MAAM,CAAC;gBACN,EAAE,EAAE,SAAS;gBACb,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC;iBACD,mBAAmB,EAAE;iBACrB,GAAG,EAAE,CAAC;YAET,EAAE,CAAC,MAAM,CAAC,wBAAe,CAAC;iBACvB,MAAM,CAAC;gBACN,MAAM;gBACN,SAAS;aACV,CAAC;iBACD,mBAAmB,EAAE;iBACrB,GAAG,EAAE,CAAC;YAET,gBAAM,CAAC,KAAK,CAAC,qBAAqB,SAAS,EAAE,CAAC,CAAC;YAE/C,cAAc;YACd,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;gBAChB,KAAK,MAAM,CAAC,MAAM,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC7D,MAAM,KAAK,GAAG,IAAA,mBAAM,EAAC,GAAG,MAAM,IAAI,QAAQ,EAAE,CAAC,CAAC;oBAE9C,EAAE,CAAC,MAAM,CAAC,aAAS,CAAC;yBACjB,MAAM,CAAC;wBACN,EAAE,EAAE,KAAK;wBACT,IAAI,EAAE,MAAM;wBACZ,KAAK,EAAE,QAAQ;qBAChB,CAAC;yBACD,mBAAmB,EAAE;yBACrB,GAAG,EAAE,CAAC;oBAET,EAAE,CAAC,MAAM,CAAC,oBAAW,CAAC;yBACnB,MAAM,CAAC;wBACN,MAAM;wBACN,KAAK;qBACN,CAAC;yBACD,mBAAmB,EAAE;yBACrB,GAAG,EAAE,CAAC;oBAET,gBAAM,CAAC,KAAK,CAAC,iBAAiB,KAAK,EAAE,CAAC,CAAC;gBACzC,CAAC;YACH,CAAC;QACH,CAAC,CAAC,CAAC;QACH,OAAO,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CA
AC;IAC5F,CAAC;IAED,YACE,MAA8B,EAC9B,IAQC;QAED,MAAM,SAAS,GAAG,IAAI,EAAE,SAAS,IAAI,IAAI,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC;QACrC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,EAAE,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC;QAC9C,IAAI,CAAC,MAAM,GAAG,IAAI,EAAE,MAAM,CAAC;QAC3B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC;QAClB,IAAI,CAAC,OAAO,GAAG,IAAI,EAAE,OAAO,IAAI,EAAE,CAAC;QACnC,IAAI,CAAC,SAAS,GAAG,IAAI,EAAE,SAAS,CAAC;QACjC,IAAI,CAAC,SAAS,GAAG,IAAI,EAAE,SAAS,IAAI,KAAK,CAAC;IAC5C,CAAC;IAED,OAAO;QACL;;;WAGG;QACH,OAAO,IAAI,CAAC,UAAU,IAAI,OAAO,IAAI,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,CAAC;IAED,aAAa;QACX,OAAO,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IAC5B,CAAC;IAED,QAAQ,CAAC,KAAoB;QAC3B,IAAA,wBAAS,EAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,EAAE,uBAAuB,CAAC,CAAC;QACvD,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;QACpD,IAAI,CAAC,UAAU,CAAC,KAAK,GAAG,KAAK,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,SAAS,GAAwB;YACrC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;YACpC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,SAAS,EAAE,IAAA,0BAAmB,GAAE;SACjC,CAAC;QAEF,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,SAAS,CAAC,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC;QACtC,CAAC;QACD,MAAM,EAAE,CAAC,MAAM,CAAC,cAAU,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,cAAU,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QACnF,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QAChD,CAAC;QACD,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,KAAK,GAAG,IAAA,iBAAG,EAAA;+DAC0C,IAAI,CAAC,EAAE,2BAA2B,CAAC;QAC9F,aAAa;QACb,MAAM,OAAO,GAAsB,MAAM,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAEvD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG
,CAAC,IAAI,EAAE,CAAC;IACzC,CAAC;IAED,UAAU;QACR,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC;QACnD,CAAC;QACD,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IAED,KAAK,CAAC,QAAQ;QACZ,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC,UAAU,EAAE,KAAK,IAAI,EAAE,IAAI,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;QACjF,CAAC;QACD,OAAO,IAAA,iDAAqB,EAAC,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC;IAC3D,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,MAAsB,EAAE,IAAoB;QAC1D,MAAM,SAAS,GAAG,MAAM,oBAAU,CAAC,wBAAwB,CAAC,IAAI,CAAC,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE;YACjF,OAAO,EAAE,IAAI,CAAC,SAAS;SACxB,CAAC,CAAC;QACH,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,OAA0B;QACzC,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;YACnB,MAAM,EAAE,CAAC,MAAM,CAAC,cAAU,CAAC,CAAC,GAAG,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,cAAU,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QACvF,CAAC;IACH,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,SAAqB;QACtC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;YACnB,MAAM,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;gBAChC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;oBACjC,MAAM,EAAE,GAAG,QAAQ,CAAC,EAAE,CAAC;oBACvB,EAAE,CAAC,MAAM,CAAC,yBAAgB,CAAC;yBACxB,MAAM,CAAC;wBACN,MAAM,EAAE,IAAI,CAAC,EAAE;wBACf,UAAU,EAAE,EAAE;qBACf,CAAC;yBACD,mBAAmB,EAAE;yBACrB,GAAG,EAAE,CAAC;gBACX,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW;QACf,IAAI,CAAC,OAAO,GAAG,MAAM,oBAAU,CAAC,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,OAAO,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC;QACjC,CAAC;QACD,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IACD,KAAK,CAAC,iBAAiB;QACrB,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB
,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,OAAO;gBACL,OAAO,EAAE,CAAC;gBACV,SAAS,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE;gBACjD,OAAO,EAAE,IAAI,CAAC,UAAU,CAAC,OAAO;gBAChC,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK;gBAC5B,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK;aAC7B,CAAC;QACJ,CAAC;QACD,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC9B,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;QAC3B,CAAC;QACD,MAAM,KAAK,GAAkB;YAC3B,SAAS,EAAE,CAAC;YACZ,QAAQ,EAAE,CAAC;YACX,UAAU,EAAE;gBACV,MAAM,EAAE,CAAC;gBACT,UAAU,EAAE,CAAC;gBACb,MAAM,EAAE,CAAC;gBACT,KAAK,EAAE,CAAC;aACT;SACF,CAAC;QAEF,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAClC,KAAK,CAAC,SAAS,IAAI,MAAM,CAAC,OAAO,EAAE,aAAa,IAAI,CAAC,CAAC;YACtD,KAAK,CAAC,QAAQ,IAAI,MAAM,CAAC,OAAO,EAAE,aAAa,IAAI,CAAC,CAAC;YACrD,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC;YAClE,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC;YAClE,KAAK,CAAC,UAAU,CAAC,UAAU,IAAI,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,UAAU,IAAI,CAAC,CAAC;YAC1E,KAAK,CAAC,UAAU,CAAC,KAAK,IAAI,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,KAAK,IAAI,CAAC,CAAC;QAClE,CAAC;QAED,OAAO;YACL,OAAO,EAAE,CAAC;YACV,SAAS,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE;YACjD,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,gBAAgB,EAAE,CAAC;YACtD,KAAK;SACN,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,aAAa;QACjB,MAAM,OAAO,GAAgB;YAC3B,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE;YACvB,SAAS,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE;YACjD,OAAO,EAAE,MAAM,IAAI,CAAC,iBAAiB,EAAE;YACvC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,IAAI;YAC3B,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE;YAC1B,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI;SAClC,CAAC;QAEF,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,MAAM;QACV,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;YACxB,EAAE,CAAC,MAAM,CAAC,wBAAe,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,wBAAe,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YAC5E,EAAE,CAAC,M
AAM,CAAC,uBAAc,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,uBAAc,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YAC1E,EAAE,CAAC,MAAM,CAAC,oBAAW,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,oBAAW,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YACpE,EAAE,CAAC,MAAM,CAAC,yBAAgB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,yBAAgB,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YAC9E,EAAE,CAAC,MAAM,CAAC,yBAAgB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,yBAAgB,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YAC9E,EAAE,CAAC,MAAM,CAAC,cAAU,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,cAAU,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QAChE,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AA5YD,uBA4YC;AAEM,KAAK,UAAU,uBAAuB,CAC3C,QAAgB,+BAAmB,EACnC,iBAA0B,EAC1B,SAAkB;IAElB,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;IACnB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,EAAE;SACb,MAAM,CAAC;QACN,MAAM,EAAE,cAAU,CAAC,EAAE;QACrB,SAAS,EAAE,cAAU,CAAC,SAAS;QAC/B,WAAW,EAAE,cAAU,CAAC,WAAW;QACnC,QAAQ,EAAE,IAAA,iBAAG,EAAA,OAAO,yBAAgB,CAAC,OAAO,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC;QAClE,SAAS,EAAE,wBAAe,CAAC,SAAS;KACrC,CAAC;SACD,IAAI,CAAC,cAAU,CAAC;SAChB,QAAQ,CAAC,wBAAe,EAAE,IAAA,gBAAE,EAAC,cAAU,CAAC,EAAE,EAAE,wBAAe,CAAC,MAAM,CAAC,CAAC;SACpE,QAAQ,CAAC,yBAAgB,EAAE,IAAA,gBAAE,EAAC,cAAU,CAAC,EAAE,EAAE,yBAAgB,CAAC,MAAM,CAAC,CAAC;SACtE,KAAK,CACJ,IAAA,iBAAG,EACD,SAAS,CAAC,CAAC,CAAC,IAAA,gBAAE,EAAC,wBAAe,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,iBAAiB,CAAC,CAAC,CAAC,IAAA,kBAAI,EAAC,cAAU,CAAC,WAAW,EAAE,IAAI,iBAAiB,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,EACtF,IAAA,gBAAE,EAAC,cAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAC3B,CACF;SACA,OAAO,CAAC,cAAU,CAAC,EAAE,CAAC,CAAC;IAE1B,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,IAAA,kBAAI,EAAC,cAAU,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,CAAC;IAE7E,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAC7C,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,QAAQ,EAAG,MAAM,CAAC,QAAmB,IAAI,CAAC;QAC1C,SAAS,EAAE,
MAAM,CAAC,SAAS;KAC5B,CAAC,CAAC,CAAC;IAEJ,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAClC,MAAM,aAAa,GAAG,OAAO,GAAG,SAAS,CAAC;IAC1C,gBAAM,CAAC,KAAK,CAAC,uCAAuC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAElF,OAAO,aAAa,CAAC;AACvB,CAAC"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import type { AtomicTestCase, GradingResult, Prompt, ProviderOptions, ProviderResponse } from '../types';
|
|
2
|
+
import { type EvaluateResult } from '../types';
|
|
3
|
+
/**
 * Model for one evaluation result: a single prompt/test-case outcome that
 * belongs to an eval and can be stored in the eval results table.
 */
export default class EvalResult {
|
|
4
|
+
/**
 * Builds an EvalResult from an EvaluateResult for the given eval and test case.
 * A new random UUID is generated for the result. When `opts.persist` is omitted
 * it is treated as true, in which case the row is inserted and the instance is
 * built from the returned row.
 */
static createFromEvaluateResult(evalId: string, result: EvaluateResult, testCase: AtomicTestCase, opts?: {
|
|
5
|
+
persist: boolean;
|
|
6
|
+
}): Promise<EvalResult>;
|
|
7
|
+
/**
 * Inserts every result under `evalId` inside a single transaction, giving each
 * a new random UUID, and resolves to the created instances.
 */
static createManyFromEvaluateResult(results: EvaluateResult[], evalId: string): Promise<EvalResult[]>;
|
|
8
|
+
/** Looks up one result by id; resolves to null when no row matches. */
static findById(id: string): Promise<EvalResult | null>;
|
|
9
|
+
/** Loads every stored result whose evalId matches. */
static findManyByEvalId(evalId: string): Promise<EvalResult[]>;
|
|
10
|
+
/** Identifier of this result. */
id: string;
|
|
11
|
+
/** Identifier of the eval this result belongs to. */
evalId: string;
|
|
12
|
+
/** Optional description; reported by toEvaluateResult(). */
description?: string | null;
|
|
13
|
+
promptIdx: number;
|
|
14
|
+
testIdx: number;
|
|
15
|
+
testCase: AtomicTestCase;
|
|
16
|
+
prompt: Prompt;
|
|
17
|
+
/** Falls back to a hash of `prompt` when the constructor receives no promptId. */
promptId: string;
|
|
18
|
+
error?: string | null;
|
|
19
|
+
success: boolean;
|
|
20
|
+
score: number;
|
|
21
|
+
response: ProviderResponse | null;
|
|
22
|
+
gradingResult: GradingResult | null;
|
|
23
|
+
/** Defaults to an empty object when the constructor receives none. */
namedScores: Record<string, number>;
|
|
24
|
+
provider: ProviderOptions;
|
|
25
|
+
/** Defaults to 0 when the constructor receives none. */
latencyMs: number;
|
|
26
|
+
/** Defaults to 0 when the constructor receives none. */
cost: number;
|
|
27
|
+
/** True once the instance is backed by a stored row; save() uses it to choose update vs. insert. */
persisted: boolean;
|
|
28
|
+
/** Copies the given fields onto the instance, applying the defaults noted on each property. */
constructor(opts: {
|
|
29
|
+
id: string;
|
|
30
|
+
evalId: string;
|
|
31
|
+
promptIdx: number;
|
|
32
|
+
testIdx: number;
|
|
33
|
+
testCase: AtomicTestCase;
|
|
34
|
+
prompt: Prompt;
|
|
35
|
+
promptId?: string | null;
|
|
36
|
+
error?: string | null;
|
|
37
|
+
success: boolean;
|
|
38
|
+
score: number;
|
|
39
|
+
response: ProviderResponse | null;
|
|
40
|
+
gradingResult: GradingResult | null;
|
|
41
|
+
namedScores?: Record<string, number> | null;
|
|
42
|
+
provider: ProviderOptions;
|
|
43
|
+
latencyMs?: number | null;
|
|
44
|
+
cost?: number | null;
|
|
45
|
+
persisted?: boolean;
|
|
46
|
+
});
|
|
47
|
+
/** Updates the stored row when `persisted` is true; otherwise inserts a row and marks the instance persisted. */
save(): Promise<void>;
|
|
48
|
+
/** Converts this instance to a plain EvaluateResult; the provider is reduced to its id and label. */
toEvaluateResult(): EvaluateResult;
|
|
49
|
+
}
|
|
50
|
+
//# sourceMappingURL=evalResult.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evalResult.d.ts","sourceRoot":"","sources":["../../../src/models/evalResult.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EACV,cAAc,EACd,aAAa,EACb,MAAM,EACN,eAAe,EACf,gBAAgB,EACjB,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,KAAK,cAAc,EAAE,MAAM,UAAU,CAAC;AAG/C,MAAM,CAAC,OAAO,OAAO,UAAU;WAChB,wBAAwB,CACnC,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,cAAc,EACtB,QAAQ,EAAE,cAAc,EACxB,IAAI,CAAC,EAAE;QAAE,OAAO,EAAE,OAAO,CAAA;KAAE;WAgChB,4BAA4B,CAAC,OAAO,EAAE,cAAc,EAAE,EAAE,MAAM,EAAE,MAAM;WAetE,QAAQ,CAAC,EAAE,EAAE,MAAM;WAMnB,gBAAgB,CAAC,MAAM,EAAE,MAAM;IAS5C,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,cAAc,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAClC,aAAa,EAAE,aAAa,GAAG,IAAI,CAAC;IACpC,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACpC,QAAQ,EAAE,eAAe,CAAC;IAC1B,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,OAAO,CAAC;gBAEP,IAAI,EAAE;QAChB,EAAE,EAAE,MAAM,CAAC;QACX,MAAM,EAAE,MAAM,CAAC;QACf,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE,cAAc,CAAC;QACzB,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACzB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACtB,OAAO,EAAE,OAAO,CAAC;QACjB,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,gBAAgB,GAAG,IAAI,CAAC;QAClC,aAAa,EAAE,aAAa,GAAG,IAAI,CAAC;QACpC,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;QAC5C,QAAQ,EAAE,eAAe,CAAC;QAC1B,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC1B,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACrB,SAAS,CAAC,EAAE,OAAO,CAAC;KACrB;IAqBK,IAAI;IAeV,gBAAgB,IAAI,cAAc;CAqBnC"}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const crypto_1 = require("crypto");
|
|
4
|
+
const drizzle_orm_1 = require("drizzle-orm");
|
|
5
|
+
const database_1 = require("../database");
|
|
6
|
+
const tables_1 = require("../database/tables");
|
|
7
|
+
const utils_1 = require("../prompts/utils");
|
|
8
|
+
const time_1 = require("../util/time");
|
|
9
|
+
/**
 * Model for one evaluation result (a single prompt/test-case outcome) that
 * belongs to an eval and can be stored in the eval results table.
 */
class EvalResult {
    /**
     * Builds an EvalResult from an EvaluateResult.
     *
     * @param {string} evalId - Id of the eval the result belongs to.
     * @param {object} result - The EvaluateResult to convert.
     * @param {object} testCase - The test case that produced the result.
     * @param {{persist: boolean}} [opts] - `persist` defaults to true when omitted.
     * @returns {Promise<EvalResult>} The created instance (persisted when requested).
     */
    static async createFromEvaluateResult(evalId, result, testCase, opts) {
        const persist = opts?.persist == null ? true : opts.persist;
        const { prompt, error, score, latencyMs, success, provider, gradingResult, namedScores, cost } = result;
        const args = {
            id: (0, crypto_1.randomUUID)(),
            evalId,
            testCase,
            promptIdx: result.promptIdx,
            testIdx: result.testIdx,
            prompt,
            promptId: (0, utils_1.hashPrompt)(prompt),
            error: error?.toString(),
            success,
            // A missing score is stored as 0.
            score: score == null ? 0 : score,
            response: result.response || null,
            gradingResult: gradingResult || null,
            namedScores,
            provider,
            latencyMs,
            cost,
        };
        if (persist) {
            const db = (0, database_1.getDb)();
            const dbResult = await db.insert(tables_1.evalResultsTable).values(args).returning();
            const row = dbResult[0];
            // Keep the description on the instance so toEvaluateResult() can report it;
            // a description coming back from the row, if any, takes precedence.
            return new EvalResult({
                ...row,
                description: row.description ?? result.description,
                persisted: true,
            });
        }
        return new EvalResult({ ...args, description: result.description });
    }
    /**
     * Inserts every result under `evalId` inside a single transaction, giving
     * each a new random UUID.
     *
     * @param {object[]} results - EvaluateResult objects to store.
     * @param {string} evalId - Id of the eval the results belong to.
     * @returns {Promise<EvalResult[]>} The persisted instances, in input order.
     */
    static async createManyFromEvaluateResult(results, evalId) {
        const db = (0, database_1.getDb)();
        const returnResults = [];
        await db.transaction(async (tx) => {
            for (const result of results) {
                const dbResult = await tx
                    .insert(tables_1.evalResultsTable)
                    .values({ ...result, evalId, id: (0, crypto_1.randomUUID)() })
                    .returning();
                returnResults.push(new EvalResult({ ...dbResult[0], persisted: true }));
            }
        });
        return returnResults;
    }
    /**
     * Looks up one result by id.
     *
     * @param {string} id - Result id.
     * @returns {Promise<EvalResult|null>} The result, or null when no row matches.
     */
    static async findById(id) {
        const db = (0, database_1.getDb)();
        const result = await db.select().from(tables_1.evalResultsTable).where((0, drizzle_orm_1.eq)(tables_1.evalResultsTable.id, id));
        return result.length > 0 ? new EvalResult({ ...result[0], persisted: true }) : null;
    }
    /**
     * Loads every stored result that belongs to the given eval.
     *
     * @param {string} evalId - Eval id.
     * @returns {Promise<EvalResult[]>} The matching results.
     */
    static async findManyByEvalId(evalId) {
        const db = (0, database_1.getDb)();
        const results = await db
            .select()
            .from(tables_1.evalResultsTable)
            .where((0, drizzle_orm_1.eq)(tables_1.evalResultsTable.evalId, evalId));
        return results.map((result) => new EvalResult({ ...result, persisted: true }));
    }
    /**
     * Copies the given fields onto the instance.
     *
     * Defaults: `promptId` falls back to a hash of `prompt`, `namedScores` to
     * `{}`, `latencyMs` and `cost` to 0, and `persisted` to false.
     *
     * @param {object} opts - Field values; `description` is optional.
     */
    constructor(opts) {
        this.id = opts.id;
        this.evalId = opts.evalId;
        // Only set when provided, so instances built without a description keep
        // exactly the same own properties as before.
        if (opts.description != null) {
            this.description = opts.description;
        }
        this.promptIdx = opts.promptIdx;
        this.testIdx = opts.testIdx;
        this.testCase = opts.testCase;
        this.prompt = opts.prompt;
        this.promptId = opts.promptId || (0, utils_1.hashPrompt)(opts.prompt);
        this.error = opts.error;
        this.score = opts.score;
        this.success = opts.success;
        this.response = opts.response;
        this.gradingResult = opts.gradingResult;
        this.namedScores = opts.namedScores || {};
        this.provider = opts.provider;
        this.latencyMs = opts.latencyMs || 0;
        this.cost = opts.cost || 0;
        this.persisted = opts.persisted || false;
    }
    /**
     * Writes this result to the database: updates the existing row when the
     * instance is already persisted, otherwise inserts a new row and marks the
     * instance as persisted.
     *
     * @returns {Promise<void>}
     */
    async save() {
        const db = (0, database_1.getDb)();
        //check if this exists in the db
        if (this.persisted) {
            await db
                .update(tables_1.evalResultsTable)
                .set({ ...this, updatedAt: (0, time_1.getCurrentTimestamp)() })
                .where((0, drizzle_orm_1.eq)(tables_1.evalResultsTable.id, this.id));
        }
        else {
            const result = await db.insert(tables_1.evalResultsTable).values(this).returning();
            this.id = result[0].id;
            this.persisted = true;
        }
    }
    /**
     * Converts this instance to a plain EvaluateResult object.
     *
     * The provider is reduced to its `id` and `label`, and `vars` is taken from
     * the test case (empty object when absent).
     *
     * @returns {object} The EvaluateResult representation.
     */
    toEvaluateResult() {
        return {
            cost: this.cost,
            description: this.description || undefined,
            error: this.error || undefined,
            gradingResult: this.gradingResult,
            id: this.id,
            latencyMs: this.latencyMs,
            namedScores: this.namedScores,
            prompt: this.prompt,
            promptId: this.promptId,
            promptIdx: this.promptIdx,
            provider: { id: this.provider.id, label: this.provider.label },
            response: this.response || undefined,
            score: this.score,
            success: this.success,
            testCase: this.testCase,
            testIdx: this.testIdx,
            vars: this.testCase.vars || {},
        };
    }
}
|
|
121
|
+
exports.default = EvalResult;
|
|
122
|
+
//# sourceMappingURL=evalResult.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evalResult.js","sourceRoot":"","sources":["../../../src/models/evalResult.ts"],"names":[],"mappings":";;AAAA,mCAAoC;AACpC,6CAAiC;AACjC,0CAAoC;AACpC,+CAAsD;AACtD,4CAA8C;AAS9C,uCAAmD;AAEnD,MAAqB,UAAU;IAC7B,MAAM,CAAC,KAAK,CAAC,wBAAwB,CACnC,MAAc,EACd,MAAsB,EACtB,QAAwB,EACxB,IAA2B;QAE3B,MAAM,OAAO,GAAG,IAAI,EAAE,OAAO,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;QAC5D,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,WAAW,EAAE,IAAI,EAAE,GAC5F,MAAM,CAAC;QACT,MAAM,IAAI,GAAG;YACX,EAAE,EAAE,IAAA,mBAAU,GAAE;YAChB,MAAM;YACN,QAAQ;YACR,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,MAAM;YACN,QAAQ,EAAE,IAAA,kBAAU,EAAC,MAAM,CAAC;YAC5B,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAE;YACxB,OAAO;YACP,KAAK,EAAE,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK;YAChC,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,IAAI;YACjC,aAAa,EAAE,aAAa,IAAI,IAAI;YACpC,WAAW;YACX,QAAQ;YACR,SAAS;YACT,IAAI;SACL,CAAC;QACF,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;YAEnB,MAAM,QAAQ,GAAG,MAAM,EAAE,CAAC,MAAM,CAAC,yBAAgB,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,CAAC;YAC5E,OAAO,IAAI,UAAU,CAAC,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7D,CAAC;QACD,OAAO,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,4BAA4B,CAAC,OAAyB,EAAE,MAAc;QACjF,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,aAAa,GAAiB,EAAE,CAAC;QACvC,MAAM,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;YAChC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,QAAQ,GAAG,MAAM,EAAE;qBACtB,MAAM,CAAC,yBAAgB,CAAC;qBACxB,MAAM,CAAC,EAAE,GAAG,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,IAAA,mBAAU,GAAE,EAAE,CAAC;qBAC/C,SAAS,EAAE,CAAC;gBACf,aAAa,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YAC1E,CAAC;QACH,CAAC,CAAC,CAAC;QACH,OAAO,aAAa,CAAC;IACvB,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAU;QAC9B,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,yBAAgB,CAAC,CAAC,KAAK,CAAC
,IAAA,gBAAE,EAAC,yBAAgB,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;QAC3F,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,UAAU,CAAC,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACtF,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,gBAAgB,CAAC,MAAc;QAC1C,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,OAAO,GAAG,MAAM,EAAE;aACrB,MAAM,EAAE;aACR,IAAI,CAAC,yBAAgB,CAAC;aACtB,KAAK,CAAC,IAAA,gBAAE,EAAC,yBAAgB,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;QAC9C,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,UAAU,CAAC,EAAE,GAAG,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IACjF,CAAC;IAqBD,YAAY,IAkBX;QACC,IAAI,CAAC,EAAE,GAAG,IAAI,CAAC,EAAE,CAAC;QAClB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;QAE1B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QAChC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;QAC5B,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC9B,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;QAC1B,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAA,kBAAU,EAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACzD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;QACxB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;QACxB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;QAC5B,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC9B,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC,aAAa,CAAC;QACxC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;QAC1C,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC9B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,CAAC,CAAC;QACrC,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,CAAC,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC;IAC3C,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,gCAAgC;QAChC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,EAAE;iBACL,MAAM,CAAC,yBAAgB,CAAC;iBACxB,GAAG,CAAC,EAAE,GAAG,IAAI,EAAE,SAAS,EAAE,IAAA,0BAAmB,GAAE,EAAE,CAAC;iBAClD,KAAK,CAAC,IAAA,gBAAE,EAAC,yBAAgB,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,MAAM,CAAC,yBAAgB,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,CAAC;YAC1E,IAAI,CAAC,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACvB,IAAI,CAAC,SAAS,GA
AG,IAAI,CAAC;QACxB,CAAC;IACH,CAAC;IAED,gBAAgB;QACd,OAAO;YACL,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,SAAS;YAC1C,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,SAAS;YAC9B,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,EAAE,EAAE,IAAI,CAAC,EAAE;YACX,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,QAAQ,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE;YAC9D,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,SAAS;YACpC,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,IAAI,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,EAAE;SAC/B,CAAC;IACJ,CAAC;CACF;AAjKD,6BAiKC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { ApiProvider } from '../types';
|
|
2
|
+
/** Model for a stored provider record: its row id, the provider's own id, and its config. */
export default class Provider {
|
|
3
|
+
/** Row id; createMultiple derives it from a hash of the provider id and config. */
id: string;
|
|
4
|
+
/** The id reported by the provider itself. */
providerId: string;
|
|
5
|
+
/** Provider configuration as stored with the record. */
config: Record<string, any>;
|
|
6
|
+
/**
 * For each provider, looks up its stored record by hashed id and inserts one
 * when none is found; resolves to one Provider per input, in input order.
 */
static createMultiple(providers: ApiProvider[]): Promise<Provider[]>;
|
|
7
|
+
/** Stores the three values on the instance as given. */
constructor(id: string, providerId: string, config: Record<string, any>);
|
|
8
|
+
}
|
|
9
|
+
//# sourceMappingURL=provider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"provider.d.ts","sourceRoot":"","sources":["../../../src/models/provider.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAO5C,MAAM,CAAC,OAAO,OAAO,QAAQ;IA8BlB,EAAE,EAAE,MAAM;IACV,UAAU,EAAE,MAAM;IAClB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;WA/BvB,cAAc,CAAC,SAAS,EAAE,WAAW,EAAE;gBA6B3C,EAAE,EAAE,MAAM,EACV,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;CAMrC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const drizzle_orm_1 = require("drizzle-orm");
|
|
4
|
+
const database_1 = require("../database");
|
|
5
|
+
const tables_1 = require("../database/tables");
|
|
6
|
+
const createHash_1 = require("../util/createHash");
|
|
7
|
+
/**
 * Derives the stored record id for a provider by hashing the provider's own id
 * concatenated with the JSON serialization of its config.
 *
 * @param {object} provider - Object exposing `id()` and an optional `config`.
 * @returns {string} The sha256 digest produced by the hashing helper.
 */
function getProviderId(provider) {
    const digest = createHash_1.sha256;
    const identifier = provider.id();
    const serializedConfig = JSON.stringify(provider.config);
    return digest(identifier + serializedConfig);
}
|
|
10
|
+
/**
 * Model for a stored provider record: its row id, the provider's own id, and
 * its config.
 */
class Provider {
    /**
     * Ensures a stored record exists for each provider and returns the records.
     *
     * For every provider the row is looked up by its hashed id and inserted when
     * absent. The insert uses `onConflictDoNothing()`, so it returns no row if
     * another writer created the record between the lookup and the insert; in
     * that case the row is read again instead of dereferencing `undefined`.
     *
     * @param {object[]} providers - Providers exposing `id()` and an optional `config`.
     * @returns {Promise<Provider[]>} One Provider per input, in input order.
     * @throws {Error} When a record can neither be inserted nor found.
     */
    static async createMultiple(providers) {
        const db = (0, database_1.getDb)();
        const findById = (id) => db.select().from(tables_1.providers).where((0, drizzle_orm_1.eq)(tables_1.providers.id, id));
        const ret = [];
        for (const provider of providers) {
            const id = getProviderId(provider);
            let [providerResult] = await findById(id);
            if (providerResult === undefined) {
                [providerResult] = await db
                    .insert(tables_1.providers)
                    .values({
                    id,
                    providerId: provider.id(),
                    config: provider.config || {},
                })
                    .onConflictDoNothing()
                    .returning();
            }
            if (providerResult === undefined) {
                // The insert was skipped by the conflict clause: the row now exists.
                [providerResult] = await findById(id);
            }
            if (providerResult === undefined) {
                throw new Error(`Unable to create or load provider record ${id}`);
            }
            ret.push(new Provider(providerResult.id, providerResult.providerId, providerResult.config));
        }
        return ret;
    }
    /**
     * @param {string} id - Row id of the record.
     * @param {string} providerId - The id reported by the provider itself.
     * @param {object} config - Provider configuration.
     */
    constructor(id, providerId, config) {
        this.id = id;
        this.providerId = providerId;
        this.config = config;
    }
}
|
|
46
|
+
exports.default = Provider;
|
|
47
|
+
//# sourceMappingURL=provider.js.map
|