promptfoo 0.91.3 → 0.92.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/drizzle/0006_harsh_caretaker.sql +42 -0
- package/dist/drizzle/0007_cloudy_wong.sql +1 -0
- package/dist/drizzle/meta/0006_snapshot.json +721 -0
- package/dist/drizzle/meta/0007_snapshot.json +723 -0
- package/dist/drizzle/meta/_journal.json +14 -0
- package/dist/package.json +10 -8
- package/dist/src/app/assets/{index-C6z1nbLN.js → index-BpjzEMiv.js} +243 -241
- package/dist/src/app/assets/{index.es-oqbvfIxR.js → index.es-ihzvEu35.js} +1 -1
- package/dist/src/app/assets/{sync-D2s75VlC.js → sync-BosjlpGJ.js} +1 -1
- package/dist/src/app/index.html +3 -3
- package/dist/src/assertions.js +2 -2
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/commands/cache.d.ts.map +1 -1
- package/dist/src/commands/cache.js +0 -2
- package/dist/src/commands/cache.js.map +1 -1
- package/dist/src/commands/eval.d.ts.map +1 -1
- package/dist/src/commands/eval.js +19 -16
- package/dist/src/commands/eval.js.map +1 -1
- package/dist/src/commands/export.d.ts.map +1 -1
- package/dist/src/commands/export.js +8 -31
- package/dist/src/commands/export.js.map +1 -1
- package/dist/src/commands/import.d.ts.map +1 -1
- package/dist/src/commands/import.js +52 -13
- package/dist/src/commands/import.js.map +1 -1
- package/dist/src/commands/list.d.ts.map +1 -1
- package/dist/src/commands/list.js +35 -7
- package/dist/src/commands/list.js.map +1 -1
- package/dist/src/commands/share.d.ts +2 -2
- package/dist/src/commands/share.d.ts.map +1 -1
- package/dist/src/commands/share.js +12 -13
- package/dist/src/commands/share.js.map +1 -1
- package/dist/src/commands/show.d.ts.map +1 -1
- package/dist/src/commands/show.js +10 -6
- package/dist/src/commands/show.js.map +1 -1
- package/dist/src/constants.d.ts +1 -0
- package/dist/src/constants.d.ts.map +1 -1
- package/dist/src/constants.js +2 -1
- package/dist/src/constants.js.map +1 -1
- package/dist/src/database/index.js +1 -1
- package/dist/src/database/index.js.map +1 -1
- package/dist/src/database/tables.d.ts +609 -11
- package/dist/src/database/tables.d.ts.map +1 -1
- package/dist/src/database/tables.js +111 -52
- package/dist/src/database/tables.js.map +1 -1
- package/dist/src/database/types.d.ts +3 -3
- package/dist/src/database/types.d.ts.map +1 -1
- package/dist/src/evaluator.d.ts +3 -2
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +75 -104
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/evaluatorHelpers.d.ts.map +1 -1
- package/dist/src/evaluatorHelpers.js +2 -1
- package/dist/src/evaluatorHelpers.js.map +1 -1
- package/dist/src/index.d.ts +2 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +18 -10
- package/dist/src/index.js.map +1 -1
- package/dist/src/models/eval.d.ts +95 -0
- package/dist/src/models/eval.d.ts.map +1 -0
- package/dist/src/models/eval.js +390 -0
- package/dist/src/models/eval.js.map +1 -0
- package/dist/src/models/evalResult.d.ts +50 -0
- package/dist/src/models/evalResult.d.ts.map +1 -0
- package/dist/src/models/evalResult.js +122 -0
- package/dist/src/models/evalResult.js.map +1 -0
- package/dist/src/models/provider.d.ts +9 -0
- package/dist/src/models/provider.d.ts.map +1 -0
- package/dist/src/models/provider.js +47 -0
- package/dist/src/models/provider.js.map +1 -0
- package/dist/src/prompts/index.d.ts.map +1 -1
- package/dist/src/prompts/index.js +2 -1
- package/dist/src/prompts/index.js.map +1 -1
- package/dist/src/prompts/utils.d.ts +1 -0
- package/dist/src/prompts/utils.d.ts.map +1 -1
- package/dist/src/prompts/utils.js +7 -0
- package/dist/src/prompts/utils.js.map +1 -1
- package/dist/src/providers/fal.d.ts +2 -2
- package/dist/src/providers/fal.d.ts.map +1 -1
- package/dist/src/providers/fal.js +2 -1
- package/dist/src/providers/fal.js.map +1 -1
- package/dist/src/providers/http.js +2 -2
- package/dist/src/providers/http.js.map +1 -1
- package/dist/src/providers/palm.d.ts +4 -3
- package/dist/src/providers/palm.d.ts.map +1 -1
- package/dist/src/providers/palm.js +13 -3
- package/dist/src/providers/palm.js.map +1 -1
- package/dist/src/providers.js +5 -5
- package/dist/src/providers.js.map +1 -1
- package/dist/src/redteam/eval/excessive-agency/llm_rubric-20240617.json +10 -0
- package/dist/src/redteam/eval/excessive-agency/llm_rubric-20240618.json +10 -0
- package/dist/src/redteam/eval/harmful/llm_rubric-20240723.json +10 -0
- package/dist/src/redteam/eval/harmful/llm_rubric-20240724.json +10 -0
- package/dist/src/server/server.d.ts +1 -0
- package/dist/src/server/server.d.ts.map +1 -1
- package/dist/src/server/server.js +70 -31
- package/dist/src/server/server.js.map +1 -1
- package/dist/src/share.d.ts +2 -2
- package/dist/src/share.d.ts.map +1 -1
- package/dist/src/share.js +93 -34
- package/dist/src/share.js.map +1 -1
- package/dist/src/table.d.ts +2 -2
- package/dist/src/table.d.ts.map +1 -1
- package/dist/src/table.js +3 -3
- package/dist/src/table.js.map +1 -1
- package/dist/src/types/index.d.ts +163 -11
- package/dist/src/types/index.d.ts.map +1 -1
- package/dist/src/types/index.js +21 -1
- package/dist/src/types/index.js.map +1 -1
- package/dist/src/util/config/load.d.ts.map +1 -1
- package/dist/src/util/config/load.js +2 -1
- package/dist/src/util/config/load.js.map +1 -1
- package/dist/src/util/config/manage.d.ts.map +1 -1
- package/dist/src/util/config/manage.js.map +1 -1
- package/dist/src/util/convertEvalResultsToTable.d.ts +16 -0
- package/dist/src/util/convertEvalResultsToTable.d.ts.map +1 -0
- package/dist/src/util/convertEvalResultsToTable.js +136 -0
- package/dist/src/util/convertEvalResultsToTable.js.map +1 -0
- package/dist/src/util/createHash.d.ts +1 -0
- package/dist/src/util/createHash.d.ts.map +1 -1
- package/dist/src/util/createHash.js +9 -0
- package/dist/src/util/createHash.js.map +1 -1
- package/dist/src/util/file.d.ts +8 -0
- package/dist/src/util/file.d.ts.map +1 -0
- package/dist/src/util/file.js +13 -0
- package/dist/src/util/file.js.map +1 -0
- package/dist/src/util/index.d.ts +9 -14
- package/dist/src/util/index.d.ts.map +1 -1
- package/dist/src/util/index.js +132 -268
- package/dist/src/util/index.js.map +1 -1
- package/dist/src/util/time.d.ts +2 -0
- package/dist/src/util/time.d.ts.map +1 -0
- package/dist/src/util/time.js +7 -0
- package/dist/src/util/time.js.map +1 -0
- package/dist/src/util/transform.js +2 -2
- package/dist/src/util/transform.js.map +1 -1
- package/dist/src/validators/providers.d.ts +6 -0
- package/dist/src/validators/providers.d.ts.map +1 -1
- package/dist/src/validators/providers.js +1 -0
- package/dist/src/validators/providers.js.map +1 -1
- package/dist/src/validators/redteam.d.ts +6 -0
- package/dist/src/validators/redteam.d.ts.map +1 -1
- package/dist/test/commands/eval/filterFailingTests.test.js +24 -2
- package/dist/test/commands/eval/filterFailingTests.test.js.map +1 -1
- package/dist/test/evaluator.test.js +152 -74
- package/dist/test/evaluator.test.js.map +1 -1
- package/dist/test/factories/data/eval/database_records.d.ts +142 -0
- package/dist/test/factories/data/eval/database_records.d.ts.map +1 -0
- package/dist/test/factories/data/eval/database_records.js +251 -0
- package/dist/test/factories/data/eval/database_records.js.map +1 -0
- package/dist/test/factories/evalFactory.d.ts +768 -0
- package/dist/test/factories/evalFactory.d.ts.map +1 -0
- package/dist/test/factories/evalFactory.js +121 -0
- package/dist/test/factories/evalFactory.js.map +1 -0
- package/dist/test/index.test.js +20 -35
- package/dist/test/index.test.js.map +1 -1
- package/dist/test/models/eval.test.d.ts +2 -0
- package/dist/test/models/eval.test.d.ts.map +1 -0
- package/dist/test/models/eval.test.js +34 -0
- package/dist/test/models/eval.test.js.map +1 -0
- package/dist/test/providers.test.js +3 -3
- package/dist/test/providers.test.js.map +1 -1
- package/dist/test/server/share.test.d.ts +2 -0
- package/dist/test/server/share.test.d.ts.map +1 -0
- package/dist/test/server/share.test.js +36 -0
- package/dist/test/server/share.test.js.map +1 -0
- package/dist/test/server/v3evalToShare.json +507 -0
- package/dist/test/server/v4evalToShare.json +421 -0
- package/dist/test/types.test.js +56 -3
- package/dist/test/types.test.js.map +1 -1
- package/dist/test/util.file.test.d.ts +2 -0
- package/dist/test/util.file.test.d.ts.map +1 -0
- package/dist/test/util.file.test.js +32 -0
- package/dist/test/util.file.test.js.map +1 -0
- package/dist/test/util.listPrevious.test.d.ts +2 -0
- package/dist/test/util.listPrevious.test.d.ts.map +1 -0
- package/dist/test/util.listPrevious.test.js +37 -0
- package/dist/test/util.listPrevious.test.js.map +1 -0
- package/dist/test/util.test.js +38 -311
- package/dist/test/util.test.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/package.json +10 -8
package/dist/src/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAwJ4B,4BAAQ;AAxJpC,oEAAuC;AACvC,8DAAsC;AAuJ7B,qBAvJF,oBAAU,CAuJE;AAtJnB,+CAAiC;AAsJZ,sBAAK;AArJ1B,2CAAqD;AACrD,uCAA4C;AAC5C,yDAAiC;AACjC,uCAA+D;AAC/D,yDAAyD;AAiJnB,oBAjJ/B,mBAAS,CAiJ+B;AAhJ/C,2CAA+C;AAC/C,4DAAgE;AAChE,0DAAoE;AACpE,+CAA4C;AAC5C,+CAA4C;AAC5C,iDAA8E;AAC9E,qDAAkD;AAClD,4DAAoC;AACpC,2CAAwC;AASxC,iCAAwE;AACxE,kDAAoD;AAEpD,0CAAwB;AAExB,iCAAwC;AAA/B,sGAAA,aAAa,OAAA;AAEtB,KAAK,UAAU,QAAQ,CAAC,SAA4B,EAAE,UAA2B,EAAE;IACjF,IAAI,SAAS,CAAC,kBAAkB,EAAE,CAAC;QACjC,MAAM,IAAA,yBAAe,GAAE,CAAC;IAC1B,CAAC;IAED,MAAM,oBAAoB,GAAc;QACtC,GAAG,SAAS;QACZ,SAAS,EAAE,SAAS,CAAC,SAAuB;QAC5C,SAAS,EAAE,MAAM,IAAA,4BAAgB,EAAC,SAAS,CAAC,SAAS,EAAE;YACrD,GAAG,EAAE,SAAS,CAAC,GAAG;SACnB,CAAC;QACF,KAAK,EAAE,MAAM,IAAA,qBAAS,EAAC,SAAS,CAAC,KAAK,CAAC;QAEvC,eAAe,EAAE,MAAM,IAAA,kBAAW,EAAC,SAAS,CAAC,eAAe,IAAI,EAAE,CAAC;QAEnE,wCAAwC;QACxC,OAAO,EAAE,CACP,MAAM,OAAO,CAAC,GAAG,CACf,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,WAAW,EAAE,EAAE;YAC1C,IAAI,OAAO,WAAW,KAAK,UAAU,EAAE,CAAC;gBACtC,OAAO;oBACL,GAAG,EAAE,WAAW,CAAC,QAAQ,EAAE;oBAC3B,KAAK,EAAE,WAAW,EAAE,IAAI,IAAI,WAAW,CAAC,QAAQ,EAAE;oBAClD,QAAQ,EAAE,WAA6B;iBACxC,CAAC;YACJ,CAAC;iBAAM,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;gBAC3C,OAAO,IAAA,qBAAW,EAAC,WAAW,CAAC,CAAC;YAClC,CAAC;YACD,IAAI,CAAC;gBACH,OAAO,sBAAY,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YACzC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CACV,8CAA8C,KAAK,kDAAkD,CACtG,CAAC;gBACF,OAAO;oBACL,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;oBAChC,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;iBACnC,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CACH,CACF,CAAC,IAAI,EAAE;KACT,CAAC;IAEF,2BAA2B;IAC3B,KAAK,MAAM,IAAI,IAAI,oBAAoB,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;QACpD,IAAI,IAAI,CAAC,OAAO,EAAE,QAAQ,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;YAC1E,IAAI,CAAC,OAAO,CAAC,QAAQ,GAAG,MAAM,IAAA,2BAAe,EAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACvE,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,KAAK,MAAM,SAAS,IAAI,IAAI,C
AAC,MAAM,EAAE,CAAC;gBACpC,IAAI,SAAS,CAAC,IAAI,KAAK,YAAY,IAAI,OAAO,SAAS,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;oBAChF,SAAS;gBACX,CAAC;gBAED,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;oBACvB,IAAI,OAAO,SAAS,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;wBAC3C,MAAM,MAAM,GAAG,SAAS,CAAC,QAA2B,CAAC;wBACrD,IAAA,wBAAS,EAAC,MAAM,CAAC,EAAE,EAAE,iCAAiC,CAAC,CAAC;wBACxD,SAAS,CAAC,QAAQ,GAAG,MAAM,IAAA,2BAAe,EAAC,MAAM,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;oBAC7E,CAAC;yBAAM,IAAI,OAAO,SAAS,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;wBAClD,SAAS,CAAC,QAAQ,GAAG,MAAM,IAAA,2BAAe,EAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;oBACjE,CAAC;yBAAM,CAAC;wBACN,MAAM,IAAI,KAAK,CAAC,uBAAuB,CAAC,CAAC;oBAC3C,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,iBAAiB;IACjB,IAAI,OAAO,CAAC,KAAK,KAAK,KAAK,IAAI,CAAC,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;QACtE,KAAK,CAAC,YAAY,EAAE,CAAC;IACvB,CAAC;IAED,MAAM,uBAAuB,GAAG,IAAA,+BAAqB,EAAC,SAAS,EAAE,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAC/F,MAAM,aAAa,GAAG,EAAE,GAAG,SAAS,EAAE,OAAO,EAAE,oBAAoB,CAAC,OAAO,EAAE,CAAC;IAC9E,MAAM,UAAU,GAAG,SAAS,CAAC,kBAAkB;QAC7C,CAAC,CAAC,MAAM,cAAI,CAAC,MAAM,CAAC,aAAa,EAAE,oBAAoB,CAAC,OAAO,CAAC;QAChE,CAAC,CAAC,IAAI,cAAI,CAAC,aAAa,CAAC,CAAC;IAE5B,gBAAgB;IAChB,MAAM,GAAG,GAAG,MAAM,IAAA,oBAAU,EAC1B;QACE,GAAG,oBAAoB;QACvB,iBAAiB,EAAE,uBAAuB;KAC3C,EACD,UAAU,EACV;QACE,WAAW,EAAE,SAAS;QACtB,GAAG,OAAO;KACX,CACF,CAAC;IAEF,IAAI,SAAS,CAAC,UAAU,EAAE,CAAC;QACzB,IAAI,OAAO,SAAS,CAAC,UAAU,KAAK,QAAQ,EAAE,CAAC;YAC7C,MAAM,IAAA,kBAAW,EAAC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC;QAC5D,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/C,MAAM,IAAA,2BAAoB,EAAC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;IAED,MAAM,mBAAS,CAAC,IAAI,EAAE,CAAC;IACvB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,OAAO,GAAG;IACd,UAAU,EAAE;QACV,eAAe,EAAf,0BAAe;QACf,oBAAoB,EAApB,8BAAoB;KACrB;IACD,OAAO,EAAE,iBAAO;IAChB,OAAO,EAAP,iBAAO;IACP,UAAU,EAAV,uBAAU;IACV,IAAI,EAAE;QACJ,MAAM,EAAE,wBAAiB;QACzB,MAAM,EAAE,wBAAiB;KAC1B;CACF,CAAC;AAE+C,0BAAO;AAExD,kBAAe;IACb,UAAU,EAAV,oBAAU;IACV,KAAK;IACL,QAAQ;IACR
,SAAS,EAAT,mBAAS;IACT,OAAO;CACR,CAAC"}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import type { AtomicTestCase, CompletedPrompt, EvaluateResult, EvaluateSummaryV3, EvaluateSummaryV2, EvaluateTable, Prompt, ResultsFile, UnifiedConfig } from '../types';
|
|
2
|
+
import EvalResult from './evalResult';
|
|
3
|
+
import type Provider from './provider';
|
|
4
|
+
export declare function createEvalId(createdAt?: Date): string;
|
|
5
|
+
export declare class EvalQueries {
|
|
6
|
+
static getVarsFromEvals(evals: Eval[]): Promise<Record<string, string[]>>;
|
|
7
|
+
}
|
|
8
|
+
export default class Eval {
|
|
9
|
+
id: string;
|
|
10
|
+
createdAt: number;
|
|
11
|
+
author?: string;
|
|
12
|
+
description?: string;
|
|
13
|
+
config: Partial<UnifiedConfig>;
|
|
14
|
+
results: EvalResult[];
|
|
15
|
+
datasetId?: string;
|
|
16
|
+
prompts: CompletedPrompt[];
|
|
17
|
+
oldResults?: EvaluateSummaryV2;
|
|
18
|
+
persisted: boolean;
|
|
19
|
+
static latest(): Promise<Eval | undefined>;
|
|
20
|
+
static findById(id: string): Promise<Eval | undefined>;
|
|
21
|
+
static getMany(limit?: number): Promise<Eval[]>;
|
|
22
|
+
static create(config: Partial<UnifiedConfig>, renderedPrompts: Prompt[], // The config doesn't contain the actual prompts, so we need to pass them in separately
|
|
23
|
+
opts?: {
|
|
24
|
+
id?: string;
|
|
25
|
+
createdAt?: Date;
|
|
26
|
+
author?: string;
|
|
27
|
+
results?: EvalResult[];
|
|
28
|
+
}): Promise<Eval>;
|
|
29
|
+
constructor(config: Partial<UnifiedConfig>, opts?: {
|
|
30
|
+
id?: string;
|
|
31
|
+
createdAt?: Date;
|
|
32
|
+
author?: string;
|
|
33
|
+
description?: string;
|
|
34
|
+
prompts?: CompletedPrompt[];
|
|
35
|
+
datasetId?: string;
|
|
36
|
+
persisted?: boolean;
|
|
37
|
+
});
|
|
38
|
+
version(): 3 | 4;
|
|
39
|
+
useOldResults(): boolean;
|
|
40
|
+
setTable(table: EvaluateTable): void;
|
|
41
|
+
save(): Promise<void>;
|
|
42
|
+
getVars(): Promise<string[]>;
|
|
43
|
+
getPrompts(): {
|
|
44
|
+
provider: string;
|
|
45
|
+
raw: string;
|
|
46
|
+
label: string;
|
|
47
|
+
function?: ((args_0: {
|
|
48
|
+
vars: Record<string, any>;
|
|
49
|
+
provider?: import("../types").ApiProvider | undefined;
|
|
50
|
+
}, ...args: unknown[]) => Promise<any>) | undefined;
|
|
51
|
+
id?: string | undefined;
|
|
52
|
+
config?: any;
|
|
53
|
+
display?: string | undefined;
|
|
54
|
+
metrics?: {
|
|
55
|
+
cost: number;
|
|
56
|
+
tokenUsage: {
|
|
57
|
+
cached?: number | undefined;
|
|
58
|
+
completion?: number | undefined;
|
|
59
|
+
prompt?: number | undefined;
|
|
60
|
+
total?: number | undefined;
|
|
61
|
+
};
|
|
62
|
+
score: number;
|
|
63
|
+
testPassCount: number;
|
|
64
|
+
testFailCount: number;
|
|
65
|
+
assertPassCount: number;
|
|
66
|
+
assertFailCount: number;
|
|
67
|
+
totalLatencyMs: number;
|
|
68
|
+
namedScores: Record<string, number>;
|
|
69
|
+
namedScoresCount: Record<string, number>;
|
|
70
|
+
redteam?: {
|
|
71
|
+
pluginPassCount: Record<string, number>;
|
|
72
|
+
pluginFailCount: Record<string, number>;
|
|
73
|
+
strategyPassCount: Record<string, number>;
|
|
74
|
+
strategyFailCount: Record<string, number>;
|
|
75
|
+
} | undefined;
|
|
76
|
+
} | undefined;
|
|
77
|
+
}[];
|
|
78
|
+
getTable(): Promise<EvaluateTable>;
|
|
79
|
+
addResult(result: EvaluateResult, test: AtomicTestCase): Promise<void>;
|
|
80
|
+
addPrompts(prompts: CompletedPrompt[]): Promise<void>;
|
|
81
|
+
addProviders(providers: Provider[]): Promise<void>;
|
|
82
|
+
loadResults(): Promise<void>;
|
|
83
|
+
getResults(): Promise<EvaluateResult[] | EvalResult[]>;
|
|
84
|
+
toEvaluateSummary(): Promise<EvaluateSummaryV3 | EvaluateSummaryV2>;
|
|
85
|
+
toResultsFile(): Promise<ResultsFile>;
|
|
86
|
+
delete(): Promise<void>;
|
|
87
|
+
}
|
|
88
|
+
export declare function getSummaryofLatestEvals(limit?: number, filterDescription?: string, datasetId?: string): Promise<{
|
|
89
|
+
evalId: string;
|
|
90
|
+
createdAt: number;
|
|
91
|
+
description: string | null;
|
|
92
|
+
numTests: number;
|
|
93
|
+
datasetId: string | null;
|
|
94
|
+
}[]>;
|
|
95
|
+
//# sourceMappingURL=eval.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../../src/models/eval.ts"],"names":[],"mappings":"AAkBA,OAAO,KAAK,EACV,cAAc,EACd,eAAe,EACf,cAAc,EAEd,iBAAiB,EACjB,iBAAiB,EACjB,aAAa,EACb,MAAM,EACN,WAAW,EACX,aAAa,EACd,MAAM,UAAU,CAAC;AAIlB,OAAO,UAAU,MAAM,cAAc,CAAC;AACtC,OAAO,KAAK,QAAQ,MAAM,YAAY,CAAC;AAEvC,wBAAgB,YAAY,CAAC,SAAS,GAAE,IAAiB,UAExD;AAED,qBAAa,WAAW;WACT,gBAAgB,CAAC,KAAK,EAAE,IAAI,EAAE;CAe5C;AAED,MAAM,CAAC,OAAO,OAAO,IAAI;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IAE/B,OAAO,EAAE,UAAU,EAAE,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,iBAAiB,CAAC;IAC/B,SAAS,EAAE,OAAO,CAAC;WAEN,MAAM;WAiBN,QAAQ,CAAC,EAAE,EAAE,MAAM;WAuCnB,OAAO,CAAC,KAAK,GAAE,MAA4B;WAqB3C,MAAM,CACjB,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,EAC9B,eAAe,EAAE,MAAM,EAAE,EAAE,uFAAuF;IAClH,IAAI,CAAC,EAAE;QACL,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,SAAS,CAAC,EAAE,IAAI,CAAC;QACjB,MAAM,CAAC,EAAE,MAAM,CAAC;QAEhB,OAAO,CAAC,EAAE,UAAU,EAAE,CAAC;KACxB,GACA,OAAO,CAAC,IAAI,CAAC;gBAgGd,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,EAC9B,IAAI,CAAC,EAAE;QACL,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,SAAS,CAAC,EAAE,IAAI,CAAC;QACjB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,OAAO,CAAC,EAAE,eAAe,EAAE,CAAC;QAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,SAAS,CAAC,EAAE,OAAO,CAAC;KACrB;IAaH,OAAO;IAQP,aAAa;IAIb,QAAQ,CAAC,KAAK,EAAE,aAAa;IAMvB,IAAI;IAkBJ,OAAO;IAcb,UAAU;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAQJ,QAAQ,IAAI,OAAO,CAAC,aAAa,CAAC;IAOlC,SAAS,CAAC,MAAM,EAAE,cAAc,EAAE,IAAI,EAAE,cAAc;IAOtD,UAAU,CAAC,OAAO,EAAE,eAAe,EAAE;IAQrC,YAAY,CAAC,SAAS,EAAE,QAAQ,EAAE;IAkBlC,WAAW;IAIX,UAAU,IAAI,OAAO,CAAC,cAAc,EAAE,GAAG,UAAU,EAAE,CAAC;IAQtD,iBAAiB,IAAI,OAAO,CAAC,iBAAiB,GAAG,iBAAiB,CAAC;IA2CnE,aAAa,IAAI,OAAO,CAAC,WAAW,CAAC;IAcrC,MAAM;CAWb;AAED,wBAAsB,uBAAuB,CAC3C,KAAK,GAAE,MAA4B,EACnC,iBAAiB,CAAC,EAAE,MAAM,EAC1B,SAAS,CAAC,EAAE,MAAM;;;;;;KAuCnB"}
|
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.EvalQueries = void 0;
|
|
7
|
+
exports.createEvalId = createEvalId;
|
|
8
|
+
exports.getSummaryofLatestEvals = getSummaryofLatestEvals;
|
|
9
|
+
const crypto_1 = require("crypto");
|
|
10
|
+
const drizzle_orm_1 = require("drizzle-orm");
|
|
11
|
+
const tiny_invariant_1 = __importDefault(require("tiny-invariant"));
|
|
12
|
+
const constants_1 = require("../constants");
|
|
13
|
+
const database_1 = require("../database");
|
|
14
|
+
const tables_1 = require("../database/tables");
|
|
15
|
+
const logger_1 = __importDefault(require("../logger"));
|
|
16
|
+
const utils_1 = require("../prompts/utils");
|
|
17
|
+
const convertEvalResultsToTable_1 = require("../util/convertEvalResultsToTable");
|
|
18
|
+
const createHash_1 = require("../util/createHash");
|
|
19
|
+
const time_1 = require("../util/time");
|
|
20
|
+
const evalResult_1 = __importDefault(require("./evalResult"));
|
|
21
|
+
/**
 * Builds a new eval identifier of the form `eval-<random>-<timestamp>`.
 *
 * @param {Date} [createdAt=new Date()] - Creation time embedded in the id.
 * @returns {string} `eval-`, followed by the result of `randomSequence(3)`,
 *   followed by the first 19 characters of the ISO-8601 timestamp
 *   (i.e. `YYYY-MM-DDTHH:mm:ss`, without milliseconds or zone suffix).
 */
function createEvalId(createdAt = new Date()) {
    const randomPart = (0, createHash_1.randomSequence)(3);
    const timestampPart = createdAt.toISOString().slice(0, 19);
    return `eval-${randomPart}-${timestampPart}`;
}
|
|
24
|
+
/**
 * Query helpers that operate across several evals at once.
 */
class EvalQueries {
    /**
     * Collects the distinct test-case variable names used by each of the given evals.
     *
     * The variable names are the keys of the `$.vars` object inside the
     * `test_case` JSON column of `eval_results`.
     *
     * @param evals - Evals to inspect; only each element's `id` is read.
     * @returns {Promise<Record<string, string[]>>} Map from eval id to the
     *   variable names found for that eval. Evals without matching rows have
     *   no entry. An empty input yields `{}` without touching the database.
     */
    static async getVarsFromEvals(evals) {
        if (evals.length === 0) {
            return {};
        }
        const db = (0, database_1.getDb)();
        // Bind every id as a query parameter instead of splicing quoted strings
        // into raw SQL, so an id containing a quote cannot alter the statement.
        const idParams = drizzle_orm_1.sql.join(evals.map((e) => (0, drizzle_orm_1.sql) `${e.id}`), (0, drizzle_orm_1.sql) `, `);
        const query = (0, drizzle_orm_1.sql) `SELECT DISTINCT j.key, eval_id from (SELECT eval_id, json_extract(eval_results.test_case, '$.vars') as vars
            FROM eval_results where eval_id IN (${idParams})) t, json_each(t.vars) j;`;
        // @ts-ignore
        const results = await db.all(query);
        const varsByEvalId = {};
        for (const row of results) {
            varsByEvalId[row.eval_id] = varsByEvalId[row.eval_id] || [];
            varsByEvalId[row.eval_id].push(row.key);
        }
        return varsByEvalId;
    }
}
|
|
39
|
+
exports.EvalQueries = EvalQueries;
|
|
40
|
+
class Eval {
|
|
41
|
+
static async latest() {
|
|
42
|
+
const db = (0, database_1.getDb)();
|
|
43
|
+
const db_results = await db
|
|
44
|
+
.select({
|
|
45
|
+
id: tables_1.evalsTable.id,
|
|
46
|
+
})
|
|
47
|
+
.from(tables_1.evalsTable)
|
|
48
|
+
.orderBy((0, drizzle_orm_1.desc)(tables_1.evalsTable.createdAt))
|
|
49
|
+
.limit(1);
|
|
50
|
+
if (db_results.length === 0) {
|
|
51
|
+
return undefined;
|
|
52
|
+
}
|
|
53
|
+
return await Eval.findById(db_results[0].id);
|
|
54
|
+
}
|
|
55
|
+
static async findById(id) {
|
|
56
|
+
const db = (0, database_1.getDb)();
|
|
57
|
+
const { evals, datasetResults } = await db.transaction(async (tx) => {
|
|
58
|
+
const evals = await tx.select().from(tables_1.evalsTable).where((0, drizzle_orm_1.eq)(tables_1.evalsTable.id, id));
|
|
59
|
+
const datasetResults = await tx
|
|
60
|
+
.select({
|
|
61
|
+
datasetId: tables_1.evalsToDatasetsTable.datasetId,
|
|
62
|
+
})
|
|
63
|
+
.from(tables_1.evalsToDatasetsTable)
|
|
64
|
+
.where((0, drizzle_orm_1.eq)(tables_1.evalsToDatasetsTable.evalId, id))
|
|
65
|
+
.limit(1);
|
|
66
|
+
return { evals, datasetResults };
|
|
67
|
+
});
|
|
68
|
+
if (evals.length === 0) {
|
|
69
|
+
return undefined;
|
|
70
|
+
}
|
|
71
|
+
const eval_ = evals[0];
|
|
72
|
+
const datasetId = datasetResults[0]?.datasetId;
|
|
73
|
+
const evalInstance = new Eval(eval_.config, {
|
|
74
|
+
id: eval_.id,
|
|
75
|
+
createdAt: new Date(eval_.createdAt),
|
|
76
|
+
author: eval_.author || undefined,
|
|
77
|
+
description: eval_.description || undefined,
|
|
78
|
+
prompts: eval_.prompts || [],
|
|
79
|
+
datasetId,
|
|
80
|
+
persisted: true,
|
|
81
|
+
});
|
|
82
|
+
if (eval_.results && 'table' in eval_.results) {
|
|
83
|
+
evalInstance.oldResults = eval_.results;
|
|
84
|
+
}
|
|
85
|
+
return evalInstance;
|
|
86
|
+
}
|
|
87
|
+
static async getMany(limit = constants_1.DEFAULT_QUERY_LIMIT) {
|
|
88
|
+
const db = (0, database_1.getDb)();
|
|
89
|
+
const evals = await db
|
|
90
|
+
.select()
|
|
91
|
+
.from(tables_1.evalsTable)
|
|
92
|
+
.limit(limit)
|
|
93
|
+
.orderBy((0, drizzle_orm_1.desc)(tables_1.evalsTable.createdAt))
|
|
94
|
+
.all();
|
|
95
|
+
return evals.map((e) => new Eval(e.config, {
|
|
96
|
+
id: e.id,
|
|
97
|
+
createdAt: new Date(e.createdAt),
|
|
98
|
+
author: e.author || undefined,
|
|
99
|
+
description: e.description || undefined,
|
|
100
|
+
prompts: e.prompts || [],
|
|
101
|
+
persisted: true,
|
|
102
|
+
}));
|
|
103
|
+
}
|
|
104
|
+
static async create(config, renderedPrompts, // The config doesn't contain the actual prompts, so we need to pass them in separately
|
|
105
|
+
opts) {
|
|
106
|
+
const createdAt = opts?.createdAt || new Date();
|
|
107
|
+
const evalId = opts?.id || createEvalId(createdAt);
|
|
108
|
+
const db = (0, database_1.getDb)();
|
|
109
|
+
await db.transaction((tx) => {
|
|
110
|
+
tx.insert(tables_1.evalsTable)
|
|
111
|
+
.values({
|
|
112
|
+
id: evalId,
|
|
113
|
+
createdAt: createdAt.getTime(),
|
|
114
|
+
author: opts?.author,
|
|
115
|
+
description: config.description,
|
|
116
|
+
config,
|
|
117
|
+
results: {},
|
|
118
|
+
})
|
|
119
|
+
.run();
|
|
120
|
+
if (opts?.results) {
|
|
121
|
+
const res = tx
|
|
122
|
+
.insert(tables_1.evalResultsTable)
|
|
123
|
+
.values(opts.results?.map((r) => ({ ...r, evalId, id: (0, crypto_1.randomUUID)() })))
|
|
124
|
+
.run();
|
|
125
|
+
logger_1.default.debug(`Inserted ${res.changes} eval results`);
|
|
126
|
+
}
|
|
127
|
+
for (const prompt of renderedPrompts) {
|
|
128
|
+
const label = prompt.label || prompt.display || prompt.raw;
|
|
129
|
+
const promptId = (0, utils_1.hashPrompt)(prompt);
|
|
130
|
+
tx.insert(tables_1.promptsTable)
|
|
131
|
+
.values({
|
|
132
|
+
id: promptId,
|
|
133
|
+
prompt: label,
|
|
134
|
+
})
|
|
135
|
+
.onConflictDoNothing()
|
|
136
|
+
.run();
|
|
137
|
+
tx.insert(tables_1.evalsToPromptsTable)
|
|
138
|
+
.values({
|
|
139
|
+
evalId,
|
|
140
|
+
promptId,
|
|
141
|
+
})
|
|
142
|
+
.onConflictDoNothing()
|
|
143
|
+
.run();
|
|
144
|
+
logger_1.default.debug(`Inserting prompt ${promptId}`);
|
|
145
|
+
}
|
|
146
|
+
// Record dataset relation
|
|
147
|
+
const datasetId = (0, createHash_1.sha256)(JSON.stringify(config.tests || []));
|
|
148
|
+
tx.insert(tables_1.datasetsTable)
|
|
149
|
+
.values({
|
|
150
|
+
id: datasetId,
|
|
151
|
+
tests: config.tests,
|
|
152
|
+
})
|
|
153
|
+
.onConflictDoNothing()
|
|
154
|
+
.run();
|
|
155
|
+
tx.insert(tables_1.evalsToDatasetsTable)
|
|
156
|
+
.values({
|
|
157
|
+
evalId,
|
|
158
|
+
datasetId,
|
|
159
|
+
})
|
|
160
|
+
.onConflictDoNothing()
|
|
161
|
+
.run();
|
|
162
|
+
logger_1.default.debug(`Inserting dataset ${datasetId}`);
|
|
163
|
+
// Record tags
|
|
164
|
+
if (config.tags) {
|
|
165
|
+
for (const [tagKey, tagValue] of Object.entries(config.tags)) {
|
|
166
|
+
const tagId = (0, createHash_1.sha256)(`${tagKey}:${tagValue}`);
|
|
167
|
+
tx.insert(tables_1.tagsTable)
|
|
168
|
+
.values({
|
|
169
|
+
id: tagId,
|
|
170
|
+
name: tagKey,
|
|
171
|
+
value: tagValue,
|
|
172
|
+
})
|
|
173
|
+
.onConflictDoNothing()
|
|
174
|
+
.run();
|
|
175
|
+
tx.insert(tables_1.evalsToTagsTable)
|
|
176
|
+
.values({
|
|
177
|
+
evalId,
|
|
178
|
+
tagId,
|
|
179
|
+
})
|
|
180
|
+
.onConflictDoNothing()
|
|
181
|
+
.run();
|
|
182
|
+
logger_1.default.debug(`Inserting tag ${tagId}`);
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
});
|
|
186
|
+
return new Eval(config, { id: evalId, author: opts?.author, createdAt, persisted: true });
|
|
187
|
+
}
|
|
188
|
+
constructor(config, opts) {
|
|
189
|
+
const createdAt = opts?.createdAt || new Date();
|
|
190
|
+
this.createdAt = createdAt.getTime();
|
|
191
|
+
this.id = opts?.id || createEvalId(createdAt);
|
|
192
|
+
this.author = opts?.author;
|
|
193
|
+
this.config = config;
|
|
194
|
+
this.results = [];
|
|
195
|
+
this.prompts = opts?.prompts || [];
|
|
196
|
+
this.datasetId = opts?.datasetId;
|
|
197
|
+
this.persisted = opts?.persisted || false;
|
|
198
|
+
}
|
|
199
|
+
version() {
|
|
200
|
+
/**
|
|
201
|
+
* Version 3 is the denormalized version of where the table and results are stored on the eval object.
|
|
202
|
+
* Version 4 is the normalized version where the results are stored in another databse table and the table for vizualization is generated by the app.
|
|
203
|
+
*/
|
|
204
|
+
return this.oldResults && 'table' in this.oldResults ? 3 : 4;
|
|
205
|
+
}
|
|
206
|
+
useOldResults() {
|
|
207
|
+
return this.version() < 4;
|
|
208
|
+
}
|
|
209
|
+
setTable(table) {
|
|
210
|
+
(0, tiny_invariant_1.default)(this.version() < 4, 'Eval is not version 3');
|
|
211
|
+
(0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
|
|
212
|
+
this.oldResults.table = table;
|
|
213
|
+
}
|
|
214
|
+
async save() {
|
|
215
|
+
const db = (0, database_1.getDb)();
|
|
216
|
+
const updateObj = {
|
|
217
|
+
config: this.config,
|
|
218
|
+
prompts: this.prompts,
|
|
219
|
+
description: this.config.description,
|
|
220
|
+
author: this.author,
|
|
221
|
+
updatedAt: (0, time_1.getCurrentTimestamp)(),
|
|
222
|
+
};
|
|
223
|
+
if (this.useOldResults()) {
|
|
224
|
+
(0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
|
|
225
|
+
updateObj.results = this.oldResults;
|
|
226
|
+
}
|
|
227
|
+
await db.update(tables_1.evalsTable).set(updateObj).where((0, drizzle_orm_1.eq)(tables_1.evalsTable.id, this.id)).run();
|
|
228
|
+
this.persisted = true;
|
|
229
|
+
}
|
|
230
|
+
async getVars() {
|
|
231
|
+
if (this.useOldResults()) {
|
|
232
|
+
(0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
|
|
233
|
+
return this.oldResults.table?.head.vars || [];
|
|
234
|
+
}
|
|
235
|
+
const db = (0, database_1.getDb)();
|
|
236
|
+
const query = (0, drizzle_orm_1.sql) `SELECT DISTINCT j.key from (SELECT json_extract(test_case_results.test_case, '$.vars') as vars
|
|
237
|
+
FROM test_case_results where test_case_results.eval_id = ${this.id}) t, json_each(t.vars) j;`;
|
|
238
|
+
// @ts-ignore
|
|
239
|
+
const results = await db.all(query);
|
|
240
|
+
return results.map((r) => r.key) || [];
|
|
241
|
+
}
|
|
242
|
+
getPrompts() {
|
|
243
|
+
if (this.useOldResults()) {
|
|
244
|
+
(0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
|
|
245
|
+
return this.oldResults.table?.head.prompts || [];
|
|
246
|
+
}
|
|
247
|
+
return this.prompts;
|
|
248
|
+
}
|
|
249
|
+
async getTable() {
|
|
250
|
+
if (this.useOldResults()) {
|
|
251
|
+
return this.oldResults?.table || { head: { prompts: [], vars: [] }, body: [] };
|
|
252
|
+
}
|
|
253
|
+
return (0, convertEvalResultsToTable_1.convertResultsToTable)(await this.toResultsFile());
|
|
254
|
+
}
|
|
255
|
+
async addResult(result, test) {
|
|
256
|
+
const newResult = await evalResult_1.default.createFromEvaluateResult(this.id, result, test, {
|
|
257
|
+
persist: this.persisted,
|
|
258
|
+
});
|
|
259
|
+
this.results.push(newResult);
|
|
260
|
+
}
|
|
261
|
+
async addPrompts(prompts) {
|
|
262
|
+
this.prompts = prompts;
|
|
263
|
+
if (this.persisted) {
|
|
264
|
+
const db = (0, database_1.getDb)();
|
|
265
|
+
await db.update(tables_1.evalsTable).set({ prompts }).where((0, drizzle_orm_1.eq)(tables_1.evalsTable.id, this.id)).run();
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
async addProviders(providers) {
|
|
269
|
+
if (this.persisted) {
|
|
270
|
+
const db = (0, database_1.getDb)();
|
|
271
|
+
await db.transaction(async (tx) => {
|
|
272
|
+
for (const provider of providers) {
|
|
273
|
+
const id = provider.id;
|
|
274
|
+
tx.insert(tables_1.evalsToProvidersTable)
|
|
275
|
+
.values({
|
|
276
|
+
evalId: this.id,
|
|
277
|
+
providerId: id,
|
|
278
|
+
})
|
|
279
|
+
.onConflictDoNothing()
|
|
280
|
+
.run();
|
|
281
|
+
}
|
|
282
|
+
});
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
async loadResults() {
|
|
286
|
+
this.results = await evalResult_1.default.findManyByEvalId(this.id);
|
|
287
|
+
}
|
|
288
|
+
async getResults() {
|
|
289
|
+
if (this.useOldResults()) {
|
|
290
|
+
(0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
|
|
291
|
+
return this.oldResults.results;
|
|
292
|
+
}
|
|
293
|
+
await this.loadResults();
|
|
294
|
+
return this.results;
|
|
295
|
+
}
|
|
296
|
+
async toEvaluateSummary() {
|
|
297
|
+
if (this.useOldResults()) {
|
|
298
|
+
(0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
|
|
299
|
+
return {
|
|
300
|
+
version: 2,
|
|
301
|
+
timestamp: new Date(this.createdAt).toISOString(),
|
|
302
|
+
results: this.oldResults.results,
|
|
303
|
+
table: this.oldResults.table,
|
|
304
|
+
stats: this.oldResults.stats,
|
|
305
|
+
};
|
|
306
|
+
}
|
|
307
|
+
if (this.results.length === 0) {
|
|
308
|
+
await this.loadResults();
|
|
309
|
+
}
|
|
310
|
+
const stats = {
|
|
311
|
+
successes: 0,
|
|
312
|
+
failures: 0,
|
|
313
|
+
tokenUsage: {
|
|
314
|
+
cached: 0,
|
|
315
|
+
completion: 0,
|
|
316
|
+
prompt: 0,
|
|
317
|
+
total: 0,
|
|
318
|
+
},
|
|
319
|
+
};
|
|
320
|
+
for (const prompt of this.prompts) {
|
|
321
|
+
stats.successes += prompt.metrics?.testPassCount || 0;
|
|
322
|
+
stats.failures += prompt.metrics?.testFailCount || 0;
|
|
323
|
+
stats.tokenUsage.prompt += prompt.metrics?.tokenUsage.prompt || 0;
|
|
324
|
+
stats.tokenUsage.cached += prompt.metrics?.tokenUsage.cached || 0;
|
|
325
|
+
stats.tokenUsage.completion += prompt.metrics?.tokenUsage.completion || 0;
|
|
326
|
+
stats.tokenUsage.total += prompt.metrics?.tokenUsage.total || 0;
|
|
327
|
+
}
|
|
328
|
+
return {
|
|
329
|
+
version: 3,
|
|
330
|
+
timestamp: new Date(this.createdAt).toISOString(),
|
|
331
|
+
prompts: this.prompts,
|
|
332
|
+
results: this.results.map((r) => r.toEvaluateResult()),
|
|
333
|
+
stats,
|
|
334
|
+
};
|
|
335
|
+
}
|
|
336
|
+
async toResultsFile() {
|
|
337
|
+
const results = {
|
|
338
|
+
version: this.version(),
|
|
339
|
+
createdAt: new Date(this.createdAt).toISOString(),
|
|
340
|
+
results: await this.toEvaluateSummary(),
|
|
341
|
+
config: this.config,
|
|
342
|
+
author: this.author || null,
|
|
343
|
+
prompts: this.getPrompts(),
|
|
344
|
+
datasetId: this.datasetId || null,
|
|
345
|
+
};
|
|
346
|
+
return results;
|
|
347
|
+
}
|
|
348
|
+
async delete() {
|
|
349
|
+
const db = (0, database_1.getDb)();
|
|
350
|
+
await db.transaction(() => {
|
|
351
|
+
db.delete(tables_1.evalsToDatasetsTable).where((0, drizzle_orm_1.eq)(tables_1.evalsToDatasetsTable.evalId, this.id)).run();
|
|
352
|
+
db.delete(tables_1.evalsToPromptsTable).where((0, drizzle_orm_1.eq)(tables_1.evalsToPromptsTable.evalId, this.id)).run();
|
|
353
|
+
db.delete(tables_1.evalsToTagsTable).where((0, drizzle_orm_1.eq)(tables_1.evalsToTagsTable.evalId, this.id)).run();
|
|
354
|
+
db.delete(tables_1.evalsToProvidersTable).where((0, drizzle_orm_1.eq)(tables_1.evalsToProvidersTable.evalId, this.id)).run();
|
|
355
|
+
db.delete(tables_1.evalResultsTable).where((0, drizzle_orm_1.eq)(tables_1.evalResultsTable.evalId, this.id)).run();
|
|
356
|
+
db.delete(tables_1.evalsTable).where((0, drizzle_orm_1.eq)(tables_1.evalsTable.id, this.id)).run();
|
|
357
|
+
});
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
exports.default = Eval;
|
|
361
|
+
/**
 * List summary rows for the most recently created evals.
 *
 * Each row carries the eval id, creation time, description, the number of
 * distinct test indices among its results, and its dataset id (via a left
 * join, so it may be absent).
 *
 * Only evals whose `results` column equals `{}` are returned.
 * NOTE(review): presumably this selects evals stored in the per-row results
 * format rather than the legacy embedded one — confirm.
 *
 * @param {number} [limit] Maximum number of rows; defaults to DEFAULT_QUERY_LIMIT.
 * @param {string} [filterDescription] When truthy, only evals whose description
 *   matches `%filterDescription%` (SQL LIKE) are returned.
 * @param {string} [datasetId] When truthy, only evals linked to this dataset.
 * @returns {Promise<Array<{evalId: string, createdAt: number, description: string, numTests: number, datasetId: string}>>}
 *   Rows ordered by creation time, newest first.
 */
async function getSummaryofLatestEvals(limit = constants_1.DEFAULT_QUERY_LIMIT, filterDescription, datasetId) {
    const db = (0, database_1.getDb)();
    const startTime = performance.now();

    // Optional filters stay `undefined` when not requested, exactly as passed to `and` before.
    const datasetFilter = datasetId
        ? (0, drizzle_orm_1.eq)(tables_1.evalsToDatasetsTable.datasetId, datasetId)
        : undefined;
    const descriptionFilter = filterDescription
        ? (0, drizzle_orm_1.like)(tables_1.evalsTable.description, `%${filterDescription}%`)
        : undefined;
    const emptyResultsFilter = (0, drizzle_orm_1.eq)(tables_1.evalsTable.results, {});

    const query = db
        .select({
            evalId: tables_1.evalsTable.id,
            createdAt: tables_1.evalsTable.createdAt,
            description: tables_1.evalsTable.description,
            numTests: (0, drizzle_orm_1.sql) `COUNT(DISTINCT ${tables_1.evalResultsTable.testIdx})`.as('numTests'),
            datasetId: tables_1.evalsToDatasetsTable.datasetId,
        })
        .from(tables_1.evalsTable)
        .leftJoin(tables_1.evalsToDatasetsTable, (0, drizzle_orm_1.eq)(tables_1.evalsTable.id, tables_1.evalsToDatasetsTable.evalId))
        .leftJoin(tables_1.evalResultsTable, (0, drizzle_orm_1.eq)(tables_1.evalsTable.id, tables_1.evalResultsTable.evalId))
        .where((0, drizzle_orm_1.and)(datasetFilter, descriptionFilter, emptyResultsFilter))
        .groupBy(tables_1.evalsTable.id);

    const rows = query.orderBy((0, drizzle_orm_1.desc)(tables_1.evalsTable.createdAt)).limit(limit).all();
    const mappedResults = rows.map((row) => ({
        evalId: row.evalId,
        createdAt: row.createdAt,
        description: row.description,
        numTests: row.numTests || 0,
        datasetId: row.datasetId,
    }));

    const executionTime = performance.now() - startTime;
    // The label names this function; it previously read "listPreviousResults",
    // which is not the function being timed here.
    logger_1.default.debug(`getSummaryofLatestEvals execution time: ${executionTime.toFixed(2)}ms`);
    return mappedResults;
}
|
|
390
|
+
//# sourceMappingURL=eval.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/models/eval.ts"],"names":[],"mappings":";;;;;;AAoCA,oCAEC;AAkaD,0DA0CC;AAlfD,mCAAoC;AACpC,6CAAuD;AACvD,oEAAuC;AACvC,4CAAmD;AACnD,0CAAoC;AACpC,+CAU4B;AAC5B,uDAA+B;AAC/B,4CAA8C;AAa9C,iFAA0E;AAC1E,mDAA4D;AAC5D,uCAAmD;AACnD,8DAAsC;AAGtC,SAAgB,YAAY,CAAC,YAAkB,IAAI,IAAI,EAAE;IACvD,OAAO,QAAQ,IAAA,2BAAc,EAAC,CAAC,CAAC,IAAI,SAAS,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;AAC7E,CAAC;AAED,MAAa,WAAW;IACtB,MAAM,CAAC,KAAK,CAAC,gBAAgB,CAAC,KAAa;QACzC,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,KAAK,GAAG,iBAAG,CAAC,GAAG,CACnB;sCACgC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,4BAA4B,CACpG,CAAC;QACF,aAAa;QACb,MAAM,OAAO,GAAuC,MAAM,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QACxE,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAA6B,EAAE,CAAC,EAAE,EAAE;YAC/D,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YACtC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC3B,OAAO,GAAG,CAAC;QACb,CAAC,EAAE,EAAE,CAAC,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AAhBD,kCAgBC;AAED,MAAqB,IAAI;IAavB,MAAM,CAAC,KAAK,CAAC,MAAM;QACjB,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,UAAU,GAAG,MAAM,EAAE;aACxB,MAAM,CAAC;YACN,EAAE,EAAE,mBAAU,CAAC,EAAE;SAClB,CAAC;aACD,IAAI,CAAC,mBAAU,CAAC;aAChB,OAAO,CAAC,IAAA,kBAAI,EAAC,mBAAU,CAAC,SAAS,CAAC,CAAC;aACnC,KAAK,CAAC,CAAC,CAAC,CAAC;QAEZ,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,OAAO,MAAM,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAU;QAC9B,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QAEnB,MAAM,EAAE,KAAK,EAAE,cAAc,EAAE,GAAG,MAAM,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;YAClE,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,mBAAU,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,mBAAU,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;YAC9E,MAAM,cAAc,GAAG,MAAM,EAAE;iBAC5B,MAAM,CAAC;gBACN,SAAS,EAAE,6BAAoB,CAAC,SAAS;aAC1C,CAAC
;iBACD,IAAI,CAAC,6BAAoB,CAAC;iBAC1B,KAAK,CAAC,IAAA,gBAAE,EAAC,6BAAoB,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;iBAC1C,KAAK,CAAC,CAAC,CAAC,CAAC;YAEZ,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC;QACnC,CAAC,CAAC,CAAC;QAEH,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,SAAS,CAAC;QACnB,CAAC;QACD,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEvB,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC;QAE/C,MAAM,YAAY,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAC1C,EAAE,EAAE,KAAK,CAAC,EAAE;YACZ,SAAS,EAAE,IAAI,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC;YACpC,MAAM,EAAE,KAAK,CAAC,MAAM,IAAI,SAAS;YACjC,WAAW,EAAE,KAAK,CAAC,WAAW,IAAI,SAAS;YAC3C,OAAO,EAAE,KAAK,CAAC,OAAO,IAAI,EAAE;YAC5B,SAAS;YACT,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,IAAI,KAAK,CAAC,OAAO,IAAI,OAAO,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;YAC9C,YAAY,CAAC,UAAU,GAAG,KAAK,CAAC,OAA4B,CAAC;QAC/D,CAAC;QAED,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,QAAgB,+BAAmB;QACtD,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,KAAK,GAAG,MAAM,EAAE;aACnB,MAAM,EAAE;aACR,IAAI,CAAC,mBAAU,CAAC;aAChB,KAAK,CAAC,KAAK,CAAC;aACZ,OAAO,CAAC,IAAA,kBAAI,EAAC,mBAAU,CAAC,SAAS,CAAC,CAAC;aACnC,GAAG,EAAE,CAAC;QACT,OAAO,KAAK,CAAC,GAAG,CACd,CAAC,CAAC,EAAE,EAAE,CACJ,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE;YACjB,EAAE,EAAE,CAAC,CAAC,EAAE;YACR,SAAS,EAAE,IAAI,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;YAChC,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,SAAS;YAC7B,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,SAAS;YACvC,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE;YACxB,SAAS,EAAE,IAAI;SAChB,CAAC,CACL,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,MAAM,CACjB,MAA8B,EAC9B,eAAyB,EAAE,uFAAuF;IAClH,IAMC;QAED,MAAM,SAAS,GAAG,IAAI,EAAE,SAAS,IAAI,IAAI,IAAI,EAAE,CAAC;QAChD,MAAM,MAAM,GAAG,IAAI,EAAE,EAAE,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC;QACnD,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,EAAE,CAAC,WAAW,CAAC,CAAC,EAAE,EAAE,EAAE;YAC1B,EAAE,CAAC,MAAM,CAAC,mBAAU,CAAC;iBAClB,MAAM,CAAC;gBACN,EAAE,EAAE,MAAM;gBACV,SAAS,EAAE,SAAS,CAAC,OAAO,EAAE;gBAC9B,MAAM,EAAE,IAAI,EAAE,MAAM;gBACpB,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,MAAM;gBACN,OAAO,EAAE,EAAE;aACZ,CAAC;iBACD,GAAG,EAAE,CAAC;YACT,IAAI,IAAI,EAAE,O
AAO,EAAE,CAAC;gBAClB,MAAM,GAAG,GAAG,EAAE;qBACX,MAAM,CAAC,yBAAgB,CAAC;qBACxB,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,IAAA,mBAAU,GAAE,EAAE,CAAC,CAAC,CAAC;qBACtE,GAAG,EAAE,CAAC;gBACT,gBAAM,CAAC,KAAK,CAAC,YAAY,GAAG,CAAC,OAAO,eAAe,CAAC,CAAC;YACvD,CAAC;YAED,KAAK,MAAM,MAAM,IAAI,eAAe,EAAE,CAAC;gBACrC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC;gBAC3D,MAAM,QAAQ,GAAG,IAAA,kBAAU,EAAC,MAAM,CAAC,CAAC;gBAEpC,EAAE,CAAC,MAAM,CAAC,qBAAY,CAAC;qBACpB,MAAM,CAAC;oBACN,EAAE,EAAE,QAAQ;oBACZ,MAAM,EAAE,KAAK;iBACd,CAAC;qBACD,mBAAmB,EAAE;qBACrB,GAAG,EAAE,CAAC;gBAET,EAAE,CAAC,MAAM,CAAC,4BAAmB,CAAC;qBAC3B,MAAM,CAAC;oBACN,MAAM;oBACN,QAAQ;iBACT,CAAC;qBACD,mBAAmB,EAAE;qBACrB,GAAG,EAAE,CAAC;gBAET,gBAAM,CAAC,KAAK,CAAC,oBAAoB,QAAQ,EAAE,CAAC,CAAC;YAC/C,CAAC;YAED,0BAA0B;YAC1B,MAAM,SAAS,GAAG,IAAA,mBAAM,EAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,CAAC;YAC7D,EAAE,CAAC,MAAM,CAAC,sBAAa,CAAC;iBACrB,MAAM,CAAC;gBACN,EAAE,EAAE,SAAS;gBACb,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC;iBACD,mBAAmB,EAAE;iBACrB,GAAG,EAAE,CAAC;YAET,EAAE,CAAC,MAAM,CAAC,6BAAoB,CAAC;iBAC5B,MAAM,CAAC;gBACN,MAAM;gBACN,SAAS;aACV,CAAC;iBACD,mBAAmB,EAAE;iBACrB,GAAG,EAAE,CAAC;YAET,gBAAM,CAAC,KAAK,CAAC,qBAAqB,SAAS,EAAE,CAAC,CAAC;YAE/C,cAAc;YACd,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;gBAChB,KAAK,MAAM,CAAC,MAAM,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC7D,MAAM,KAAK,GAAG,IAAA,mBAAM,EAAC,GAAG,MAAM,IAAI,QAAQ,EAAE,CAAC,CAAC;oBAE9C,EAAE,CAAC,MAAM,CAAC,kBAAS,CAAC;yBACjB,MAAM,CAAC;wBACN,EAAE,EAAE,KAAK;wBACT,IAAI,EAAE,MAAM;wBACZ,KAAK,EAAE,QAAQ;qBAChB,CAAC;yBACD,mBAAmB,EAAE;yBACrB,GAAG,EAAE,CAAC;oBAET,EAAE,CAAC,MAAM,CAAC,yBAAgB,CAAC;yBACxB,MAAM,CAAC;wBACN,MAAM;wBACN,KAAK;qBACN,CAAC;yBACD,mBAAmB,EAAE;yBACrB,GAAG,EAAE,CAAC;oBAET,gBAAM,CAAC,KAAK,CAAC,iBAAiB,KAAK,EAAE,CAAC,CAAC;gBACzC,CAAC;YACH,CAAC;QACH,CAAC,CAAC,CAAC;QACH,OAAO,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,IA
AI,EAAE,CAAC,CAAC;IAC5F,CAAC;IAED,YACE,MAA8B,EAC9B,IAQC;QAED,MAAM,SAAS,GAAG,IAAI,EAAE,SAAS,IAAI,IAAI,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC;QACrC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,EAAE,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC;QAC9C,IAAI,CAAC,MAAM,GAAG,IAAI,EAAE,MAAM,CAAC;QAC3B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC;QAClB,IAAI,CAAC,OAAO,GAAG,IAAI,EAAE,OAAO,IAAI,EAAE,CAAC;QACnC,IAAI,CAAC,SAAS,GAAG,IAAI,EAAE,SAAS,CAAC;QACjC,IAAI,CAAC,SAAS,GAAG,IAAI,EAAE,SAAS,IAAI,KAAK,CAAC;IAC5C,CAAC;IAED,OAAO;QACL;;;WAGG;QACH,OAAO,IAAI,CAAC,UAAU,IAAI,OAAO,IAAI,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,CAAC;IAED,aAAa;QACX,OAAO,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IAC5B,CAAC;IAED,QAAQ,CAAC,KAAoB;QAC3B,IAAA,wBAAS,EAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,EAAE,uBAAuB,CAAC,CAAC;QACvD,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;QACpD,IAAI,CAAC,UAAU,CAAC,KAAK,GAAG,KAAK,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,SAAS,GAAwB;YACrC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;YACpC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,SAAS,EAAE,IAAA,0BAAmB,GAAE;SACjC,CAAC;QAEF,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,SAAS,CAAC,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC;QACtC,CAAC;QACD,MAAM,EAAE,CAAC,MAAM,CAAC,mBAAU,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,mBAAU,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QACnF,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QAChD,CAAC;QACD,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,KAAK,GAAG,IAAA,iBAAG,EAAA;+DAC0C,IAAI,CAAC,EAAE,2BAA2B,CAAC;QAC9F,aAAa;QACb,MAAM,OAAO,GAAsB,MAAM,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAEvD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CA
AC,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;IACzC,CAAC;IAED,UAAU;QACR,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC;QACnD,CAAC;QACD,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IAED,KAAK,CAAC,QAAQ;QACZ,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC,UAAU,EAAE,KAAK,IAAI,EAAE,IAAI,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;QACjF,CAAC;QACD,OAAO,IAAA,iDAAqB,EAAC,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC;IAC3D,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,MAAsB,EAAE,IAAoB;QAC1D,MAAM,SAAS,GAAG,MAAM,oBAAU,CAAC,wBAAwB,CAAC,IAAI,CAAC,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE;YACjF,OAAO,EAAE,IAAI,CAAC,SAAS;SACxB,CAAC,CAAC;QACH,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,OAA0B;QACzC,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;YACnB,MAAM,EAAE,CAAC,MAAM,CAAC,mBAAU,CAAC,CAAC,GAAG,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,mBAAU,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QACvF,CAAC;IACH,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,SAAqB;QACtC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;YACnB,MAAM,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;gBAChC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;oBACjC,MAAM,EAAE,GAAG,QAAQ,CAAC,EAAE,CAAC;oBACvB,EAAE,CAAC,MAAM,CAAC,8BAAqB,CAAC;yBAC7B,MAAM,CAAC;wBACN,MAAM,EAAE,IAAI,CAAC,EAAE;wBACf,UAAU,EAAE,EAAE;qBACf,CAAC;yBACD,mBAAmB,EAAE;yBACrB,GAAG,EAAE,CAAC;gBACX,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW;QACf,IAAI,CAAC,OAAO,GAAG,MAAM,oBAAU,CAAC,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,OAAO,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC;QACjC,CAAC;QACD,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IACD,KAAK,CAAC,iBAAiB;QACrB,IAAI,IAAI,CAAC,aAAa,E
AAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,OAAO;gBACL,OAAO,EAAE,CAAC;gBACV,SAAS,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE;gBACjD,OAAO,EAAE,IAAI,CAAC,UAAU,CAAC,OAAO;gBAChC,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK;gBAC5B,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK;aAC7B,CAAC;QACJ,CAAC;QACD,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC9B,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;QAC3B,CAAC;QACD,MAAM,KAAK,GAAkB;YAC3B,SAAS,EAAE,CAAC;YACZ,QAAQ,EAAE,CAAC;YACX,UAAU,EAAE;gBACV,MAAM,EAAE,CAAC;gBACT,UAAU,EAAE,CAAC;gBACb,MAAM,EAAE,CAAC;gBACT,KAAK,EAAE,CAAC;aACT;SACF,CAAC;QAEF,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAClC,KAAK,CAAC,SAAS,IAAI,MAAM,CAAC,OAAO,EAAE,aAAa,IAAI,CAAC,CAAC;YACtD,KAAK,CAAC,QAAQ,IAAI,MAAM,CAAC,OAAO,EAAE,aAAa,IAAI,CAAC,CAAC;YACrD,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC;YAClE,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC;YAClE,KAAK,CAAC,UAAU,CAAC,UAAU,IAAI,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,UAAU,IAAI,CAAC,CAAC;YAC1E,KAAK,CAAC,UAAU,CAAC,KAAK,IAAI,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,KAAK,IAAI,CAAC,CAAC;QAClE,CAAC;QAED,OAAO;YACL,OAAO,EAAE,CAAC;YACV,SAAS,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE;YACjD,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,gBAAgB,EAAE,CAAC;YACtD,KAAK;SACN,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,aAAa;QACjB,MAAM,OAAO,GAAgB;YAC3B,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE;YACvB,SAAS,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE;YACjD,OAAO,EAAE,MAAM,IAAI,CAAC,iBAAiB,EAAE;YACvC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,IAAI;YAC3B,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE;YAC1B,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI;SAClC,CAAC;QAEF,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,MAAM;QACV,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;YACxB,EAAE,CAAC,MAAM,CAAC,6BAAoB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,6BAAoB,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,C
AAC;YACtF,EAAE,CAAC,MAAM,CAAC,4BAAmB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,4BAAmB,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YACpF,EAAE,CAAC,MAAM,CAAC,yBAAgB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,yBAAgB,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YAC9E,EAAE,CAAC,MAAM,CAAC,8BAAqB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,8BAAqB,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YACxF,EAAE,CAAC,MAAM,CAAC,yBAAgB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,yBAAgB,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YAC9E,EAAE,CAAC,MAAM,CAAC,mBAAU,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,mBAAU,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QAChE,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AA5YD,uBA4YC;AAEM,KAAK,UAAU,uBAAuB,CAC3C,QAAgB,+BAAmB,EACnC,iBAA0B,EAC1B,SAAkB;IAElB,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;IACnB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,EAAE;SACb,MAAM,CAAC;QACN,MAAM,EAAE,mBAAU,CAAC,EAAE;QACrB,SAAS,EAAE,mBAAU,CAAC,SAAS;QAC/B,WAAW,EAAE,mBAAU,CAAC,WAAW;QACnC,QAAQ,EAAE,IAAA,iBAAG,EAAA,kBAAkB,yBAAgB,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC,UAAU,CAAC;QACzE,SAAS,EAAE,6BAAoB,CAAC,SAAS;KAC1C,CAAC;SACD,IAAI,CAAC,mBAAU,CAAC;SAChB,QAAQ,CAAC,6BAAoB,EAAE,IAAA,gBAAE,EAAC,mBAAU,CAAC,EAAE,EAAE,6BAAoB,CAAC,MAAM,CAAC,CAAC;SAC9E,QAAQ,CAAC,yBAAgB,EAAE,IAAA,gBAAE,EAAC,mBAAU,CAAC,EAAE,EAAE,yBAAgB,CAAC,MAAM,CAAC,CAAC;SACtE,KAAK,CACJ,IAAA,iBAAG,EACD,SAAS,CAAC,CAAC,CAAC,IAAA,gBAAE,EAAC,6BAAoB,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EACrE,iBAAiB,CAAC,CAAC,CAAC,IAAA,kBAAI,EAAC,mBAAU,CAAC,WAAW,EAAE,IAAI,iBAAiB,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,EACtF,IAAA,gBAAE,EAAC,mBAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAC3B,CACF;SACA,OAAO,CAAC,mBAAU,CAAC,EAAE,CAAC,CAAC;IAE1B,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,IAAA,kBAAI,EAAC,mBAAU,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,CAAC;IAE7E,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAC7C,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,QAAQ,EAAG
,MAAM,CAAC,QAAmB,IAAI,CAAC;QAC1C,SAAS,EAAE,MAAM,CAAC,SAAS;KAC5B,CAAC,CAAC,CAAC;IAEJ,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAClC,MAAM,aAAa,GAAG,OAAO,GAAG,SAAS,CAAC;IAC1C,gBAAM,CAAC,KAAK,CAAC,uCAAuC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAElF,OAAO,aAAa,CAAC;AACvB,CAAC"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import type { AtomicTestCase, GradingResult, Prompt, ProviderOptions, ProviderResponse } from '../types';
|
|
2
|
+
import { type EvaluateResult } from '../types';
|
|
3
|
+
/**
 * Type declaration for the EvalResult model: a single result belonging to an
 * eval (`evalId`), addressed by `promptIdx` and `testIdx`.
 */
export default class EvalResult {
|
|
4
|
+
/**
 * Creates an EvalResult for the given eval id from an EvaluateResult and its test case.
 * NOTE(review): `opts.persist` presumably controls whether the new result is also
 * saved — confirm in the implementation.
 */
static createFromEvaluateResult(evalId: string, result: EvaluateResult, testCase: AtomicTestCase, opts?: {
|
|
5
|
+
persist: boolean;
|
|
6
|
+
}): Promise<EvalResult>;
|
|
7
|
+
/** Creates EvalResults for several EvaluateResults under one eval id. */
static createManyFromEvaluateResult(results: EvaluateResult[], evalId: string): Promise<EvalResult[]>;
|
|
8
|
+
/** Looks up one result by id; resolves to `null` (presumably when no result has that id). */
static findById(id: string): Promise<EvalResult | null>;
|
|
9
|
+
/** Resolves to the EvalResults found for the given eval id. */
static findManyByEvalId(evalId: string): Promise<EvalResult[]>;
|
|
10
|
+
id: string;
|
|
11
|
+
evalId: string;
|
|
12
|
+
description?: string | null;
|
|
13
|
+
promptIdx: number;
|
|
14
|
+
testIdx: number;
|
|
15
|
+
testCase: AtomicTestCase;
|
|
16
|
+
prompt: Prompt;
|
|
17
|
+
promptId: string;
|
|
18
|
+
error?: string | null;
|
|
19
|
+
success: boolean;
|
|
20
|
+
score: number;
|
|
21
|
+
response: ProviderResponse | null;
|
|
22
|
+
gradingResult: GradingResult | null;
|
|
23
|
+
namedScores: Record<string, number>;
|
|
24
|
+
provider: ProviderOptions;
|
|
25
|
+
latencyMs: number;
|
|
26
|
+
cost: number;
|
|
27
|
+
persisted: boolean;
|
|
28
|
+
/**
 * Several inputs are optional or nullable here (`promptId`, `namedScores`, `latencyMs`,
 * `cost`, `persisted`) but non-optional on the instance.
 * NOTE(review): defaults are presumably applied in the constructor — verify.
 */
constructor(opts: {
|
|
29
|
+
id: string;
|
|
30
|
+
evalId: string;
|
|
31
|
+
promptIdx: number;
|
|
32
|
+
testIdx: number;
|
|
33
|
+
testCase: AtomicTestCase;
|
|
34
|
+
prompt: Prompt;
|
|
35
|
+
promptId?: string | null;
|
|
36
|
+
error?: string | null;
|
|
37
|
+
success: boolean;
|
|
38
|
+
score: number;
|
|
39
|
+
response: ProviderResponse | null;
|
|
40
|
+
gradingResult: GradingResult | null;
|
|
41
|
+
namedScores?: Record<string, number> | null;
|
|
42
|
+
provider: ProviderOptions;
|
|
43
|
+
latencyMs?: number | null;
|
|
44
|
+
cost?: number | null;
|
|
45
|
+
persisted?: boolean;
|
|
46
|
+
});
|
|
47
|
+
/** NOTE(review): presumably writes this result to the database — confirm in the implementation. */
save(): Promise<void>;
|
|
48
|
+
/** Converts this model instance to the plain EvaluateResult type. */
toEvaluateResult(): EvaluateResult;
|
|
49
|
+
}
|
|
50
|
+
//# sourceMappingURL=evalResult.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evalResult.d.ts","sourceRoot":"","sources":["../../../src/models/evalResult.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EACV,cAAc,EACd,aAAa,EACb,MAAM,EACN,eAAe,EACf,gBAAgB,EACjB,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,KAAK,cAAc,EAAE,MAAM,UAAU,CAAC;AAG/C,MAAM,CAAC,OAAO,OAAO,UAAU;WAChB,wBAAwB,CACnC,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,cAAc,EACtB,QAAQ,EAAE,cAAc,EACxB,IAAI,CAAC,EAAE;QAAE,OAAO,EAAE,OAAO,CAAA;KAAE;WAgChB,4BAA4B,CAAC,OAAO,EAAE,cAAc,EAAE,EAAE,MAAM,EAAE,MAAM;WAetE,QAAQ,CAAC,EAAE,EAAE,MAAM;WAMnB,gBAAgB,CAAC,MAAM,EAAE,MAAM;IAS5C,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,cAAc,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAClC,aAAa,EAAE,aAAa,GAAG,IAAI,CAAC;IACpC,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACpC,QAAQ,EAAE,eAAe,CAAC;IAC1B,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,OAAO,CAAC;gBAEP,IAAI,EAAE;QAChB,EAAE,EAAE,MAAM,CAAC;QACX,MAAM,EAAE,MAAM,CAAC;QACf,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE,cAAc,CAAC;QACzB,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACzB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACtB,OAAO,EAAE,OAAO,CAAC;QACjB,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,gBAAgB,GAAG,IAAI,CAAC;QAClC,aAAa,EAAE,aAAa,GAAG,IAAI,CAAC;QACpC,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;QAC5C,QAAQ,EAAE,eAAe,CAAC;QAC1B,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC1B,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACrB,SAAS,CAAC,EAAE,OAAO,CAAC;KACrB;IAqBK,IAAI;IAeV,gBAAgB,IAAI,cAAc;CAqBnC"}
|