promptfoo 0.8.3 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/assertions.d.ts +3 -3
- package/dist/assertions.d.ts.map +1 -1
- package/dist/assertions.js +11 -12
- package/dist/assertions.js.map +1 -1
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +9 -9
- package/dist/cache.js.map +1 -1
- package/dist/evaluator.d.ts +1 -1
- package/dist/evaluator.d.ts.map +1 -1
- package/dist/evaluator.js +60 -34
- package/dist/evaluator.js.map +1 -1
- package/dist/index.d.ts +10 -10
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +18 -14
- package/dist/index.js.map +1 -1
- package/dist/main.js +41 -40
- package/dist/main.js.map +1 -1
- package/dist/providers/localai.js +11 -11
- package/dist/providers/localai.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +30 -21
- package/dist/providers/openai.js.map +1 -1
- package/dist/providers.d.ts +3 -3
- package/dist/providers.d.ts.map +1 -1
- package/dist/providers.js +15 -15
- package/dist/providers.js.map +1 -1
- package/dist/types.d.ts +5 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/util.d.ts +2 -2
- package/dist/util.d.ts.map +1 -1
- package/dist/util.js +43 -15
- package/dist/util.js.map +1 -1
- package/dist/web/client/assets/index-9a9ba400.css +1 -0
- package/dist/web/client/assets/{index-8751749f.js → index-b72d3ca9.js} +12 -12
- package/dist/web/client/index.html +2 -2
- package/dist/web/server.js +9 -9
- package/dist/web/server.js.map +1 -1
- package/package.json +3 -1
- package/src/assertions.ts +8 -9
- package/src/cache.ts +5 -4
- package/src/evaluator.ts +66 -33
- package/src/index.ts +13 -8
- package/src/main.ts +13 -18
- package/src/providers/localai.ts +3 -3
- package/src/providers/openai.ts +16 -8
- package/src/providers.ts +3 -3
- package/src/types.ts +7 -2
- package/src/util.ts +42 -20
- package/src/web/client/package-lock.json +5729 -0
- package/src/web/client/src/ResultsTable.css +19 -0
- package/src/web/client/src/ResultsTable.tsx +51 -37
- package/src/web/client/src/ResultsView.tsx +7 -7
- package/src/web/server.ts +3 -3
- package/dist/web/client/assets/index-207192fc.css +0 -1
package/README.md
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
|
-
# promptfoo:
|
|
1
|
+
# promptfoo: test your prompts
|
|
2
2
|
|
|
3
3
|
[](https://npmjs.com/package/promptfoo)
|
|
4
4
|

|
|
5
5
|
|
|
6
|
-
`promptfoo`
|
|
6
|
+
`promptfoo` is a tool for testing and evaluating LLM prompt quality.
|
|
7
7
|
|
|
8
8
|
With promptfoo, you can:
|
|
9
9
|
|
|
10
|
-
- **
|
|
10
|
+
- **Systematically test prompts** against predefined test cases
|
|
11
11
|
- **Evaluate quality and catch regressions** by comparing LLM outputs side-by-side
|
|
12
12
|
- **Speed up evaluations** with caching and concurrent tests
|
|
13
|
-
- **
|
|
14
|
-
- Use as a
|
|
13
|
+
- **Score outputs automatically** by defining "expectations"
|
|
14
|
+
- Use as a CLI, or integrate into your workflow as a library
|
|
15
15
|
- Use OpenAI models, open-source models like Llama and Vicuna, or integrate custom API providers for any LLM API
|
|
16
16
|
|
|
17
17
|
The goal: **test-driven prompt engineering**, rather than trial-and-error.
|
package/dist/assertions.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import type { Assertion, GradingConfig,
|
|
2
|
-
export declare function runAssertions(test:
|
|
3
|
-
export declare function runAssertion(assertion: Assertion, test:
|
|
1
|
+
import type { Assertion, GradingConfig, GradingResult, AtomicTestCase } from './types';
|
|
2
|
+
export declare function runAssertions(test: AtomicTestCase, output: string): Promise<GradingResult>;
|
|
3
|
+
export declare function runAssertion(assertion: Assertion, test: AtomicTestCase, output: string): Promise<GradingResult>;
|
|
4
4
|
export declare function matchesSimilarity(expected: string, output: string, threshold: number): Promise<GradingResult>;
|
|
5
5
|
export declare function matchesLlmRubric(expected: string, output: string, options?: GradingConfig): Promise<GradingResult>;
|
|
6
6
|
export declare function assertionFromString(expected: string): Assertion;
|
package/dist/assertions.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../src/assertions.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,SAAS,EAAE,aAAa,
|
|
1
|
+
{"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../src/assertions.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,SAAS,EAAE,aAAa,EAAY,aAAa,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAMjG,wBAAsB,aAAa,CAAC,IAAI,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC,CAyBhG;AAED,wBAAsB,YAAY,CAChC,SAAS,EAAE,SAAS,EACpB,IAAI,EAAE,cAAc,EACpB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,aAAa,CAAC,CA0DxB;AAoBD,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,aAAa,CAAC,CA0CxB;AAED,wBAAsB,gBAAgB,CACpC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,aAAa,CAAC,CAgDxB;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,SAAS,CAmC/D;;;;;AAED,wBAGE"}
|
package/dist/assertions.js
CHANGED
|
@@ -6,10 +6,10 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
6
6
|
exports.assertionFromString = exports.matchesLlmRubric = exports.matchesSimilarity = exports.runAssertion = exports.runAssertions = void 0;
|
|
7
7
|
const tiny_invariant_1 = __importDefault(require("tiny-invariant"));
|
|
8
8
|
const nunjucks_1 = __importDefault(require("nunjucks"));
|
|
9
|
-
const
|
|
10
|
-
const
|
|
11
|
-
const
|
|
12
|
-
const
|
|
9
|
+
const openai_1 = require("./providers/openai");
|
|
10
|
+
const util_1 = require("./util");
|
|
11
|
+
const providers_1 = require("./providers");
|
|
12
|
+
const prompts_1 = require("./prompts");
|
|
13
13
|
const SIMILAR_REGEX = /similar(?::|\((\d+(\.\d+)?)\):)/;
|
|
14
14
|
const DEFAULT_SEMANTIC_SIMILARITY_THRESHOLD = 0.8;
|
|
15
15
|
async function runAssertions(test, output) {
|
|
@@ -81,8 +81,7 @@ async function runAssertion(assertion, test, output) {
|
|
|
81
81
|
}
|
|
82
82
|
if (assertion.type === 'similar') {
|
|
83
83
|
(0, tiny_invariant_1.default)(assertion.value, 'Similarity assertion must have a string value');
|
|
84
|
-
|
|
85
|
-
return matchesSimilarity(assertion.value, output, assertion.threshold);
|
|
84
|
+
return matchesSimilarity(assertion.value, output, assertion.threshold || 0.75);
|
|
86
85
|
}
|
|
87
86
|
if (assertion.type === 'llm-rubric') {
|
|
88
87
|
(0, tiny_invariant_1.default)(assertion.value, 'Similarity assertion must have a string value');
|
|
@@ -107,8 +106,8 @@ function containsJSON(str) {
|
|
|
107
106
|
}
|
|
108
107
|
}
|
|
109
108
|
async function matchesSimilarity(expected, output, threshold) {
|
|
110
|
-
const expectedEmbedding = await
|
|
111
|
-
const outputEmbedding = await
|
|
109
|
+
const expectedEmbedding = await openai_1.DefaultEmbeddingProvider.callEmbeddingApi(expected);
|
|
110
|
+
const outputEmbedding = await openai_1.DefaultEmbeddingProvider.callEmbeddingApi(output);
|
|
112
111
|
const tokensUsed = {
|
|
113
112
|
total: (expectedEmbedding.tokenUsage?.total || 0) + (outputEmbedding.tokenUsage?.total || 0),
|
|
114
113
|
prompt: (expectedEmbedding.tokenUsage?.prompt || 0) + (outputEmbedding.tokenUsage?.prompt || 0),
|
|
@@ -129,7 +128,7 @@ async function matchesSimilarity(expected, output, threshold) {
|
|
|
129
128
|
tokensUsed,
|
|
130
129
|
};
|
|
131
130
|
}
|
|
132
|
-
const similarity = (0,
|
|
131
|
+
const similarity = (0, util_1.cosineSimilarity)(expectedEmbedding.embedding, outputEmbedding.embedding);
|
|
133
132
|
if (similarity < threshold) {
|
|
134
133
|
return {
|
|
135
134
|
pass: false,
|
|
@@ -148,13 +147,13 @@ async function matchesLlmRubric(expected, output, options) {
|
|
|
148
147
|
if (!options) {
|
|
149
148
|
throw new Error('Cannot grade output without grading config. Specify --grader option or grading config.');
|
|
150
149
|
}
|
|
151
|
-
const prompt = nunjucks_1.default.renderString(options.rubricPrompt ||
|
|
150
|
+
const prompt = nunjucks_1.default.renderString(options.rubricPrompt || prompts_1.DEFAULT_GRADING_PROMPT, {
|
|
152
151
|
content: output,
|
|
153
152
|
rubric: expected,
|
|
154
153
|
});
|
|
155
|
-
let provider = options.provider ||
|
|
154
|
+
let provider = options.provider || openai_1.DefaultGradingProvider;
|
|
156
155
|
if (typeof provider === 'string') {
|
|
157
|
-
provider = await (0,
|
|
156
|
+
provider = await (0, providers_1.loadApiProvider)(provider);
|
|
158
157
|
}
|
|
159
158
|
const resp = await provider.callApi(prompt);
|
|
160
159
|
if (resp.error || !resp.output) {
|
package/dist/assertions.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"assertions.js","sourceRoot":"","sources":["../src/assertions.ts"],"names":[],"mappings":";;;;;;AAAA,oEAAuC;AACvC,wDAAgC;AAEhC
|
|
1
|
+
{"version":3,"file":"assertions.js","sourceRoot":"","sources":["../src/assertions.ts"],"names":[],"mappings":";;;;;;AAAA,oEAAuC;AACvC,wDAAgC;AAEhC,+CAAsF;AACtF,iCAA0C;AAC1C,2CAA8C;AAC9C,uCAAmD;AAInD,MAAM,aAAa,GAAG,iCAAiC,CAAC;AAExD,MAAM,qCAAqC,GAAG,GAAG,CAAC;AAE3C,KAAK,UAAU,aAAa,CAAC,IAAoB,EAAE,MAAc;IACtE,MAAM,UAAU,GAAG;QACjB,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,CAAC;QACT,UAAU,EAAE,CAAC;KACd,CAAC;IAEF,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;QAChB,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,eAAe,EAAE,UAAU,EAAE,CAAC;KAC5D;IAED,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,MAAM,EAAE;QACnC,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,SAAS,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;QAC3D,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;YAChB,OAAO,MAAM,CAAC;SACf;QAED,IAAI,MAAM,CAAC,UAAU,EAAE;YACrB,UAAU,CAAC,KAAK,IAAI,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC;YAC5C,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC;YAC9C,UAAU,CAAC,UAAU,IAAI,MAAM,CAAC,UAAU,CAAC,UAAU,CAAC;SACvD;KACF;IAED,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,uBAAuB,EAAE,UAAU,EAAE,CAAC;AACrE,CAAC;AAzBD,sCAyBC;AAEM,KAAK,UAAU,YAAY,CAChC,SAAoB,EACpB,IAAoB,EACpB,MAAc;IAEd,IAAI,IAAI,GAAY,KAAK,CAAC;IAE1B,IAAI,SAAS,CAAC,IAAI,KAAK,QAAQ,EAAE;QAC/B,IAAI,GAAG,SAAS,CAAC,KAAK,KAAK,MAAM,CAAC;QAClC,OAAO;YACL,IAAI;YACJ,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,oBAAoB,SAAS,CAAC,KAAK,GAAG;SAC3E,CAAC;KACH;IAED,IAAI,SAAS,CAAC,IAAI,KAAK,SAAS,EAAE;QAChC,IAAI;YACF,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YACnB,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,kBAAkB,EAAE,CAAC;SACnD;QAAC,OAAO,GAAG,EAAE;YACZ,OAAO;gBACL,IAAI,EAAE,KAAK;gBACX,MAAM,EAAE,2DAA2D,GAAG,EAAE;aACzE,CAAC;SACH;KACF;IAED,IAAI,SAAS,CAAC,IAAI,KAAK,eAAe,EAAE;QACtC,MAAM,IAAI,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;QAClC,OAAO;YACL,IAAI;YACJ,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,uCAAuC;SAC5E,CAAC;KACH;IAED,IAAI,SAAS,CAAC,IAAI,KAAK,YAAY,EAAE;QACnC,IAAI;YACF,MAAM,cAAc,GAAG,IAAI,QAAQ,CAAC,QAAQ,EAAE,UAAU,SAAS,CAAC,KAAK,EAAE,CAAC,CAAC;YAC3E,IAAI,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;SAC/B;QAAC,OAAO,GAAG,EAAE;YACZ,OAAO;gBACL,IAAI,EAAE,KAAK;gBACX,MAAM,EAAE,gCAAiC,GAAa,CAAC,OAAO,EAAE;aACjE,CAAC;SACH;QACD,OAAO;YACL,IAAI;YACJ,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,gCAAgC;SACrE,CAAC;KACH;IAED,IAAI,SAAS,CAAC,IAAI,KAAK,SAAS,EAAE;QAChC,IAAA,wBAAS,EAAC,SAAS,CAAC,KAAK,EAAE,+CAA+C,CAAC,CAAC;QAC5E,OAAO,iBAAiB,CAAC,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,CAAC,SAAS,IAAI,IAAI,CAAC,CAAC;KAChF;IAED,IAAI,SAAS,CAAC,IAAI,KAAK,YAAY,EAAE;QACnC,IAAA,wBAAS,EAAC,SAAS,CAAC,KAAK,EAAE,+CAA+C,CAAC,CAAC;QAC5E,OAAO,gBAAgB,CAAC,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;KAChE;IAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;AAC/D,CAAC;AA9DD,oCA8DC;AAED,SAAS,YAAY,CAAC,GAAW;IAC/B,oDAAoD;IACpD,MAAM,WAAW,GAAG,wBAAwB,CAAC;IAE7C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;IAErC,IAAI,CAAC,KAAK,EAAE;QACV,OAAO,KAAK,CAAC;KACd;IAED,IAAI;QACF,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,OAAO,IAAI,CAAC;KACb;IAAC,OAAO,KAAK,EAAE;QACd,OAAO,KAAK,CAAC;KACd;AACH,CAAC;AAEM,KAAK,UAAU,iBAAiB,CACrC,QAAgB,EAChB,MAAc,EACd,SAAiB;IAEjB,MAAM,iBAAiB,GAAG,MAAM,iCAAwB,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IACpF,MAAM,eAAe,GAAG,MAAM,iCAAwB,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;IAEhF,MAAM,UAAU,GAAG;QACjB,KAAK,EAAE,CAAC,iBAAiB,CAAC,UAAU,EAAE,KAAK,IAAI,CAAC,CAAC,GAAG,CAAC,eAAe,CAAC,UAAU,EAAE,KAAK,IAAI,CAAC,CAAC;QAC5F,MAAM,EAAE,CAAC,iBAAiB,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC,eAAe,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,CAAC;QAC/F,UAAU,EACR,CAAC,iBAAiB,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC,CAAC;YAC/C,CAAC,eAAe,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC,CAAC;KAChD,CAAC;IAEF,IAAI,iBAAiB,CAAC,KAAK,IAAI,eAAe,CAAC,KAAK,EAAE;QACpD,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EACJ,iBAAiB,CAAC,KAAK,IAAI,eAAe,CAAC,KAAK,IAAI,mCAAmC;YACzF,UAAU;SACX,CAAC;KACH;IAED,IAAI,CAAC,iBAAiB,CAAC,SAAS,IAAI,CAAC,eAAe,CAAC,SAAS,EAAE;QAC9D,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,qBAAqB;YAC7B,UAAU;SACX,CAAC;KACH;IAED,MAAM,UAAU,GAAG,IAAA,uBAAgB,EAAC,iBAAiB,CAAC,SAAS,EAAE,eAAe,CAAC,SAAS,CAAC,CAAC;IAC5F,IAAI,UAAU,GAAG,SAAS,EAAE;QAC1B,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,cAAc,UAAU,2BAA2B,SAAS,EAAE;YACtE,UAAU;SACX,CAAC;KACH;IACD,OAAO;QACL,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,cAAc,UAAU,8BAA8B,SAAS,EAAE;QACzE,UAAU;KACX,CAAC;AACJ,CAAC;AA9CD,8CA8CC;AAEM,KAAK,UAAU,gBAAgB,CACpC,QAAgB,EAChB,MAAc,EACd,OAAuB;IAEvB,IAAI,CAAC,OAAO,EAAE;QACZ,MAAM,IAAI,KAAK,CACb,wFAAwF,CACzF,CAAC;KACH;IAED,MAAM,MAAM,GAAG,kBAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,YAAY,IAAI,gCAAsB,EAAE;QACnF,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,QAAQ;KACjB,CAAC,CAAC;IAEH,IAAI,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,+BAAsB,CAAC;IAC1D,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE;QAChC,QAAQ,GAAG,MAAM,IAAA,2BAAe,EAAC,QAAQ,CAAC,CAAC;KAC5C;IACD,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAC5C,IAAI,IAAI,CAAC,KAAK,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;QAC9B,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,IAAI,CAAC,KAAK,IAAI,WAAW;YACjC,UAAU,EAAE;gBACV,KAAK,EAAE,IAAI,CAAC,UAAU,EAAE,KAAK,IAAI,CAAC;gBAClC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC;gBACpC,UAAU,EAAE,IAAI,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC;aAC7C;SACF,CAAC;KACH;IAED,IAAI;QACF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAkB,CAAC;QACxD,MAAM,CAAC,UAAU,GAAG;YAClB,KAAK,EAAE,IAAI,CAAC,UAAU,EAAE,KAAK,IAAI,CAAC;YAClC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC;YACpC,UAAU,EAAE,IAAI,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC;SAC7C,CAAC;QACF,OAAO,MAAM,CAAC;KACf;IAAC,OAAO,GAAG,EAAE;QACZ,OAAO;YACL,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,6BAA6B,IAAI,CAAC,MAAM,EAAE;YAClD,UAAU,EAAE;gBACV,KAAK,EAAE,IAAI,CAAC,UAAU,EAAE,KAAK,IAAI,CAAC;gBAClC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC;gBACpC,UAAU,EAAE,IAAI,CAAC,UAAU,EAAE,UAAU,IAAI,CAAC;aAC7C;SACF,CAAC;KACH;AACH,CAAC;AApDD,4CAoDC;AAED,SAAgB,mBAAmB,CAAC,QAAgB;IAClD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IAC5C,IAAI,KAAK,EAAE;QACT,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,qCAAqC,CAAC;QAChF,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QACxD,OAAO;YACL,IAAI,EAAE,SAAS;YACf,KAAK,EAAE,IAAI;YACX,SAAS;SACV,CAAC;KACH;IACD,IAAI,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,QAAQ,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE;QAC9D,wCAAwC;QACxC,MAAM,WAAW,GAAG,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC;QAC/E,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QACjD,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,YAAY;SACpB,CAAC;KACH;IACD,IAAI,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE;QACjC,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;SACzB,CAAC;KACH;IACD,IAAI,QAAQ,KAAK,SAAS,IAAI,QAAQ,KAAK,eAAe,EAAE;QAC1D,OAAO;YACL,IAAI,EAAE,QAAQ;SACf,CAAC;KACH;IACD,OAAO;QACL,IAAI,EAAE,QAAQ;QACd,KAAK,EAAE,QAAQ;KAChB,CAAC;AACJ,CAAC;AAnCD,kDAmCC;AAED,kBAAe;IACb,iBAAiB;IACjB,gBAAgB;CACjB,CAAC"}
|
package/dist/cache.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAkC3D,wBAAsB,kBAAkB,CACtC,GAAG,EAAE,WAAW,EAChB,OAAO,yBAAkB,EACzB,OAAO,EAAE,MAAM,GACd,OAAO,CAAC;IAAE,IAAI,EAAE,GAAG,CAAC;IAAC,MAAM,EAAE,OAAO,CAAA;CAAE,CAAC,CAyCzC;AAED,wBAAgB,WAAW,SAE1B;AAED,wBAAgB,YAAY,SAG3B"}
|
package/dist/cache.js
CHANGED
|
@@ -8,8 +8,8 @@ const node_fs_1 = __importDefault(require("node:fs"));
|
|
|
8
8
|
const node_path_1 = __importDefault(require("node:path"));
|
|
9
9
|
const cache_manager_1 = __importDefault(require("cache-manager"));
|
|
10
10
|
const cache_manager_fs_hash_1 = __importDefault(require("cache-manager-fs-hash"));
|
|
11
|
-
const
|
|
12
|
-
const
|
|
11
|
+
const logger_1 = __importDefault(require("./logger"));
|
|
12
|
+
const util_1 = require("./util");
|
|
13
13
|
let cacheInstance;
|
|
14
14
|
let enabled = typeof process.env.PROMPTFOO_CACHE_ENABLED === 'undefined'
|
|
15
15
|
? true
|
|
@@ -17,9 +17,9 @@ let enabled = typeof process.env.PROMPTFOO_CACHE_ENABLED === 'undefined'
|
|
|
17
17
|
const cacheType = process.env.PROMPTFOO_CACHE_TYPE || (process.env.NODE_ENV === 'test' ? 'memory' : 'disk');
|
|
18
18
|
function getCache() {
|
|
19
19
|
if (!cacheInstance) {
|
|
20
|
-
const cachePath = process.env.PROMPTFOO_CACHE_PATH || node_path_1.default.join((0,
|
|
20
|
+
const cachePath = process.env.PROMPTFOO_CACHE_PATH || node_path_1.default.join((0, util_1.getConfigDirectoryPath)(), 'cache');
|
|
21
21
|
if (!node_fs_1.default.existsSync(cachePath)) {
|
|
22
|
-
|
|
22
|
+
logger_1.default.info(`Creating cache folder at ${cachePath}.`);
|
|
23
23
|
node_fs_1.default.mkdirSync(cachePath, { recursive: true });
|
|
24
24
|
}
|
|
25
25
|
cacheInstance = cache_manager_1.default.caching({
|
|
@@ -37,7 +37,7 @@ function getCache() {
|
|
|
37
37
|
}
|
|
38
38
|
async function fetchJsonWithCache(url, options = {}, timeout) {
|
|
39
39
|
if (!enabled) {
|
|
40
|
-
const resp = await (0,
|
|
40
|
+
const resp = await (0, util_1.fetchWithTimeout)(url, options, timeout);
|
|
41
41
|
return {
|
|
42
42
|
cached: false,
|
|
43
43
|
data: await resp.json(),
|
|
@@ -50,18 +50,18 @@ async function fetchJsonWithCache(url, options = {}, timeout) {
|
|
|
50
50
|
// Try to get the cached response
|
|
51
51
|
const cachedResponse = await cache.get(cacheKey);
|
|
52
52
|
if (cachedResponse) {
|
|
53
|
-
|
|
53
|
+
logger_1.default.debug(`Returning cached response for ${url}: ${cachedResponse}`);
|
|
54
54
|
return {
|
|
55
55
|
cached: true,
|
|
56
56
|
data: JSON.parse(cachedResponse),
|
|
57
57
|
};
|
|
58
58
|
}
|
|
59
59
|
// Fetch the actual data and store it in the cache
|
|
60
|
-
const response = await (0,
|
|
60
|
+
const response = await (0, util_1.fetchWithTimeout)(url, options, timeout);
|
|
61
61
|
try {
|
|
62
62
|
const data = await response.json();
|
|
63
63
|
if (response.ok) {
|
|
64
|
-
|
|
64
|
+
logger_1.default.debug(`Storing ${url} response in cache: ${data}`);
|
|
65
65
|
await cache.set(cacheKey, JSON.stringify(data));
|
|
66
66
|
}
|
|
67
67
|
return {
|
|
@@ -79,7 +79,7 @@ function enableCache() {
|
|
|
79
79
|
}
|
|
80
80
|
exports.enableCache = enableCache;
|
|
81
81
|
function disableCache() {
|
|
82
|
-
|
|
82
|
+
logger_1.default.info('Cache is disabled.');
|
|
83
83
|
enabled = false;
|
|
84
84
|
}
|
|
85
85
|
exports.disableCache = disableCache;
|
package/dist/cache.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":";;;;;;AAAA,sDAAyB;AACzB,0DAA6B;AAE7B,kEAAyC;AACzC,kFAA4C;AAE5C,
|
|
1
|
+
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":";;;;;;AAAA,sDAAyB;AACzB,0DAA6B;AAE7B,kEAAyC;AACzC,kFAA4C;AAE5C,sDAA8B;AAC9B,iCAAkE;AAKlE,IAAI,aAAgC,CAAC;AAErC,IAAI,OAAO,GACT,OAAO,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,WAAW;IACxD,CAAC,CAAC,IAAI;IACN,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;AAEnD,MAAM,SAAS,GACb,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;AAE5F,SAAS,QAAQ;IACf,IAAI,CAAC,aAAa,EAAE;QAClB,MAAM,SAAS,GACb,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,mBAAI,CAAC,IAAI,CAAC,IAAA,6BAAsB,GAAE,EAAE,OAAO,CAAC,CAAC;QACnF,IAAI,CAAC,iBAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE;YAC7B,gBAAM,CAAC,IAAI,CAAC,4BAA4B,SAAS,GAAG,CAAC,CAAC;YACtD,iBAAE,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;SAC9C;QACD,aAAa,GAAG,uBAAY,CAAC,OAAO,CAAC;YACnC,KAAK,EAAE,SAAS,KAAK,MAAM,CAAC,CAAC,CAAC,+BAAO,CAAC,CAAC,CAAC,QAAQ;YAChD,OAAO,EAAE;gBACP,GAAG,EAAE,OAAO,CAAC,GAAG,CAAC,8BAA8B,IAAI,KAAM;gBACzD,IAAI,EAAE,SAAS;gBACf,GAAG,EAAE,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE;gBACzD,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,wBAAwB,IAAI,GAAG,EAAE,iBAAiB;gBACvE,+CAA+C;aAChD;SACF,CAAC,CAAC;KACJ;IACD,OAAO,aAAa,CAAC;AACvB,CAAC;AAEM,KAAK,UAAU,kBAAkB,CACtC,GAAgB,EAChB,UAAuB,EAAE,EACzB,OAAe;IAEf,IAAI,CAAC,OAAO,EAAE;QACZ,MAAM,IAAI,GAAG,MAAM,IAAA,uBAAgB,EAAC,GAAG,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;QAC3D,OAAO;YACL,MAAM,EAAE,KAAK;YACb,IAAI,EAAE,MAAM,IAAI,CAAC,IAAI,EAAE;SACxB,CAAC;KACH;IAED,MAAM,KAAK,GAAG,MAAM,QAAQ,EAAE,CAAC;IAE/B,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;IACxC,OAAO,IAAI,CAAC,OAAO,CAAC;IACpB,MAAM,QAAQ,GAAG,SAAS,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC;IAExD,iCAAiC;IACjC,MAAM,cAAc,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAEjD,IAAI,cAAc,EAAE;QAClB,gBAAM,CAAC,KAAK,CAAC,iCAAiC,GAAG,KAAK,cAAc,EAAE,CAAC,CAAC;QACxE,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,cAAwB,CAAC;SAC3C,CAAC;KACH;IAED,kDAAkD;IAClD,MAAM,QAAQ,GAAG,MAAM,IAAA,uBAAgB,EAAC,GAAG,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAC/D,IAAI;QACF,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,IAAI,QAAQ,CAAC,EAAE,EAAE;YACf,gBAAM,CAAC,KAAK,CAAC,WAAW,GAAG,uBAAuB,IAAI,EAAE,CAAC,CAAC;YAC1D,MAAM,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;SACjD;QACD,OAAO;YACL,MAAM,EAAE,KAAK;YACb,IAAI;SACL,CAAC;KACH;IAAC,OAAO,GAAG,EAAE;QACZ,MAAM,IAAI,KAAK,CAAC,+BAA+B,GAAG,KAAK,GAAG,EAAE,CAAC,CAAC;KAC/D;AACH,CAAC;AA7CD,gDA6CC;AAED,SAAgB,WAAW;IACzB,OAAO,GAAG,IAAI,CAAC;AACjB,CAAC;AAFD,kCAEC;AAED,SAAgB,YAAY;IAC1B,gBAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;IAClC,OAAO,GAAG,KAAK,CAAC;AAClB,CAAC;AAHD,oCAGC"}
|
package/dist/evaluator.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import type { EvaluateOptions, EvaluateSummary, TestSuite } from './types
|
|
1
|
+
import type { EvaluateOptions, EvaluateSummary, TestSuite } from './types';
|
|
2
2
|
export declare function evaluate(testSuite: TestSuite, options: EvaluateOptions): Promise<EvaluateSummary>;
|
|
3
3
|
//# sourceMappingURL=evaluator.d.ts.map
|
package/dist/evaluator.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAEV,eAAe,EAGf,eAAe,EAEf,SAAS,
|
|
1
|
+
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAEV,eAAe,EAGf,eAAe,EAEf,SAAS,EAIV,MAAM,SAAS,CAAC;AA6VjB,wBAAgB,QAAQ,CAAC,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,eAAe,4BAGtE"}
|
package/dist/evaluator.js
CHANGED
|
@@ -31,10 +31,26 @@ const node_readline_1 = __importDefault(require("node:readline"));
|
|
|
31
31
|
const async_1 = __importDefault(require("async"));
|
|
32
32
|
const chalk_1 = __importDefault(require("chalk"));
|
|
33
33
|
const nunjucks_1 = __importDefault(require("nunjucks"));
|
|
34
|
-
const
|
|
35
|
-
const
|
|
36
|
-
const
|
|
34
|
+
const logger_1 = __importDefault(require("./logger"));
|
|
35
|
+
const assertions_1 = require("./assertions");
|
|
36
|
+
const suggestions_1 = require("./suggestions");
|
|
37
37
|
const DEFAULT_MAX_CONCURRENCY = 4;
|
|
38
|
+
function generateVarCombinations(vars) {
|
|
39
|
+
const keys = Object.keys(vars);
|
|
40
|
+
const combinations = [{}];
|
|
41
|
+
for (const key of keys) {
|
|
42
|
+
const values = Array.isArray(vars[key]) ? vars[key] : [vars[key]];
|
|
43
|
+
const newCombinations = [];
|
|
44
|
+
for (const combination of combinations) {
|
|
45
|
+
for (const value of values) {
|
|
46
|
+
newCombinations.push({ ...combination, [key]: value });
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
combinations.length = 0;
|
|
50
|
+
combinations.push(...newCombinations);
|
|
51
|
+
}
|
|
52
|
+
return combinations;
|
|
53
|
+
}
|
|
38
54
|
class Evaluator {
|
|
39
55
|
constructor(testSuite, options) {
|
|
40
56
|
this.testSuite = testSuite;
|
|
@@ -52,9 +68,12 @@ class Evaluator {
|
|
|
52
68
|
}
|
|
53
69
|
async runEval({ provider, prompt, test, includeProviderId, }) {
|
|
54
70
|
const vars = test.vars || {};
|
|
55
|
-
const renderedPrompt = nunjucks_1.default.renderString(prompt, vars);
|
|
71
|
+
const renderedPrompt = nunjucks_1.default.renderString(prompt.raw, vars);
|
|
56
72
|
// Note that we're using original prompt, not renderedPrompt
|
|
57
|
-
|
|
73
|
+
let promptDisplay = prompt.display;
|
|
74
|
+
if (includeProviderId) {
|
|
75
|
+
promptDisplay = `[${provider.id()}] ${promptDisplay}`;
|
|
76
|
+
}
|
|
58
77
|
const setup = {
|
|
59
78
|
prompt: {
|
|
60
79
|
raw: renderedPrompt,
|
|
@@ -73,7 +92,7 @@ class Evaluator {
|
|
|
73
92
|
ret.error = response.error;
|
|
74
93
|
}
|
|
75
94
|
else if (response.output) {
|
|
76
|
-
const checkResult = await (0,
|
|
95
|
+
const checkResult = await (0, assertions_1.runAssertions)(test, response.output);
|
|
77
96
|
if (!checkResult.pass) {
|
|
78
97
|
ret.error = checkResult.reason;
|
|
79
98
|
}
|
|
@@ -116,17 +135,17 @@ class Evaluator {
|
|
|
116
135
|
const prompts = [];
|
|
117
136
|
if (options.generateSuggestions) {
|
|
118
137
|
// TODO(ian): Move this into its own command/file
|
|
119
|
-
|
|
120
|
-
const { prompts: newPrompts, error } = await (0,
|
|
138
|
+
logger_1.default.info(`Generating prompt variations...`);
|
|
139
|
+
const { prompts: newPrompts, error } = await (0, suggestions_1.generatePrompts)(testSuite.prompts[0].raw, 1);
|
|
121
140
|
if (error || !newPrompts) {
|
|
122
141
|
throw new Error(`Failed to generate prompts: ${error}`);
|
|
123
142
|
}
|
|
124
|
-
|
|
143
|
+
logger_1.default.info(chalk_1.default.blue('Generated prompts:'));
|
|
125
144
|
let numAdded = 0;
|
|
126
145
|
for (const prompt of newPrompts) {
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
146
|
+
logger_1.default.info('--------------------------------------------------------');
|
|
147
|
+
logger_1.default.info(`${prompt}`);
|
|
148
|
+
logger_1.default.info('--------------------------------------------------------');
|
|
130
149
|
// Ask the user if they want to continue
|
|
131
150
|
await new Promise((resolve) => {
|
|
132
151
|
const rl = node_readline_1.default.createInterface({
|
|
@@ -136,28 +155,28 @@ class Evaluator {
|
|
|
136
155
|
rl.question(`${chalk_1.default.blue('Do you want to test this prompt?')} (y/N): `, async (answer) => {
|
|
137
156
|
rl.close();
|
|
138
157
|
if (answer.toLowerCase().startsWith('y')) {
|
|
139
|
-
testSuite.prompts.push(prompt);
|
|
158
|
+
testSuite.prompts.push({ raw: prompt, display: prompt });
|
|
140
159
|
numAdded++;
|
|
141
160
|
}
|
|
142
161
|
else {
|
|
143
|
-
|
|
162
|
+
logger_1.default.info('Skipping this prompt.');
|
|
144
163
|
}
|
|
145
164
|
resolve(true);
|
|
146
165
|
});
|
|
147
166
|
});
|
|
148
167
|
}
|
|
149
168
|
if (numAdded < 1) {
|
|
150
|
-
|
|
169
|
+
logger_1.default.info(chalk_1.default.red('No prompts selected. Aborting.'));
|
|
151
170
|
process.exit(1);
|
|
152
171
|
}
|
|
153
172
|
}
|
|
154
173
|
// Split prompts by provider
|
|
155
|
-
for (const
|
|
174
|
+
for (const prompt of testSuite.prompts) {
|
|
156
175
|
for (const provider of testSuite.providers) {
|
|
157
|
-
const
|
|
176
|
+
const updatedDisplay = testSuite.providers.length > 1 ? `[${provider.id()}] ${prompt.display}` : prompt.display;
|
|
158
177
|
prompts.push({
|
|
159
|
-
|
|
160
|
-
display,
|
|
178
|
+
...prompt,
|
|
179
|
+
display: updatedDisplay,
|
|
161
180
|
});
|
|
162
181
|
}
|
|
163
182
|
}
|
|
@@ -195,6 +214,7 @@ class Evaluator {
|
|
|
195
214
|
// And progress bar...
|
|
196
215
|
let progressbar;
|
|
197
216
|
if (options.showProgressBar) {
|
|
217
|
+
// FIXME(ian): Add var combinations too
|
|
198
218
|
const totalNumRuns = testSuite.prompts.length * testSuite.providers.length * (tests.length || 1);
|
|
199
219
|
const cliProgress = await Promise.resolve().then(() => __importStar(require('cli-progress')));
|
|
200
220
|
progressbar = new cliProgress.SingleBar({
|
|
@@ -210,7 +230,6 @@ class Evaluator {
|
|
|
210
230
|
const runEvalOptions = [];
|
|
211
231
|
let rowIndex = 0;
|
|
212
232
|
for (const testCase of tests) {
|
|
213
|
-
let colIndex = 0;
|
|
214
233
|
// Handle default properties
|
|
215
234
|
testCase.vars = Object.assign({}, testSuite.defaultTest?.vars, testCase.vars);
|
|
216
235
|
testCase.assert = [...(testSuite.defaultTest?.assert || []), ...(testCase.assert || [])];
|
|
@@ -220,20 +239,27 @@ class Evaluator {
|
|
|
220
239
|
const prependToPrompt = testCase.options?.prefix || testSuite.defaultTest?.options?.prefix || '';
|
|
221
240
|
const appendToPrompt = testCase.options?.suffix || testSuite.defaultTest?.options?.suffix || '';
|
|
222
241
|
// Finalize test case eval
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
242
|
+
const varCombinations = generateVarCombinations(testCase.vars || {});
|
|
243
|
+
for (const vars of varCombinations) {
|
|
244
|
+
let colIndex = 0;
|
|
245
|
+
for (const prompt of testSuite.prompts) {
|
|
246
|
+
for (const provider of testSuite.providers) {
|
|
247
|
+
runEvalOptions.push({
|
|
248
|
+
provider,
|
|
249
|
+
prompt: {
|
|
250
|
+
...prompt,
|
|
251
|
+
raw: prependToPrompt + prompt.raw + appendToPrompt,
|
|
252
|
+
},
|
|
253
|
+
test: { ...testCase, vars },
|
|
254
|
+
includeProviderId: testSuite.providers.length > 1,
|
|
255
|
+
rowIndex,
|
|
256
|
+
colIndex,
|
|
257
|
+
});
|
|
258
|
+
colIndex++;
|
|
259
|
+
}
|
|
234
260
|
}
|
|
261
|
+
rowIndex++;
|
|
235
262
|
}
|
|
236
|
-
rowIndex++;
|
|
237
263
|
}
|
|
238
264
|
// Actually run the eval
|
|
239
265
|
const results = [];
|
|
@@ -243,7 +269,7 @@ class Evaluator {
|
|
|
243
269
|
if (progressbar) {
|
|
244
270
|
progressbar.increment({
|
|
245
271
|
provider: options.provider.id(),
|
|
246
|
-
prompt: options.prompt.slice(0, 10),
|
|
272
|
+
prompt: options.prompt.raw.slice(0, 10),
|
|
247
273
|
vars: Object.entries(options.test.vars || {})
|
|
248
274
|
.map(([k, v]) => `${k}=${v}`)
|
|
249
275
|
.join(' ')
|
|
@@ -275,7 +301,7 @@ class Evaluator {
|
|
|
275
301
|
if (!table.body[rowIndex]) {
|
|
276
302
|
table.body[rowIndex] = {
|
|
277
303
|
outputs: [],
|
|
278
|
-
vars: table.head.vars.map((varName) => options.test.vars?.[varName] || ''),
|
|
304
|
+
vars: table.head.vars.map((varName) => options.test.vars?.[varName] || '').flat(),
|
|
279
305
|
};
|
|
280
306
|
}
|
|
281
307
|
table.body[rowIndex].outputs[colIndex] = resultText;
|
package/dist/evaluator.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,kEAAqC;AAErC,kDAA0B;AAC1B,kDAA0B;AAC1B,wDAAgC;AAEhC,
|
|
1
|
+
{"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,kEAAqC;AAErC,kDAA0B;AAC1B,kDAA0B;AAC1B,wDAAgC;AAEhC,sDAA8B;AAC9B,6CAA6C;AAe7C,+CAAgD;AAchD,MAAM,uBAAuB,GAAG,CAAC,CAAC;AAElC,SAAS,uBAAuB,CAC9B,IAAuC;IAEvC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,YAAY,GAA6B,CAAC,EAAE,CAAC,CAAC;IAEpD,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;QACtB,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,MAAM,eAAe,GAA6B,EAAE,CAAC;QAErD,KAAK,MAAM,WAAW,IAAI,YAAY,EAAE;YACtC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE;gBAC1B,eAAe,CAAC,IAAI,CAAC,EAAE,GAAG,WAAW,EAAE,CAAC,GAAG,CAAC,EAAE,KAAe,EAAE,CAAC,CAAC;aAClE;SACF;QAED,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;QACxB,YAAY,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAC;KACvC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC;AAED,MAAM,SAAS;IAKb,YAAY,SAAoB,EAAE,OAAwB;QACxD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,KAAK,GAAG;YACX,SAAS,EAAE,CAAC;YACZ,QAAQ,EAAE,CAAC;YACX,UAAU,EAAE;gBACV,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;gBACT,UAAU,EAAE,CAAC;gBACb,MAAM,EAAE,CAAC;aACV;SACF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,EACZ,QAAQ,EACR,MAAM,EACN,IAAI,EACJ,iBAAiB,GACF;QACf,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QAC7B,MAAM,cAAc,GAAG,kBAAQ,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAE/D,4DAA4D;QAC5D,IAAI,aAAa,GAAG,MAAM,CAAC,OAAO,CAAC;QACnC,IAAI,iBAAiB,EAAE;YACrB,aAAa,GAAG,IAAI,QAAQ,CAAC,EAAE,EAAE,KAAK,aAAa,EAAE,CAAC;SACvD;QAED,MAAM,KAAK,GAAG;YACZ,MAAM,EAAE;gBACN,GAAG,EAAE,cAAc;gBACnB,OAAO,EAAE,aAAa;aACvB;YACD,IAAI;SACL,CAAC;QAEF,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;YACxD,MAAM,GAAG,GAAmB;gBAC1B,GAAG,KAAK;gBACR,QAAQ;gBACR,OAAO,EAAE,KAAK;aACf,CAAC;YACF,IAAI,QAAQ,CAAC,KAAK,EAAE;gBAClB,GAAG,CAAC,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC;aAC5B;iBAAM,IAAI,QAAQ,CAAC,MAAM,EAAE;gBAC1B,MAAM,WAAW,GAAG,MAAM,IAAA,0BAAa,EAAC,IAAI,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;gBAC/D,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE;oBACrB,GAAG,CAAC,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC;iBAChC;gBACD,GAAG,CAAC,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC;gBAC/B,IAAI,WAAW,CAAC,UAAU,EAAE;oBAC1B,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,IAAI,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC;oBAC5D,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,WAAW,CAAC,UAAU,CAAC,MAAM,CAAC;oBAC9D,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,UAAU,IAAI,WAAW,CAAC,UAAU,CAAC,UAAU,CAAC;iBACvE;aACF;iBAAM;gBACL,GAAG,CAAC,OAAO,GAAG,KAAK,CAAC;gBACpB,GAAG,CAAC,KAAK,GAAG,WAAW,CAAC;aACzB;YAED,2BAA2B;YAC3B,IAAI,QAAQ,CAAC,UAAU,EAAE;gBACvB,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,IAAI,QAAQ,CAAC,UAAU,CAAC,KAAK,IAAI,CAAC,CAAC;gBAC9D,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC;gBAChE,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,UAAU,IAAI,QAAQ,CAAC,UAAU,CAAC,UAAU,IAAI,CAAC,CAAC;gBACxE,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC;aACjE;YAED,IAAI,GAAG,CAAC,OAAO,EAAE;gBACf,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;aACxB;iBAAM;gBACL,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;aACvB;YAED,OAAO,GAAG,CAAC;SACZ;QAAC,OAAO,GAAG,EAAE;YACZ,OAAO;gBACL,GAAG,KAAK;gBACR,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC;gBAClB,OAAO,EAAE,KAAK;aACf,CAAC;SACH;IACH,CAAC;IAED,KAAK,CAAC,QAAQ;QACZ,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC;QACpC,MAAM,OAAO,GAAa,EAAE,CAAC;QAE7B,IAAI,OAAO,CAAC,mBAAmB,EAAE;YAC/B,iDAAiD;YACjD,gBAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;YAC/C,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,GAAG,MAAM,IAAA,6BAAe,EAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YAC1F,IAAI,KAAK,IAAI,CAAC,UAAU,EAAE;gBACxB,MAAM,IAAI,KAAK,CAAC,+BAA+B,KAAK,EAAE,CAAC,CAAC;aACzD;YAED,gBAAM,CAAC,IAAI,CAAC,eAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,CAAC;YAC9C,IAAI,QAAQ,GAAG,CAAC,CAAC;YACjB,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE;gBAC/B,gBAAM,CAAC,IAAI,CAAC,0DAA0D,CAAC,CAAC;gBACxE,gBAAM,CAAC,IAAI,CAAC,GAAG,MAAM,EAAE,CAAC,CAAC;gBACzB,gBAAM,CAAC,IAAI,CAAC,0DAA0D,CAAC,CAAC;gBAExE,wCAAwC;gBACxC,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;oBAC5B,MAAM,EAAE,GAAG,uBAAQ,CAAC,eAAe,CAAC;wBAClC,KAAK,EAAE,OAAO,CAAC,KAAK;wBACpB,MAAM,EAAE,OAAO,CAAC,MAAM;qBACvB,CAAC,CAAC;oBACH,EAAE,CAAC,QAAQ,CACT,GAAG,eAAK,CAAC,IAAI,CAAC,kCAAkC,CAAC,UAAU,EAC3D,KAAK,EAAE,MAAM,EAAE,EAAE;wBACf,EAAE,CAAC,KAAK,EAAE,CAAC;wBACX,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;4BACxC,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;4BACzD,QAAQ,EAAE,CAAC;yBACZ;6BAAM;4BACL,gBAAM,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;yBACtC;wBACD,OAAO,CAAC,IAAI,CAAC,CAAC;oBAChB,CAAC,CACF,CAAC;gBACJ,CAAC,CAAC,CAAC;aACJ;YAED,IAAI,QAAQ,GAAG,CAAC,EAAE;gBAChB,gBAAM,CAAC,IAAI,CAAC,eAAK,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC,CAAC;gBACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;aACjB;SACF;QAED,4BAA4B;QAC5B,KAAK,MAAM,MAAM,IAAI,SAAS,CAAC,OAAO,EAAE;YACtC,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,SAAS,EAAE;gBAC1C,MAAM,cAAc,GAClB,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,EAAE,EAAE,KAAK,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC;gBAC3F,OAAO,CAAC,IAAI,CAAC;oBACX,GAAG,MAAM;oBACT,OAAO,EAAE,cAAc;iBACxB,CAAC,CAAC;aACJ;SACF;QAED,uCAAuC;QAEvC,MAAM,KAAK,GAAG,CACZ,SAAS,CAAC,KAAK,IAAI;YACjB;YACE,8DAA8D;aAC/D;SACF,CACF,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YACb,MAAM,aAAa,GAAa,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,SAAS,CAAC,WAAW,CAAC,CAAC;YACzE,OAAO,MAAM,CAAC,MAAM,CAAC,aAAa,EAAE,IAAI,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAgB,IAAI,GAAG,EAAE,CAAC;QACxC,MAAM,0BAA0B,GAAwC,EAAE,CAAC;QAC3E,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE;YAC5B,IAAI,QAAQ,CAAC,IAAI,EAAE;gBACjB,MAAM,yBAAyB,GAAsC,EAAE,CAAC;gBACxE,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;oBAChD,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;oBACtB,yBAAyB,CAAC,OAAO,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAC7D;gBACD,0BAA0B,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;aAC5D;SACF;QAED,kBAAkB;QAClB,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAE7C,MAAM,KAAK,GAAkB;YAC3B,IAAI,EAAE;gBACJ,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;gBACtC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE;gBACjC,sCAAsC;aACvC;YACD,IAAI,EAAE,EAAE;SACT,CAAC;QAEF,sBAAsB;QACtB,IAAI,WAAkC,CAAC;QACvC,IAAI,OAAO,CAAC,eAAe,EAAE;YAC3B,uCAAuC;YACvC,MAAM,YAAY,GAChB,SAAS,CAAC,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;YAC9E,MAAM,WAAW,GAAG,wDAAa,cAAc,GAAC,CAAC;YACjD,WAAW,GAAG,IAAI,WAAW,CAAC,SAAS,CACrC;gBACE,MAAM,EACJ,4FAA4F;aAC/F,EACD,WAAW,CAAC,OAAO,CAAC,cAAc,CACnC,CAAC;YACF,WAAW,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC,EAAE;gBACjC,QAAQ,EAAE,EAAE;gBACZ,MAAM,EAAE,EAAE;gBACV,IAAI,EAAE,EAAE;aACT,CAAC,CAAC;SACJ;QAED,oBAAoB;QACpB,MAAM,cAAc,GAAqB,EAAE,CAAC;QAC5C,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE;YAC5B,4BAA4B;YAC5B,QAAQ,CAAC,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,SAAS,CAAC,WAAW,EAAE,IAAI,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC;YAC9E,QAAQ,CAAC,MAAM,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,MAAM,IAAI,EAAE,CAAC,EAAE,GAAG,CAAC,QAAQ,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,CAAC;YACzF,QAAQ,CAAC,OAAO,GAAG,QAAQ,CAAC,OAAO,IAAI,EAAE,CAAC;YAC1C,QAAQ,CAAC,OAAO,CAAC,QAAQ;gBACvB,QAAQ,CAAC,OAAO,CAAC,QAAQ,IAAI,SAAS,CAAC,WAAW,EAAE,OAAO,EAAE,QAAQ,CAAC;YACxE,MAAM,eAAe,GACnB,QAAQ,CAAC,OAAO,EAAE,MAAM,IAAI,SAAS,CAAC,WAAW,EAAE,OAAO,EAAE,MAAM,IAAI,EAAE,CAAC;YAC3E,MAAM,cAAc,GAClB,QAAQ,CAAC,OAAO,EAAE,MAAM,IAAI,SAAS,CAAC,WAAW,EAAE,OAAO,EAAE,MAAM,IAAI,EAAE,CAAC;YAE3E,0BAA0B;YAC1B,MAAM,eAAe,GAAG,uBAAuB,CAAC,QAAQ,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;YACrE,KAAK,MAAM,IAAI,IAAI,eAAe,EAAE;gBAClC,IAAI,QAAQ,GAAG,CAAC,CAAC;gBACjB,KAAK,MAAM,MAAM,IAAI,SAAS,CAAC,OAAO,EAAE;oBACtC,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,SAAS,EAAE;wBAC1C,cAAc,CAAC,IAAI,CAAC;4BAClB,QAAQ;4BACR,MAAM,EAAE;gCACN,GAAG,MAAM;gCACT,GAAG,EAAE,eAAe,GAAG,MAAM,CAAC,GAAG,GAAG,cAAc;6BACnD;4BACD,IAAI,EAAE,EAAE,GAAG,QAAQ,EAAE,IAAI,EAAE;4BAC3B,iBAAiB,EAAE,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC;4BACjD,QAAQ;4BACR,QAAQ;yBACT,CAAC,CAAC;wBACH,QAAQ,EAAE,CAAC;qBACZ;iBACF;gBACD,QAAQ,EAAE,CAAC;aACZ;SACF;QAED,wBAAwB;QACxB,MAAM,OAAO,GAAqB,EAAE,CAAC;QACrC,MAAM,eAAK,CAAC,cAAc,CACxB,cAAc,EACd,OAAO,CAAC,cAAc,IAAI,uBAAuB,EACjD,KAAK,EAAE,OAAuB,EAAE,KAAsB,EAAE,EAAE;YACxD,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAExC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAElB,IAAI,WAAW,EAAE;gBACf,WAAW,CAAC,SAAS,CAAC;oBACpB,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,EAAE,EAAE;oBAC/B,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;oBACvC,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;yBAC1C,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;yBAC5B,IAAI,CAAC,GAAG,CAAC;yBACT,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;iBAChB,CAAC,CAAC;aACJ;YAED,wBAAwB;YACxB,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE;gBAC7B,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;aAClD;YAED,IAAI,UAA8B,CAAC;YACnC,IAAI,MAAM,EAAE;gBACV,IAAI,GAAG,CAAC,OAAO,EAAE;oBACf,UAAU,GAAG,UAAU,GAAG,CAAC,QAAQ,EAAE,MAAM,IAAI,GAAG,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;iBAClE;qBAAM;oBACL,UAAU,GAAG,UAAU,GAAG,CAAC,KAAK,UAAU,GAAG,CAAC,QAAQ,EAAE,MAAM,IAAI,GAAG,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;iBACrF;aACF;iBAAM,IAAI,GAAG,CAAC,KAAK,EAAE;gBACpB,UAAU,GAAG,UAAU,GAAG,CAAC,KAAK,EAAE,CAAC;aACpC;iBAAM;gBACL,UAAU,GAAG,GAAG,CAAC,QAAQ,EAAE,MAAM,IAAI,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC;aACtD;YAED,sEAAsE;YACtE,uCAAuC;YACvC,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;YACvC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE;gBACzB,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG;oBACrB,OAAO,EAAE,EAAE;oBACX,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE;iBAClF,CAAC;aACH;YACD,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,UAAU,CAAC;QACtD,CAAC,CACF,CAAC;QAEF,IAAI,WAAW,EAAE;YACf,WAAW,CAAC,IAAI,EAAE,CAAC;SACpB;QAED,OAAO,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,KAAK,EAAE,CAAC;IAC3D,CAAC;CACF;AAED,SAAgB,QAAQ,CAAC,SAAoB,EAAE,OAAwB;IACrE,MAAM,EAAE,GAAG,IAAI,SAAS,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IAC7C,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC;AACvB,CAAC;AAHD,4BAGC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
import type { EvaluateOptions, TestSuiteConfig } from './types
|
|
2
|
-
export * from './types
|
|
1
|
+
import type { EvaluateOptions, TestSuiteConfig } from './types';
|
|
2
|
+
export * from './types';
|
|
3
3
|
interface EvaluateTestSuite extends TestSuiteConfig {
|
|
4
4
|
prompts: string[];
|
|
5
5
|
}
|
|
6
|
-
declare function evaluate(testSuite: EvaluateTestSuite, options?: EvaluateOptions): Promise<import("./types
|
|
6
|
+
declare function evaluate(testSuite: EvaluateTestSuite, options?: EvaluateOptions): Promise<import("./types").EvaluateSummary>;
|
|
7
7
|
declare const _default: {
|
|
8
8
|
evaluate: typeof evaluate;
|
|
9
9
|
assertions: {
|
|
10
|
-
matchesSimilarity: typeof import("./assertions
|
|
11
|
-
matchesLlmRubric: typeof import("./assertions
|
|
10
|
+
matchesSimilarity: typeof import("./assertions").matchesSimilarity;
|
|
11
|
+
matchesLlmRubric: typeof import("./assertions").matchesLlmRubric;
|
|
12
12
|
};
|
|
13
13
|
providers: {
|
|
14
|
-
OpenAiCompletionProvider: typeof import("./providers/openai
|
|
15
|
-
OpenAiChatCompletionProvider: typeof import("./providers/openai
|
|
16
|
-
LocalAiCompletionProvider: typeof import("./providers/localai
|
|
17
|
-
LocalAiChatProvider: typeof import("./providers/localai
|
|
18
|
-
loadApiProvider: typeof import("./providers
|
|
14
|
+
OpenAiCompletionProvider: typeof import("./providers/openai").OpenAiCompletionProvider;
|
|
15
|
+
OpenAiChatCompletionProvider: typeof import("./providers/openai").OpenAiChatCompletionProvider;
|
|
16
|
+
LocalAiCompletionProvider: typeof import("./providers/localai").LocalAiCompletionProvider;
|
|
17
|
+
LocalAiChatProvider: typeof import("./providers/localai").LocalAiChatProvider;
|
|
18
|
+
loadApiProvider: typeof import("./providers").loadApiProvider;
|
|
19
19
|
};
|
|
20
20
|
};
|
|
21
21
|
export default _default;
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,eAAe,EAAa,eAAe,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,eAAe,EAAa,eAAe,EAAE,MAAM,SAAS,CAAC;AAG3E,cAAc,SAAS,CAAC;AAExB,UAAU,iBAAkB,SAAQ,eAAe;IACjD,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,iBAAe,QAAQ,CAAC,SAAS,EAAE,iBAAiB,EAAE,OAAO,GAAE,eAAoB,8CAalF;;;;;;;;;;;;;;;AAQD,wBAIE"}
|
package/dist/index.js
CHANGED
|
@@ -17,29 +17,33 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
17
17
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
18
18
|
};
|
|
19
19
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
20
|
-
const
|
|
21
|
-
const
|
|
22
|
-
const
|
|
23
|
-
const
|
|
24
|
-
const
|
|
25
|
-
__exportStar(require("./types
|
|
20
|
+
const evaluator_1 = require("./evaluator");
|
|
21
|
+
const providers_1 = require("./providers");
|
|
22
|
+
const assertions_1 = __importDefault(require("./assertions"));
|
|
23
|
+
const providers_2 = __importDefault(require("./providers"));
|
|
24
|
+
const util_1 = require("./util");
|
|
25
|
+
__exportStar(require("./types"), exports);
|
|
26
26
|
async function evaluate(testSuite, options = {}) {
|
|
27
27
|
const constructedTestSuite = {
|
|
28
28
|
...testSuite,
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
29
|
+
providers: await (0, providers_1.loadApiProviders)(testSuite.providers),
|
|
30
|
+
tests: await (0, util_1.readTests)(testSuite.tests),
|
|
31
|
+
// Full prompts expected (not filepaths)
|
|
32
|
+
prompts: testSuite.prompts.map((promptContent) => ({
|
|
33
|
+
raw: promptContent,
|
|
34
|
+
display: promptContent,
|
|
35
|
+
})),
|
|
32
36
|
};
|
|
33
|
-
return (0,
|
|
37
|
+
return (0, evaluator_1.evaluate)(constructedTestSuite, options);
|
|
34
38
|
}
|
|
35
39
|
module.exports = {
|
|
36
40
|
evaluate,
|
|
37
|
-
assertions:
|
|
38
|
-
providers:
|
|
41
|
+
assertions: assertions_1.default,
|
|
42
|
+
providers: providers_2.default,
|
|
39
43
|
};
|
|
40
44
|
exports.default = {
|
|
41
45
|
evaluate,
|
|
42
|
-
assertions:
|
|
43
|
-
providers:
|
|
46
|
+
assertions: assertions_1.default,
|
|
47
|
+
providers: providers_2.default,
|
|
44
48
|
};
|
|
45
49
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;AAAA,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;AAAA,2CAAqD;AACrD,2CAA+C;AAC/C,8DAAsC;AACtC,4DAAoC;AAGpC,iCAAmC;AAEnC,0CAAwB;AAMxB,KAAK,UAAU,QAAQ,CAAC,SAA4B,EAAE,UAA2B,EAAE;IACjF,MAAM,oBAAoB,GAAc;QACtC,GAAG,SAAS;QACZ,SAAS,EAAE,MAAM,IAAA,4BAAgB,EAAC,SAAS,CAAC,SAAS,CAAC;QACtD,KAAK,EAAE,MAAM,IAAA,gBAAS,EAAC,SAAS,CAAC,KAAK,CAAC;QAEvC,wCAAwC;QACxC,OAAO,EAAE,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;YACjD,GAAG,EAAE,aAAa;YAClB,OAAO,EAAE,aAAa;SACvB,CAAC,CAAC;KACJ,CAAC;IACF,OAAO,IAAA,oBAAU,EAAC,oBAAoB,EAAE,OAAO,CAAC,CAAC;AACnD,CAAC;AAED,MAAM,CAAC,OAAO,GAAG;IACf,QAAQ;IACR,UAAU,EAAV,oBAAU;IACV,SAAS,EAAT,mBAAS;CACV,CAAC;AAEF,kBAAe;IACb,QAAQ;IACR,UAAU,EAAV,oBAAU;IACV,SAAS,EAAT,mBAAS;CACV,CAAC"}
|