promptfoo 0.55.0 → 0.57.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -1
- package/dist/package.json +6 -3
- package/dist/src/cache.d.ts.map +1 -1
- package/dist/src/cache.js +14 -7
- package/dist/src/cache.js.map +1 -1
- package/dist/src/commands/eval/filterFailingTests.d.ts +5 -0
- package/dist/src/commands/eval/filterFailingTests.d.ts.map +1 -0
- package/dist/src/commands/eval/filterFailingTests.js +19 -0
- package/dist/src/commands/eval/filterFailingTests.js.map +1 -0
- package/dist/src/commands/eval/filterTests.d.ts +10 -0
- package/dist/src/commands/eval/filterTests.d.ts.map +1 -0
- package/dist/src/commands/eval/filterTests.js +34 -0
- package/dist/src/commands/eval/filterTests.js.map +1 -0
- package/dist/src/commands/export.d.ts +3 -0
- package/dist/src/commands/export.d.ts.map +1 -0
- package/dist/src/commands/export.js +56 -0
- package/dist/src/commands/export.js.map +1 -0
- package/dist/src/commands/import.d.ts +3 -0
- package/dist/src/commands/import.d.ts.map +1 -0
- package/dist/src/commands/import.js +44 -0
- package/dist/src/commands/import.js.map +1 -0
- package/dist/src/commands/list.d.ts.map +1 -1
- package/dist/src/commands/list.js +6 -3
- package/dist/src/commands/list.js.map +1 -1
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +26 -10
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/index.d.ts +2 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/integrations/langfuse.d.ts +2 -0
- package/dist/src/integrations/langfuse.d.ts.map +1 -0
- package/dist/src/integrations/langfuse.js +16 -0
- package/dist/src/integrations/langfuse.js.map +1 -0
- package/dist/src/main.js +29 -6
- package/dist/src/main.js.map +1 -1
- package/dist/src/prompts.d.ts.map +1 -1
- package/dist/src/prompts.js +2 -0
- package/dist/src/prompts.js.map +1 -1
- package/dist/src/providers/azureopenai.d.ts.map +1 -1
- package/dist/src/providers/azureopenai.js +10 -2
- package/dist/src/providers/azureopenai.js.map +1 -1
- package/dist/src/providers/bam.d.ts +76 -0
- package/dist/src/providers/bam.d.ts.map +1 -0
- package/dist/src/providers/bam.js +126 -0
- package/dist/src/providers/bam.js.map +1 -0
- package/dist/src/providers/huggingface.d.ts.map +1 -1
- package/dist/src/providers/huggingface.js +1 -0
- package/dist/src/providers/huggingface.js.map +1 -1
- package/dist/src/providers/openai.d.ts +12 -0
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +11 -2
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/pythonCompletion.js +1 -1
- package/dist/src/providers/pythonCompletion.js.map +1 -1
- package/dist/src/providers/vertex.d.ts +12 -0
- package/dist/src/providers/vertex.d.ts.map +1 -1
- package/dist/src/providers/vertex.js +2 -1
- package/dist/src/providers/vertex.js.map +1 -1
- package/dist/src/providers.d.ts +3 -0
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +29 -11
- package/dist/src/providers.js.map +1 -1
- package/dist/src/python/wrapper.d.ts.map +1 -1
- package/dist/src/python/wrapper.js +5 -3
- package/dist/src/python/wrapper.js.map +1 -1
- package/dist/src/testCases.d.ts.map +1 -1
- package/dist/src/testCases.js +13 -0
- package/dist/src/testCases.js.map +1 -1
- package/dist/src/types.d.ts +17 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/types.js +9 -0
- package/dist/src/types.js.map +1 -1
- package/dist/src/util.d.ts +15 -11
- package/dist/src/util.d.ts.map +1 -1
- package/dist/src/util.js +54 -21
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/nextui/404/index.html +1 -1
- package/dist/src/web/nextui/404.html +1 -1
- package/dist/src/web/nextui/_next/static/chunks/712-6aacc65ec1cd9990.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/{page-fd17c94175203b8b.js → page-220f1e1ba1ae2ad9.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-b242a9f90d8df411.js +1 -0
- package/dist/src/web/nextui/auth/login/index.html +1 -1
- package/dist/src/web/nextui/auth/login/index.txt +1 -1
- package/dist/src/web/nextui/auth/signup/index.html +1 -1
- package/dist/src/web/nextui/auth/signup/index.txt +1 -1
- package/dist/src/web/nextui/datasets/index.html +1 -1
- package/dist/src/web/nextui/datasets/index.txt +1 -1
- package/dist/src/web/nextui/eval/index.html +1 -1
- package/dist/src/web/nextui/eval/index.txt +2 -2
- package/dist/src/web/nextui/index.html +1 -1
- package/dist/src/web/nextui/index.txt +1 -1
- package/dist/src/web/nextui/progress/index.html +1 -1
- package/dist/src/web/nextui/progress/index.txt +1 -1
- package/dist/src/web/nextui/prompts/index.html +1 -1
- package/dist/src/web/nextui/prompts/index.txt +1 -1
- package/dist/src/web/nextui/setup/index.html +1 -1
- package/dist/src/web/nextui/setup/index.txt +1 -1
- package/package.json +6 -3
- package/dist/src/web/nextui/_next/static/chunks/670-c0bbfb378b86dbd6.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-f7288a403368b113.js +0 -1
- /package/dist/src/web/nextui/_next/static/{3HpbHMqtYmi39VCixO04u → 3BrycjzrBIZyXUXHQkzf3}/_buildManifest.js +0 -0
- /package/dist/src/web/nextui/_next/static/{3HpbHMqtYmi39VCixO04u → 3BrycjzrBIZyXUXHQkzf3}/_ssgManifest.js +0 -0
package/README.md
CHANGED
|
@@ -199,7 +199,7 @@ npx promptfoo eval -p prompts.txt -r openai:gpt-3.5-turbo -t tests.csv
|
|
|
199
199
|

|
|
200
200
|
-->
|
|
201
201
|
|
|
202
|
-
This command will evaluate the prompts in `prompts.txt`,
|
|
202
|
+
This command will evaluate the prompts in `prompts.txt`, substituting the variable values from `vars.csv`, and output results in your terminal.
|
|
203
203
|
|
|
204
204
|
You can also output a nice [spreadsheet](https://docs.google.com/spreadsheets/d/1nanoj3_TniWrDl1Sj-qYqIMD6jwm5FBy15xPFdUTsmI/edit?usp=sharing), [JSON](https://github.com/typpo/promptfoo/blob/main/examples/simple-cli/output.json), YAML, or an HTML file:
|
|
205
205
|
|
|
@@ -248,6 +248,9 @@ You can also use `promptfoo` as a library in your project by importing the `eval
|
|
|
248
248
|
|
|
249
249
|
// The required score for this test case. If not provided, the test case is graded pass/fail.
|
|
250
250
|
threshold?: number;
|
|
251
|
+
|
|
252
|
+
// Override the provider for this test
|
|
253
|
+
provider?: string | ProviderOptions | ApiProvider;
|
|
251
254
|
}
|
|
252
255
|
|
|
253
256
|
interface Assertion {
|
package/dist/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "promptfoo",
|
|
3
3
|
"description": "LLM eval & testing toolkit",
|
|
4
4
|
"author": "Ian Webster",
|
|
5
|
-
"version": "0.
|
|
5
|
+
"version": "0.57.0",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"type": "commonjs",
|
|
8
8
|
"repository": "promptfoo/promptfoo",
|
|
@@ -45,8 +45,10 @@
|
|
|
45
45
|
"@aws-sdk/client-bedrock-runtime": "^3.458.0",
|
|
46
46
|
"@azure/identity": "^4.0.0",
|
|
47
47
|
"@azure/openai-assistants": "^1.0.0-beta.5",
|
|
48
|
+
"@ibm-generative-ai/node-sdk": "^2.0.6",
|
|
48
49
|
"google-auth-library": "^9.7.0",
|
|
49
|
-
"googleapis": "^134.0.0"
|
|
50
|
+
"googleapis": "^134.0.0",
|
|
51
|
+
"langfuse": "^3.7.0"
|
|
50
52
|
},
|
|
51
53
|
"devDependencies": {
|
|
52
54
|
"@aws-sdk/client-bedrock-runtime": "^3.458.0",
|
|
@@ -100,13 +102,14 @@
|
|
|
100
102
|
"dotenv": "^16.4.5",
|
|
101
103
|
"drizzle-orm": "^0.29.3",
|
|
102
104
|
"express": "^4.18.2",
|
|
105
|
+
"fast-deep-equal": "^3.1.3",
|
|
103
106
|
"fastest-levenshtein": "^1.0.16",
|
|
104
107
|
"glob": "^10.2.6",
|
|
105
108
|
"js-yaml": "^4.1.0",
|
|
106
109
|
"mathjs": "^12.4.1",
|
|
107
110
|
"node-fetch": "^2.6.7",
|
|
108
111
|
"nunjucks": "^3.2.4",
|
|
109
|
-
"openai": "^4.
|
|
112
|
+
"openai": "^4.38.5",
|
|
110
113
|
"opener": "^1.5.2",
|
|
111
114
|
"proxy-agent": "^6.3.1",
|
|
112
115
|
"python-shell": "^5.0.0",
|
package/dist/src/cache.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../src/cache.ts"],"names":[],"mappings":"AAGA,OAAO,YAAY,MAAM,eAAe,CAAC;AAQzC,OAAO,KAAK,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAc3D,wBAAgB,QAAQ,
|
|
1
|
+
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../src/cache.ts"],"names":[],"mappings":"AAGA,OAAO,YAAY,MAAM,eAAe,CAAC;AAQzC,OAAO,KAAK,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAc3D,wBAAgB,QAAQ,uBAsBvB;AAED,wBAAsB,cAAc,CAClC,GAAG,EAAE,WAAW,EAChB,OAAO,yBAAkB,EACzB,OAAO,EAAE,MAAM,EACf,MAAM,GAAE,MAAM,GAAG,MAAe,EAChC,IAAI,GAAE,OAAe,GACpB,OAAO,CAAC;IAAE,IAAI,EAAE,GAAG,CAAC;IAAC,MAAM,EAAE,OAAO,CAAA;CAAE,CAAC,CA2DzC;AAED,wBAAgB,WAAW,SAE1B;AAED,wBAAgB,YAAY,SAE3B;AAED,wBAAsB,UAAU,kBAE/B;AAED,wBAAgB,cAAc,YAE7B"}
|
package/dist/src/cache.js
CHANGED
|
@@ -17,16 +17,19 @@ let enabled = typeof process.env.PROMPTFOO_CACHE_ENABLED === 'undefined'
|
|
|
17
17
|
: process.env.PROMPTFOO_CACHE_ENABLED === '1' ||
|
|
18
18
|
process.env.PROMPTFOO_CACHE_ENABLED === 'true' ||
|
|
19
19
|
process.env.PROMPTFOO_CACHE_ENABLED === 'yes';
|
|
20
|
-
|
|
20
|
+
let cacheType = process.env.PROMPTFOO_CACHE_TYPE || (process.env.NODE_ENV === 'test' ? 'memory' : 'disk');
|
|
21
21
|
function getCache() {
|
|
22
22
|
if (!cacheInstance) {
|
|
23
|
-
|
|
24
|
-
if (
|
|
25
|
-
|
|
26
|
-
fs_1.default.
|
|
23
|
+
let cachePath = '';
|
|
24
|
+
if (cacheType === 'disk' && enabled) {
|
|
25
|
+
cachePath = process.env.PROMPTFOO_CACHE_PATH || path_1.default.join((0, util_1.getConfigDirectoryPath)(), 'cache');
|
|
26
|
+
if (!fs_1.default.existsSync(cachePath)) {
|
|
27
|
+
logger_1.default.info(`Creating cache folder at ${cachePath}.`);
|
|
28
|
+
fs_1.default.mkdirSync(cachePath, { recursive: true });
|
|
29
|
+
}
|
|
27
30
|
}
|
|
28
31
|
cacheInstance = cache_manager_1.default.caching({
|
|
29
|
-
store: cacheType === 'disk' ? cache_manager_fs_hash_1.default : 'memory',
|
|
32
|
+
store: cacheType === 'disk' && enabled ? cache_manager_fs_hash_1.default : 'memory',
|
|
30
33
|
options: {
|
|
31
34
|
max: process.env.PROMPTFOO_CACHE_MAX_FILE_COUNT || 10000,
|
|
32
35
|
path: cachePath,
|
|
@@ -72,6 +75,10 @@ async function fetchWithCache(url, options = {}, timeout, format = 'json', bust
|
|
|
72
75
|
// Don't cache error responses
|
|
73
76
|
return;
|
|
74
77
|
}
|
|
78
|
+
if (!data) {
|
|
79
|
+
// Don't cache empty responses
|
|
80
|
+
return;
|
|
81
|
+
}
|
|
75
82
|
logger_1.default.debug(`Storing ${url} response in cache: ${data}`);
|
|
76
83
|
return data;
|
|
77
84
|
}
|
|
@@ -79,7 +86,7 @@ async function fetchWithCache(url, options = {}, timeout, format = 'json', bust
|
|
|
79
86
|
throw new Error(`Error parsing response from ${url}: ${err.message}. Received text: ${responseText}`);
|
|
80
87
|
}
|
|
81
88
|
});
|
|
82
|
-
if (cached) {
|
|
89
|
+
if (cached && cachedResponse) {
|
|
83
90
|
logger_1.default.debug(`Returning cached response for ${url}: ${cachedResponse}`);
|
|
84
91
|
}
|
|
85
92
|
return {
|
package/dist/src/cache.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../../src/cache.ts"],"names":[],"mappings":";;;;;;AAAA,4CAAoB;AACpB,gDAAwB;AAExB,kEAAyC;AACzC,kFAA4C;AAE5C,sDAA8B;AAC9B,mCAA2C;AAC3C,iCAAgD;AAKhD,IAAI,aAAgC,CAAC;AAErC,IAAI,OAAO,GACT,OAAO,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,WAAW;IACxD,CAAC,CAAC,IAAI;IACN,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,GAAG;QAC3C,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,MAAM;QAC9C,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,KAAK,CAAC;AAEpD,
|
|
1
|
+
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../../src/cache.ts"],"names":[],"mappings":";;;;;;AAAA,4CAAoB;AACpB,gDAAwB;AAExB,kEAAyC;AACzC,kFAA4C;AAE5C,sDAA8B;AAC9B,mCAA2C;AAC3C,iCAAgD;AAKhD,IAAI,aAAgC,CAAC;AAErC,IAAI,OAAO,GACT,OAAO,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,WAAW;IACxD,CAAC,CAAC,IAAI;IACN,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,GAAG;QAC3C,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,MAAM;QAC9C,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,KAAK,CAAC;AAEpD,IAAI,SAAS,GACX,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;AAE5F,SAAgB,QAAQ;IACtB,IAAI,CAAC,aAAa,EAAE;QAClB,IAAI,SAAS,GAAG,EAAE,CAAC;QACnB,IAAI,SAAS,KAAK,MAAM,IAAI,OAAO,EAAE;YACnC,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,cAAI,CAAC,IAAI,CAAC,IAAA,6BAAsB,GAAE,EAAE,OAAO,CAAC,CAAC;YAC7F,IAAI,CAAC,YAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE;gBAC7B,gBAAM,CAAC,IAAI,CAAC,4BAA4B,SAAS,GAAG,CAAC,CAAC;gBACtD,YAAE,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;aAC9C;SACF;QACD,aAAa,GAAG,uBAAY,CAAC,OAAO,CAAC;YACnC,KAAK,EAAE,SAAS,KAAK,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,+BAAO,CAAC,CAAC,CAAC,QAAQ;YAC3D,OAAO,EAAE;gBACP,GAAG,EAAE,OAAO,CAAC,GAAG,CAAC,8BAA8B,IAAI,KAAM;gBACzD,IAAI,EAAE,SAAS;gBACf,GAAG,EAAE,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE;gBACzD,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,wBAAwB,IAAI,GAAG,EAAE,iBAAiB;gBACvE,+CAA+C;aAChD;SACF,CAAC,CAAC;KACJ;IACD,OAAO,aAAa,CAAC;AACvB,CAAC;AAtBD,4BAsBC;AAEM,KAAK,UAAU,cAAc,CAClC,GAAgB,EAChB,UAAuB,EAAE,EACzB,OAAe,EACf,SAA0B,MAAM,EAChC,OAAgB,KAAK;IAErB,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;QACpB,MAAM,IAAI,GAAG,MAAM,IAAA,wBAAgB,EAAC,GAAG,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;QAC3D,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;QACnC,IAAI;YACF,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ;aAC1D,CAAC;SACH;QAAC,OAAO,KAAK,EAAE;YACd,MAAM,IAAI,KAAK,CAAC,mCAAmC,QAAQ,EAAE,CAAC,CAAC;SAChE;KACF;IAED,MAAM,KAAK,GAAG,MAAM,QAAQ,EAAE,CAAC;IAE/B,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;IACxC,OAAO,IAAI,CAAC,OAAO,CAAC;IACpB,MAAM,QAAQ,GAAG,SAAS,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC;IAExD,IAAI,MAAM,GAAG,IAAI,CAAC;IAClB,IAAI,aAAa,GAAG,IAAI,CAAC;IAEzB,sFAAsF;IACtF,MAAM,cAAc,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,KAAK,IAAI,EAAE;QAC3D,kDAAkD;QAClD,MAAM,GAAG,KAAK,CAAC;QACf,MAAM,QAAQ,GAAG,MAAM,IAAA,wBAAgB,EAAC,GAAG,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;QAC/D,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC3C,IAAI;YACF,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;YACzF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE;gBAChB,aAAa,GAAG,IAAI,CAAC;gBACrB,8BAA8B;gBAC9B,OAAO;aACR;YACD,IAAI,CAAC,IAAI,EAAE;gBACT,8BAA8B;gBAC9B,OAAO;aACR;YACD,gBAAM,CAAC,KAAK,CAAC,WAAW,GAAG,uBAAuB,IAAI,EAAE,CAAC,CAAC;YAC1D,OAAO,IAAI,CAAC;SACb;QAAC,OAAO,GAAG,EAAE;YACZ,MAAM,IAAI,KAAK,CACb,+BAA+B,GAAG,KAC/B,GAAa,CAAC,OACjB,oBAAoB,YAAY,EAAE,CACnC,CAAC;SACH;IACH,CAAC,CAAC,CAAC;IAEH,IAAI,MAAM,IAAI,cAAc,EAAE;QAC5B,gBAAM,CAAC,KAAK,CAAC,iCAAiC,GAAG,KAAK,cAAc,EAAE,CAAC,CAAC;KACzE;IAED,OAAO;QACL,MAAM;QACN,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,cAAc,IAAI,aAAa,CAAW,CAAC;KAC9D,CAAC;AACJ,CAAC;AAjED,wCAiEC;AAED,SAAgB,WAAW;IACzB,OAAO,GAAG,IAAI,CAAC;AACjB,CAAC;AAFD,kCAEC;AAED,SAAgB,YAAY;IAC1B,OAAO,GAAG,KAAK,CAAC;AAClB,CAAC;AAFD,oCAEC;AAEM,KAAK,UAAU,UAAU;IAC9B,OAAO,QAAQ,EAAE,CAAC,KAAK,EAAE,CAAC;AAC5B,CAAC;AAFD,gCAEC;AAED,SAAgB,cAAc;IAC5B,OAAO,OAAO,CAAC;AACjB,CAAC;AAFD,wCAEC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"filterFailingTests.d.ts","sourceRoot":"","sources":["../../../../src/commands/eval/filterFailingTests.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,SAAS,EAAC,MAAM,aAAa,CAAC;AAGtC,KAAK,KAAK,GAAG,WAAW,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;AAE7C,wBAAsB,kBAAkB,CAAC,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAejG"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.filterFailingTests = void 0;
|
|
4
|
+
const util_1 = require("../../util");
|
|
5
|
+
async function filterFailingTests(testSuite, outputPath) {
|
|
6
|
+
if (!testSuite.tests) {
|
|
7
|
+
return [];
|
|
8
|
+
}
|
|
9
|
+
const { results } = await (0, util_1.readOutput)(outputPath);
|
|
10
|
+
const failingResults = results.results.filter((result) => !result.success);
|
|
11
|
+
if (failingResults.length === 0) {
|
|
12
|
+
return [];
|
|
13
|
+
}
|
|
14
|
+
return [...testSuite.tests].filter((test) => {
|
|
15
|
+
return failingResults.some((result) => (0, util_1.resultIsForTestCase)(result, test));
|
|
16
|
+
});
|
|
17
|
+
}
|
|
18
|
+
exports.filterFailingTests = filterFailingTests;
|
|
19
|
+
//# sourceMappingURL=filterFailingTests.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"filterFailingTests.js","sourceRoot":"","sources":["../../../../src/commands/eval/filterFailingTests.ts"],"names":[],"mappings":";;;AACA,qCAA2D;AAIpD,KAAK,UAAU,kBAAkB,CAAC,SAAoB,EAAE,UAAkB;IAC/E,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE;QACpB,OAAO,EAAE,CAAC;KACX;IAED,MAAM,EAAC,OAAO,EAAC,GAAG,MAAM,IAAA,iBAAU,EAAC,UAAU,CAAC,CAAC;IAC/C,MAAM,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAE3E,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE;QAC/B,OAAO,EAAE,CAAC;KACX;IAED,OAAO,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QAC1C,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,IAAA,0BAAmB,EAAC,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC;IAC5E,CAAC,CAAU,CAAC;AACd,CAAC;AAfD,gDAeC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { TestSuite } from "../../types";
|
|
2
|
+
interface Args {
|
|
3
|
+
firstN?: string;
|
|
4
|
+
pattern?: string;
|
|
5
|
+
failing?: string;
|
|
6
|
+
}
|
|
7
|
+
type Tests = TestSuite['tests'];
|
|
8
|
+
export declare function filterTests(testSuite: TestSuite, args: Args): Promise<Tests>;
|
|
9
|
+
export {};
|
|
10
|
+
//# sourceMappingURL=filterTests.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"filterTests.d.ts","sourceRoot":"","sources":["../../../../src/commands/eval/filterTests.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,SAAS,EAAC,MAAM,aAAa,CAAC;AAGtC,UAAU,IAAI;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,KAAK,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;AAEhC,wBAAsB,WAAW,CAAC,SAAS,EAAE,SAAS,EAAE,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,CAmClF"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.filterTests = void 0;
|
|
4
|
+
const filterFailingTests_1 = require("./filterFailingTests");
|
|
5
|
+
async function filterTests(testSuite, args) {
|
|
6
|
+
const tests = testSuite.tests;
|
|
7
|
+
if (!tests) {
|
|
8
|
+
return tests;
|
|
9
|
+
}
|
|
10
|
+
if (Object.keys(args).length === 0) {
|
|
11
|
+
return tests;
|
|
12
|
+
}
|
|
13
|
+
const { firstN, pattern, failing } = args;
|
|
14
|
+
let newTests;
|
|
15
|
+
if (failing) {
|
|
16
|
+
newTests = await (0, filterFailingTests_1.filterFailingTests)(testSuite, failing);
|
|
17
|
+
}
|
|
18
|
+
else {
|
|
19
|
+
newTests = [...tests];
|
|
20
|
+
}
|
|
21
|
+
if (pattern) {
|
|
22
|
+
newTests = newTests.filter((test) => test.description && test.description.match(pattern));
|
|
23
|
+
}
|
|
24
|
+
if (firstN) {
|
|
25
|
+
const count = parseInt(firstN);
|
|
26
|
+
if (isNaN(count)) {
|
|
27
|
+
throw new Error(`firstN must be a number, got: ${firstN}`);
|
|
28
|
+
}
|
|
29
|
+
newTests = newTests.slice(0, count);
|
|
30
|
+
}
|
|
31
|
+
return newTests;
|
|
32
|
+
}
|
|
33
|
+
exports.filterTests = filterTests;
|
|
34
|
+
//# sourceMappingURL=filterTests.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"filterTests.js","sourceRoot":"","sources":["../../../../src/commands/eval/filterTests.ts"],"names":[],"mappings":";;;AACA,6DAAwD;AAUjD,KAAK,UAAU,WAAW,CAAC,SAAoB,EAAE,IAAU;IAChE,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC;IAE9B,IAAI,CAAC,KAAK,EAAE;QACV,OAAO,KAAK,CAAC;KACd;IAED,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE;QAClC,OAAO,KAAK,CAAC;KACd;IAED,MAAM,EAAC,MAAM,EAAE,OAAO,EAAE,OAAO,EAAC,GAAG,IAAI,CAAC;IACxC,IAAI,QAA4B,CAAC;IAEjC,IAAI,OAAO,EAAE;QACX,QAAQ,GAAG,MAAM,IAAA,uCAAkB,EAAC,SAAS,EAAE,OAAO,CAAC,CAAC;KACzD;SAAM;QACL,QAAQ,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC;KACvB;IAED,IAAI,OAAO,EAAG;QACZ,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;KAC3F;IAED,IAAI,MAAM,EAAE;QACV,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;QAE/B,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE;YAChB,MAAM,IAAI,KAAK,CAAC,iCAAiC,MAAM,EAAE,CAAC,CAAC;SAC5D;QAED,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;KACrC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAnCD,kCAmCC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"export.d.ts","sourceRoot":"","sources":["../../../src/commands/export.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAQpC,wBAAgB,aAAa,CAAC,OAAO,EAAE,OAAO,QA6C7C"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.exportCommand = void 0;
|
|
7
|
+
const drizzle_orm_1 = require("drizzle-orm");
|
|
8
|
+
const database_1 = require("../database");
|
|
9
|
+
const fs_1 = __importDefault(require("fs"));
|
|
10
|
+
const logger_1 = __importDefault(require("../logger"));
|
|
11
|
+
const telemetry_1 = __importDefault(require("../telemetry"));
|
|
12
|
+
function exportCommand(program) {
|
|
13
|
+
program
|
|
14
|
+
.command('export <evalId>')
|
|
15
|
+
.description('Export an eval record to a JSON file')
|
|
16
|
+
.option('-o, --output [outputPath]', 'Output path for the exported file')
|
|
17
|
+
.action(async (evalId, cmdObj) => {
|
|
18
|
+
try {
|
|
19
|
+
const db = (0, database_1.getDb)();
|
|
20
|
+
const result = await db
|
|
21
|
+
.select({
|
|
22
|
+
id: database_1.evals.id,
|
|
23
|
+
createdAt: database_1.evals.createdAt,
|
|
24
|
+
description: database_1.evals.description,
|
|
25
|
+
results: database_1.evals.results,
|
|
26
|
+
config: database_1.evals.config,
|
|
27
|
+
})
|
|
28
|
+
.from(database_1.evals)
|
|
29
|
+
.where((0, drizzle_orm_1.eq)(database_1.evals.id, evalId))
|
|
30
|
+
.execute();
|
|
31
|
+
if (!result || result.length === 0) {
|
|
32
|
+
logger_1.default.error(`No eval found with ID ${evalId}`);
|
|
33
|
+
process.exit(1);
|
|
34
|
+
}
|
|
35
|
+
const jsonData = JSON.stringify(result[0], null, 2);
|
|
36
|
+
if (cmdObj.output) {
|
|
37
|
+
fs_1.default.writeFileSync(cmdObj.output, jsonData);
|
|
38
|
+
logger_1.default.info(`Eval with ID ${evalId} has been successfully exported to ${cmdObj.output}.`);
|
|
39
|
+
}
|
|
40
|
+
else {
|
|
41
|
+
console.log(jsonData);
|
|
42
|
+
}
|
|
43
|
+
telemetry_1.default.record('command_used', {
|
|
44
|
+
name: 'export',
|
|
45
|
+
evalId: evalId,
|
|
46
|
+
});
|
|
47
|
+
await telemetry_1.default.send();
|
|
48
|
+
}
|
|
49
|
+
catch (error) {
|
|
50
|
+
logger_1.default.error(`Failed to export eval: ${error}`);
|
|
51
|
+
process.exit(1);
|
|
52
|
+
}
|
|
53
|
+
});
|
|
54
|
+
}
|
|
55
|
+
exports.exportCommand = exportCommand;
|
|
56
|
+
//# sourceMappingURL=export.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"export.js","sourceRoot":"","sources":["../../../src/commands/export.ts"],"names":[],"mappings":";;;;;;AACA,6CAAiC;AAEjC,0CAA2C;AAC3C,4CAAoB;AACpB,uDAA+B;AAC/B,6DAAqC;AAErC,SAAgB,aAAa,CAAC,OAAgB;IAC5C,OAAO;SACJ,OAAO,CAAC,iBAAiB,CAAC;SAC1B,WAAW,CAAC,sCAAsC,CAAC;SACnD,MAAM,CAAC,2BAA2B,EAAE,mCAAmC,CAAC;SACxE,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE;QAC/B,IAAI;YACF,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;YACnB,MAAM,MAAM,GAAG,MAAM,EAAE;iBACpB,MAAM,CAAC;gBACN,EAAE,EAAE,gBAAK,CAAC,EAAE;gBACZ,SAAS,EAAE,gBAAK,CAAC,SAAS;gBAC1B,WAAW,EAAE,gBAAK,CAAC,WAAW;gBAC9B,OAAO,EAAE,gBAAK,CAAC,OAAO;gBACtB,MAAM,EAAE,gBAAK,CAAC,MAAM;aACrB,CAAC;iBACD,IAAI,CAAC,gBAAK,CAAC;iBACX,KAAK,CAAC,IAAA,gBAAE,EAAC,gBAAK,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;iBAC3B,OAAO,EAAE,CAAC;YAEb,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE;gBAClC,gBAAM,CAAC,KAAK,CAAC,yBAAyB,MAAM,EAAE,CAAC,CAAC;gBAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;aACjB;YAED,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YACpD,IAAI,MAAM,CAAC,MAAM,EAAE;gBACjB,YAAE,CAAC,aAAa,CAAC,MAAM,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;gBAC1C,gBAAM,CAAC,IAAI,CACT,gBAAgB,MAAM,sCAAsC,MAAM,CAAC,MAAM,GAAG,CAC7E,CAAC;aACH;iBAAM;gBACL,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;aACvB;YAED,mBAAS,CAAC,MAAM,CAAC,cAAc,EAAE;gBAC/B,IAAI,EAAE,QAAQ;gBACd,MAAM,EAAE,MAAM;aACf,CAAC,CAAC;YACH,MAAM,mBAAS,CAAC,IAAI,EAAE,CAAC;SACxB;QAAC,OAAO,KAAK,EAAE;YACd,gBAAM,CAAC,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;YAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;SACjB;IACH,CAAC,CAAC,CAAC;AACP,CAAC;AA7CD,sCA6CC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"import.d.ts","sourceRoot":"","sources":["../../../src/commands/import.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAMpC,wBAAgB,aAAa,CAAC,OAAO,EAAE,OAAO,QAgC7C"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.importCommand = void 0;
|
|
7
|
+
const database_1 = require("../database");
|
|
8
|
+
const fs_1 = __importDefault(require("fs"));
|
|
9
|
+
const logger_1 = __importDefault(require("../logger"));
|
|
10
|
+
const telemetry_1 = __importDefault(require("../telemetry"));
|
|
11
|
+
function importCommand(program) {
|
|
12
|
+
program
|
|
13
|
+
.command('import <file>')
|
|
14
|
+
.description('Import an eval record from a JSON file')
|
|
15
|
+
.action(async (file) => {
|
|
16
|
+
try {
|
|
17
|
+
const fileContent = fs_1.default.readFileSync(file, 'utf-8');
|
|
18
|
+
const evalData = JSON.parse(fileContent);
|
|
19
|
+
const db = (0, database_1.getDb)();
|
|
20
|
+
await db
|
|
21
|
+
.insert(database_1.evals)
|
|
22
|
+
.values({
|
|
23
|
+
id: evalData.id,
|
|
24
|
+
createdAt: evalData.createdAt,
|
|
25
|
+
description: evalData.description,
|
|
26
|
+
results: evalData.results,
|
|
27
|
+
config: evalData.config,
|
|
28
|
+
})
|
|
29
|
+
.run();
|
|
30
|
+
logger_1.default.info(`Eval with ID ${evalData.id} has been successfully imported.`);
|
|
31
|
+
telemetry_1.default.record('command_used', {
|
|
32
|
+
name: 'import',
|
|
33
|
+
evalId: evalData.id,
|
|
34
|
+
});
|
|
35
|
+
await telemetry_1.default.send();
|
|
36
|
+
}
|
|
37
|
+
catch (error) {
|
|
38
|
+
logger_1.default.error(`Failed to import eval: ${error}`);
|
|
39
|
+
process.exit(1);
|
|
40
|
+
}
|
|
41
|
+
});
|
|
42
|
+
}
|
|
43
|
+
exports.importCommand = importCommand;
|
|
44
|
+
//# sourceMappingURL=import.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"import.js","sourceRoot":"","sources":["../../../src/commands/import.ts"],"names":[],"mappings":";;;;;;AACA,0CAA2C;AAC3C,4CAAoB;AACpB,uDAA+B;AAC/B,6DAAqC;AAErC,SAAgB,aAAa,CAAC,OAAgB;IAC5C,OAAO;SACJ,OAAO,CAAC,eAAe,CAAC;SACxB,WAAW,CAAC,wCAAwC,CAAC;SACrD,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;QACrB,IAAI;YACF,MAAM,WAAW,GAAG,YAAE,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YAEzC,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;YACnB,MAAM,EAAE;iBACL,MAAM,CAAC,gBAAK,CAAC;iBACb,MAAM,CAAC;gBACN,EAAE,EAAE,QAAQ,CAAC,EAAE;gBACf,SAAS,EAAE,QAAQ,CAAC,SAAS;gBAC7B,WAAW,EAAE,QAAQ,CAAC,WAAW;gBACjC,OAAO,EAAE,QAAQ,CAAC,OAAO;gBACzB,MAAM,EAAE,QAAQ,CAAC,MAAM;aACxB,CAAC;iBACD,GAAG,EAAE,CAAC;YAET,gBAAM,CAAC,IAAI,CAAC,gBAAgB,QAAQ,CAAC,EAAE,kCAAkC,CAAC,CAAC;YAC3E,mBAAS,CAAC,MAAM,CAAC,cAAc,EAAE;gBAC/B,IAAI,EAAE,QAAQ;gBACd,MAAM,EAAE,QAAQ,CAAC,EAAE;aACpB,CAAC,CAAC;YACH,MAAM,mBAAS,CAAC,IAAI,EAAE,CAAC;SACxB;QAAC,OAAO,KAAK,EAAE;YACd,gBAAM,CAAC,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;YAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;SACjB;IACH,CAAC,CAAC,CAAC;AACP,CAAC;AAhCD,sCAgCC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"list.d.ts","sourceRoot":"","sources":["../../../src/commands/list.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,wBAAgB,WAAW,CAAC,OAAO,EAAE,OAAO,
|
|
1
|
+
{"version":3,"file":"list.d.ts","sourceRoot":"","sources":["../../../src/commands/list.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,wBAAgB,WAAW,CAAC,OAAO,EAAE,OAAO,QA0G3C"}
|
|
@@ -15,6 +15,7 @@ function listCommand(program) {
|
|
|
15
15
|
.command('evals')
|
|
16
16
|
.description('List evaluations.')
|
|
17
17
|
.option('--env-path <path>', 'Path to the environment file')
|
|
18
|
+
.option('-n <limit>', 'Number of evals to display')
|
|
18
19
|
.action(async (cmdObj) => {
|
|
19
20
|
(0, util_1.setupEnv)(cmdObj.envPath);
|
|
20
21
|
telemetry_1.default.maybeShowNotice();
|
|
@@ -22,7 +23,7 @@ function listCommand(program) {
|
|
|
22
23
|
name: 'list evals',
|
|
23
24
|
});
|
|
24
25
|
await telemetry_1.default.send();
|
|
25
|
-
const evals = await (0, util_1.getEvals)();
|
|
26
|
+
const evals = await (0, util_1.getEvals)(Number(cmdObj.n) || undefined);
|
|
26
27
|
const tableData = evals.map((evl) => ({
|
|
27
28
|
'Eval ID': evl.id,
|
|
28
29
|
Description: evl.description || '',
|
|
@@ -38,6 +39,7 @@ function listCommand(program) {
|
|
|
38
39
|
.command('prompts')
|
|
39
40
|
.description('List prompts used')
|
|
40
41
|
.option('--env-path <path>', 'Path to the environment file')
|
|
42
|
+
.option('-n <limit>', 'Number of prompts to display')
|
|
41
43
|
.action(async (cmdObj) => {
|
|
42
44
|
(0, util_1.setupEnv)(cmdObj.envPath);
|
|
43
45
|
telemetry_1.default.maybeShowNotice();
|
|
@@ -45,7 +47,7 @@ function listCommand(program) {
|
|
|
45
47
|
name: 'list prompts',
|
|
46
48
|
});
|
|
47
49
|
await telemetry_1.default.send();
|
|
48
|
-
const prompts = (await (0, util_1.getPrompts)()).sort((a, b) => b.recentEvalId.localeCompare(a.recentEvalId));
|
|
50
|
+
const prompts = (await (0, util_1.getPrompts)(Number(cmdObj.n) || undefined)).sort((a, b) => b.recentEvalId.localeCompare(a.recentEvalId));
|
|
49
51
|
const tableData = prompts.map((prompt) => ({
|
|
50
52
|
'Prompt ID': prompt.id.slice(0, 6),
|
|
51
53
|
Raw: prompt.prompt.raw.slice(0, 100) + (prompt.prompt.raw.length > 100 ? '...' : ''),
|
|
@@ -61,6 +63,7 @@ function listCommand(program) {
|
|
|
61
63
|
.command('datasets')
|
|
62
64
|
.description('List datasets used')
|
|
63
65
|
.option('--env-path <path>', 'Path to the environment file')
|
|
66
|
+
.option('-n <limit>', 'Number of datasets to display')
|
|
64
67
|
.action(async (cmdObj) => {
|
|
65
68
|
(0, util_1.setupEnv)(cmdObj.envPath);
|
|
66
69
|
telemetry_1.default.maybeShowNotice();
|
|
@@ -68,7 +71,7 @@ function listCommand(program) {
|
|
|
68
71
|
name: 'list datasets',
|
|
69
72
|
});
|
|
70
73
|
await telemetry_1.default.send();
|
|
71
|
-
const datasets = (await (0, util_1.getTestCases)()).sort((a, b) => b.recentEvalId.localeCompare(a.recentEvalId));
|
|
74
|
+
const datasets = (await (0, util_1.getTestCases)(Number(cmdObj.n) || undefined)).sort((a, b) => b.recentEvalId.localeCompare(a.recentEvalId));
|
|
72
75
|
const tableData = datasets.map((dataset) => ({
|
|
73
76
|
'Dataset ID': dataset.id.slice(0, 6),
|
|
74
77
|
'Highest scoring prompt': dataset.prompts
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"list.js","sourceRoot":"","sources":["../../../src/commands/list.ts"],"names":[],"mappings":";;;;;;AAAA,kDAA0B;AAG1B,kCAA4F;AAC5F,oCAAqC;AACrC,uDAA+B;AAC/B,6DAAqC;AAErC,SAAgB,WAAW,CAAC,OAAgB;IAC1C,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,WAAW,CAAC,wBAAwB,CAAC,CAAC;IAElF,WAAW;SACR,OAAO,CAAC,OAAO,CAAC;SAChB,WAAW,CAAC,mBAAmB,CAAC;SAChC,MAAM,CAAC,mBAAmB,EAAE,8BAA8B,CAAC;SAC3D,MAAM,CAAC,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"list.js","sourceRoot":"","sources":["../../../src/commands/list.ts"],"names":[],"mappings":";;;;;;AAAA,kDAA0B;AAG1B,kCAA4F;AAC5F,oCAAqC;AACrC,uDAA+B;AAC/B,6DAAqC;AAErC,SAAgB,WAAW,CAAC,OAAgB;IAC1C,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,WAAW,CAAC,wBAAwB,CAAC,CAAC;IAElF,WAAW;SACR,OAAO,CAAC,OAAO,CAAC;SAChB,WAAW,CAAC,mBAAmB,CAAC;SAChC,MAAM,CAAC,mBAAmB,EAAE,8BAA8B,CAAC;SAC3D,MAAM,CAAC,YAAY,EAAE,4BAA4B,CAAC;SAClD,MAAM,CAAC,KAAK,EAAE,MAAwC,EAAE,EAAE;QACzD,IAAA,eAAQ,EAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,mBAAS,CAAC,eAAe,EAAE,CAAC;QAC5B,mBAAS,CAAC,MAAM,CAAC,cAAc,EAAE;YAC/B,IAAI,EAAE,YAAY;SACnB,CAAC,CAAC;QACH,MAAM,mBAAS,CAAC,IAAI,EAAE,CAAC;QAEvB,MAAM,KAAK,GAAG,MAAM,IAAA,eAAQ,EAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC;QAC5D,MAAM,SAAS,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;YACpC,SAAS,EAAE,GAAG,CAAC,EAAE;YACjB,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,EAAE;YAClC,OAAO,EAAE,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAA,aAAM,EAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;YACxF,IAAI,EAAE,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;SAC3D,CAAC,CAAC,CAAC;QAEJ,gBAAM,CAAC,IAAI,CAAC,IAAA,iBAAS,EAAC,SAAS,CAAC,CAAC,CAAC;QAClC,IAAA,kBAAW,GAAE,CAAC;QAEd,gBAAM,CAAC,IAAI,CACT,OAAO,eAAK,CAAC,KAAK,CAAC,0BAA0B,CAAC,2CAA2C,CAC1F,CAAC;QACF,gBAAM,CAAC,IAAI,CACT,OAAO,eAAK,CAAC,KAAK,CAAC,4BAA4B,CAAC,uCAAuC,CACxF,CAAC;IACJ,CAAC,CAAC,CAAC;IAEL,WAAW;SACR,OAAO,CAAC,SAAS,CAAC;SAClB,WAAW,CAAC,mBAAmB,CAAC;SAChC,MAAM,CAAC,mBAAmB,EAAE,8BAA8B,CAAC;SAC3D,MAAM,CAAC,YAAY,EAAE,8BAA8B,CAAC;SACpD,MAAM,CAAC,KAAK,EAAE,MAAwC,EAAE,EAAE;QACzD,IAAA,eAAQ,EAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,mBAAS,CAAC,eAAe,EAAE,CAAC;QAC5B,mBAAS,CAAC,MAAM,CAAC,cAAc,EAAE;YAC/B,IAAI,EAAE,cAAc;SACrB,CAAC,CAAC;QACH,MAAM,mBAAS,CAAC,IAAI,EAAE,CAAC;QAEvB,MAAM,OAAO,GAAG,CAAC,MAAM,IAAA,iBAAU,EAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAC9E,CAAC,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC,YAAY,CAAC,CAC7C,CAAC;QACF,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;YACzC,WAAW,EAAE,MAAM,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAClC,GAAG,EAAE,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;YACpF,SAAS,EAAE,MAAM,CAAC,KAAK;YACvB,kBAAkB,EAAE,MAAM,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;SACpD,CAAC,CAAC,CAAC;QAEJ,gBAAM,CAAC,IAAI,CAAC,IAAA,iBAAS,EAAC,SAAS,CAAC,CAAC,CAAC;QAClC,IAAA,kBAAW,GAAE,CAAC;QACd,gBAAM,CAAC,IAAI,CACT,OAAO,eAAK,CAAC,KAAK,CAAC,4BAA4B,CAAC,uCAAuC,CACxF,CAAC;QACF,gBAAM,CAAC,IAAI,CACT,OAAO,eAAK,CAAC,KAAK,CAAC,0BAA0B,CAAC,2CAA2C,CAC1F,CAAC;IACJ,CAAC,CAAC,CAAC;IAEL,WAAW;SACR,OAAO,CAAC,UAAU,CAAC;SACnB,WAAW,CAAC,oBAAoB,CAAC;SACjC,MAAM,CAAC,mBAAmB,EAAE,8BAA8B,CAAC;SAC3D,MAAM,CAAC,YAAY,EAAE,+BAA+B,CAAC;SACrD,MAAM,CAAC,KAAK,EAAE,MAAwC,EAAE,EAAE;QACzD,IAAA,eAAQ,EAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,mBAAS,CAAC,eAAe,EAAE,CAAC;QAC5B,mBAAS,CAAC,MAAM,CAAC,cAAc,EAAE;YAC/B,IAAI,EAAE,eAAe;SACtB,CAAC,CAAC;QACH,MAAM,mBAAS,CAAC,IAAI,EAAE,CAAC;QAEvB,MAAM,QAAQ,GAAG,CAAC,MAAM,IAAA,mBAAY,EAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACjF,CAAC,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC,YAAY,CAAC,CAC7C,CAAC;QACF,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;YAC3C,YAAY,EAAE,OAAO,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YACpC,wBAAwB,EAAE,OAAO,CAAC,OAAO;iBACtC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;iBAClF,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YACjB,SAAS,EAAE,OAAO,CAAC,KAAK;YACxB,WAAW,EAAE,OAAO,CAAC,OAAO,CAAC,MAAM;YACnC,kBAAkB,EAAE,OAAO,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;SACrD,CAAC,CAAC,CAAC;QAEJ,gBAAM,CAAC,IAAI,CAAC,IAAA,iBAAS,EAAC,SAAS,CAAC,CAAC,CAAC;QAClC,IAAA,kBAAW,GAAE,CAAC;QACd,gBAAM,CAAC,IAAI,CACT,OAAO,eAAK,CAAC,KAAK,CAAC,6BAA6B,CAAC,wCAAwC,CAC1F,CAAC;QACF,gBAAM,CAAC,IAAI,CACT,OAAO,eAAK,CAAC,KAAK,CAAC,4BAA4B,CAAC,uCAAuC,CACxF,CAAC;QACF,gBAAM,CAAC,IAAI,CACT,OAAO,eAAK,CAAC,KAAK,CAAC,0BAA0B,CAAC,2CAA2C,CAC1F,CAAC;IACJ,CAAC,CAAC,CAAC;AACP,CAAC;AA1GD,kCA0GC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../../src/evaluator.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../../src/evaluator.ts"],"names":[],"mappings":"AAsBA,OAAO,KAAK,EACV,WAAW,EAEX,eAAe,EAGf,eAAe,EAEf,iBAAiB,EACjB,MAAM,EAEN,SAAS,EAEV,MAAM,SAAS,CAAC;AACjB,eAAO,MAAM,uBAAuB,IAAI,CAAC;AAEzC,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,GAAG,CAAC,GAC5C,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,GAAG,EAAE,CAAC,EAAE,CAqClC;AAED,wBAAgB,gBAAgB,CAC9B,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACzC,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,CA4BjC;AAED,wBAAsB,YAAY,CAChC,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,EACrC,eAAe,CAAC,EAAE,iBAAiB,EACnC,QAAQ,CAAC,EAAE,WAAW,GACrB,OAAO,CAAC,MAAM,CAAC,CAsHjB;AA+uBD,wBAAgB,QAAQ,CAAC,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,eAAe,4BAGtE"}
|
package/dist/src/evaluator.js
CHANGED
|
@@ -43,7 +43,8 @@ const suggestions_1 = require("./suggestions");
|
|
|
43
43
|
const util_1 = require("./util");
|
|
44
44
|
const azureopenaiUtil_1 = require("./providers/azureopenaiUtil");
|
|
45
45
|
const wrapper_1 = require("./python/wrapper");
|
|
46
|
-
const
|
|
46
|
+
const esm_1 = require("./esm");
|
|
47
|
+
const cache_1 = require("./cache");
|
|
47
48
|
exports.DEFAULT_MAX_CONCURRENCY = 4;
|
|
48
49
|
function generateVarCombinations(vars) {
|
|
49
50
|
const keys = Object.keys(vars);
|
|
@@ -55,6 +56,9 @@ function generateVarCombinations(vars) {
|
|
|
55
56
|
const resolvedPath = path.resolve(cliState_1.default.basePath || '', filePath);
|
|
56
57
|
const filePaths = (0, glob_1.globSync)(resolvedPath.replace(/\\/g, '/'));
|
|
57
58
|
values = filePaths.map((path) => `file://${path}`);
|
|
59
|
+
if (values.length === 0) {
|
|
60
|
+
throw new Error(`No files found for variable ${key} at path ${resolvedPath}`);
|
|
61
|
+
}
|
|
58
62
|
}
|
|
59
63
|
else {
|
|
60
64
|
values = Array.isArray(vars[key]) ? vars[key] : [vars[key]];
|
|
@@ -116,12 +120,12 @@ async function renderPrompt(prompt, vars, nunjucksFilters, provider) {
|
|
|
116
120
|
logger_1.default.debug(`Loading var ${varName} from file: ${filePath}`);
|
|
117
121
|
switch (fileExtension) {
|
|
118
122
|
case 'js':
|
|
119
|
-
const javascriptOutput =
|
|
123
|
+
const javascriptOutput = (await (await (0, esm_1.importModule)(filePath))(varName, basePrompt, vars, provider));
|
|
120
124
|
if (javascriptOutput.error) {
|
|
121
|
-
throw new Error(`Error running
|
|
125
|
+
throw new Error(`Error running ${filePath}: ${javascriptOutput.error}`);
|
|
122
126
|
}
|
|
123
127
|
if (!javascriptOutput.output) {
|
|
124
|
-
throw new Error(`
|
|
128
|
+
throw new Error(`Expected ${filePath} to return { output: string } but got ${javascriptOutput}`);
|
|
125
129
|
}
|
|
126
130
|
vars[varName] = javascriptOutput.output;
|
|
127
131
|
break;
|
|
@@ -167,9 +171,17 @@ async function renderPrompt(prompt, vars, nunjucksFilters, provider) {
|
|
|
167
171
|
resolveVariables(vars);
|
|
168
172
|
// Third party integrations
|
|
169
173
|
if (prompt.raw.startsWith('portkey://')) {
|
|
170
|
-
const
|
|
174
|
+
const { getPrompt } = await Promise.resolve().then(() => __importStar(require('./integrations/portkey')));
|
|
175
|
+
const portKeyResult = await getPrompt(prompt.raw.slice('portkey://'.length), vars);
|
|
171
176
|
return JSON.stringify(portKeyResult.messages);
|
|
172
177
|
}
|
|
178
|
+
else if (prompt.raw.startsWith('langfuse://')) {
|
|
179
|
+
const { getPrompt } = await Promise.resolve().then(() => __importStar(require('./integrations/langfuse')));
|
|
180
|
+
const langfusePrompt = prompt.raw.slice('langfuse://'.length);
|
|
181
|
+
const [helper, version] = langfusePrompt.split(':');
|
|
182
|
+
const langfuseResult = await getPrompt(helper, Number(version));
|
|
183
|
+
return langfuseResult;
|
|
184
|
+
}
|
|
173
185
|
// Render prompt
|
|
174
186
|
try {
|
|
175
187
|
if (process.env.PROMPTFOO_DISABLE_JSON_AUTOESCAPE) {
|
|
@@ -258,16 +270,20 @@ class Evaluator {
|
|
|
258
270
|
cost: 0,
|
|
259
271
|
cached: false,
|
|
260
272
|
};
|
|
261
|
-
if (
|
|
262
|
-
response =
|
|
273
|
+
if (test.providerOutput) {
|
|
274
|
+
response.output = test.providerOutput;
|
|
275
|
+
}
|
|
276
|
+
else {
|
|
277
|
+
response = await (test.provider || provider).callApi(renderedPrompt, {
|
|
263
278
|
vars,
|
|
279
|
+
logger: logger_1.default,
|
|
280
|
+
fetchWithCache: cache_1.fetchWithCache,
|
|
281
|
+
getCache: cache_1.getCache,
|
|
264
282
|
}, {
|
|
283
|
+
originalProvider: provider,
|
|
265
284
|
includeLogProbs: test.assert?.some((a) => a.type === 'perplexity'),
|
|
266
285
|
});
|
|
267
286
|
}
|
|
268
|
-
else {
|
|
269
|
-
response.output = test.providerOutput;
|
|
270
|
-
}
|
|
271
287
|
const endTime = Date.now();
|
|
272
288
|
latencyMs = endTime - startTime;
|
|
273
289
|
let conversationLastInput = undefined;
|