promptfoo 0.119.13 → 0.119.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +28 -26
- package/dist/src/app/assets/index-eJ2lMe94.js +51 -0
- package/dist/src/app/assets/{source-map-support-Bnh0UQ2S.js → source-map-support-1v4oeb7P.js} +1 -1
- package/dist/src/app/assets/sync-CtLQRuC1.js +1 -0
- package/dist/src/app/assets/{vendor-charts-T60Uk0Z3.js → vendor-charts-DnVv66VV.js} +1 -1
- package/dist/src/app/assets/{vendor-markdown-DLig-KJh.js → vendor-markdown-DCpQIyMA.js} +1 -1
- package/dist/src/app/assets/{vendor-mui-core-5BLaiG3c.js → vendor-mui-core-Boqnpf9f.js} +1 -1
- package/dist/src/app/assets/{vendor-mui-icons-fn39Fu2e.js → vendor-mui-icons-B8MqoVbj.js} +1 -1
- package/dist/src/app/assets/vendor-mui-x-CGSS6QHF.js +45 -0
- package/dist/src/app/assets/{vendor-utils-DYBMEuwX.js → vendor-utils-DdfHIEy8.js} +1 -1
- package/dist/src/app/index.html +7 -7
- package/dist/src/assertions/guardrails.d.ts +1 -1
- package/dist/src/assertions/guardrails.js +18 -9
- package/dist/src/assertions/index.d.ts +1 -1
- package/dist/src/assertions/index.js +9 -3
- package/dist/src/assertions/searchRubric.d.ts +3 -0
- package/dist/src/assertions/searchRubric.js +18 -0
- package/dist/src/commands/eval.js +1 -1
- package/dist/src/commands/modelScan.d.ts +7 -1
- package/dist/src/commands/modelScan.js +121 -59
- package/dist/src/database/index.d.ts +6 -0
- package/dist/src/database/index.js +11 -0
- package/dist/src/database/tables.d.ts +46 -24
- package/dist/src/envars.d.ts +17 -0
- package/dist/src/generated/constants.js +1 -1
- package/dist/src/logger.d.ts +5 -0
- package/dist/src/logger.js +28 -0
- package/dist/src/main.js +17 -6
- package/dist/src/matchers.d.ts +1 -0
- package/dist/src/matchers.js +80 -0
- package/dist/src/models/eval.d.ts +2 -1
- package/dist/src/models/eval.js +44 -2
- package/dist/src/prompts/grading.d.ts +1 -0
- package/dist/src/prompts/grading.js +26 -1
- package/dist/src/prompts/index.d.ts +1 -0
- package/dist/src/prompts/index.js +4 -1
- package/dist/src/providers/adaline.gateway.js +2 -2
- package/dist/src/providers/anthropic/defaults.d.ts +1 -1
- package/dist/src/providers/anthropic/defaults.js +15 -0
- package/dist/src/providers/azure/chat.d.ts +3 -1
- package/dist/src/providers/azure/chat.js +16 -3
- package/dist/src/providers/azure/defaults.js +660 -141
- package/dist/src/providers/azure/responses.d.ts +5 -0
- package/dist/src/providers/azure/responses.js +33 -4
- package/dist/src/providers/azure/types.d.ts +4 -0
- package/dist/src/providers/bedrock/agents.d.ts +1 -1
- package/dist/src/providers/bedrock/agents.js +2 -2
- package/dist/src/providers/bedrock/base.d.ts +40 -0
- package/dist/src/providers/bedrock/base.js +171 -0
- package/dist/src/providers/bedrock/converse.d.ts +146 -0
- package/dist/src/providers/bedrock/converse.js +1044 -0
- package/dist/src/providers/bedrock/index.d.ts +1 -34
- package/dist/src/providers/bedrock/index.js +4 -159
- package/dist/src/providers/bedrock/knowledgeBase.d.ts +1 -1
- package/dist/src/providers/bedrock/knowledgeBase.js +2 -2
- package/dist/src/providers/bedrock/nova-sonic.d.ts +2 -1
- package/dist/src/providers/bedrock/nova-sonic.js +2 -2
- package/dist/src/providers/claude-agent-sdk.d.ts +58 -1
- package/dist/src/providers/claude-agent-sdk.js +22 -1
- package/dist/src/providers/defaults.js +4 -0
- package/dist/src/providers/github/defaults.js +6 -6
- package/dist/src/providers/google/types.d.ts +25 -0
- package/dist/src/providers/google/util.d.ts +2 -0
- package/dist/src/providers/google/vertex.js +78 -22
- package/dist/src/providers/{groq.d.ts → groq/chat.d.ts} +26 -20
- package/dist/src/providers/groq/chat.js +79 -0
- package/dist/src/providers/groq/index.d.ts +5 -0
- package/dist/src/providers/groq/index.js +24 -0
- package/dist/src/providers/groq/responses.d.ts +106 -0
- package/dist/src/providers/groq/responses.js +64 -0
- package/dist/src/providers/groq/types.d.ts +44 -0
- package/dist/src/providers/groq/types.js +3 -0
- package/dist/src/providers/groq/util.d.ts +15 -0
- package/dist/src/providers/groq/util.js +28 -0
- package/dist/src/providers/mcp/client.d.ts +8 -0
- package/dist/src/providers/mcp/client.js +60 -10
- package/dist/src/providers/mcp/types.d.ts +21 -0
- package/dist/src/providers/openai/chatkit-pool.d.ts +114 -0
- package/dist/src/providers/openai/chatkit-pool.js +548 -0
- package/dist/src/providers/openai/chatkit-types.d.ts +73 -0
- package/dist/src/providers/openai/chatkit-types.js +3 -0
- package/dist/src/providers/openai/chatkit.d.ts +76 -0
- package/dist/src/providers/openai/chatkit.js +879 -0
- package/dist/src/providers/openai/codex-sdk.d.ts +109 -0
- package/dist/src/providers/openai/codex-sdk.js +346 -0
- package/dist/src/providers/openai/defaults.d.ts +2 -0
- package/dist/src/providers/openai/defaults.js +10 -4
- package/dist/src/providers/registry.js +48 -9
- package/dist/src/providers/responses/types.d.ts +1 -1
- package/dist/src/providers/sagemaker.d.ts +2 -2
- package/dist/src/providers/webSearchUtils.d.ts +17 -0
- package/dist/src/providers/webSearchUtils.js +169 -0
- package/dist/src/providers/xai/chat.d.ts +61 -0
- package/dist/src/providers/xai/chat.js +68 -3
- package/dist/src/providers/xai/responses.d.ts +189 -0
- package/dist/src/providers/xai/responses.js +268 -0
- package/dist/src/redteam/constants/plugins.d.ts +1 -1
- package/dist/src/redteam/constants/plugins.js +1 -1
- package/dist/src/redteam/constants/strategies.d.ts +1 -1
- package/dist/src/redteam/constants/strategies.js +1 -0
- package/dist/src/redteam/plugins/vlguard.d.ts +53 -4
- package/dist/src/redteam/plugins/vlguard.js +362 -46
- package/dist/src/redteam/providers/constants.d.ts +2 -2
- package/dist/src/redteam/providers/constants.js +2 -2
- package/dist/src/redteam/providers/crescendo/index.d.ts +1 -1
- package/dist/src/redteam/providers/crescendo/index.js +5 -3
- package/dist/src/redteam/providers/hydra/index.js +1 -1
- package/dist/src/server/routes/modelAudit.js +4 -4
- package/dist/src/share.js +4 -2
- package/dist/src/telemetry.js +44 -8
- package/dist/src/types/env.d.ts +3 -0
- package/dist/src/types/env.js +1 -0
- package/dist/src/types/index.d.ts +896 -615
- package/dist/src/types/index.js +1 -0
- package/dist/src/types/providers.d.ts +1 -0
- package/dist/src/types/tracing.d.ts +3 -0
- package/dist/src/util/database.d.ts +6 -4
- package/dist/src/util/file.js +6 -4
- package/dist/src/util/modelAuditCliParser.d.ts +4 -4
- package/dist/src/util/xlsx.js +52 -26
- package/dist/src/validators/providers.d.ts +142 -122
- package/dist/src/validators/providers.js +4 -6
- package/dist/src/validators/redteam.d.ts +36 -28
- package/dist/src/validators/redteam.js +9 -3
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +28 -26
- package/dist/drizzle/CLAUDE.md +0 -65
- package/dist/src/app/assets/index-DifT6VGT.js +0 -51
- package/dist/src/app/assets/sync-Oo-W_Rbj.js +0 -1
- package/dist/src/app/assets/vendor-mui-x-C2xF-yiO.js +0 -45
- package/dist/src/providers/groq.js +0 -48
package/dist/src/types/index.js
CHANGED
|
@@ -248,6 +248,7 @@ exports.BaseAssertionTypesSchema = zod_1.z.enum([
|
|
|
248
248
|
'trace-error-spans',
|
|
249
249
|
'trace-span-count',
|
|
250
250
|
'trace-span-duration',
|
|
251
|
+
'search-rubric',
|
|
251
252
|
'webhook',
|
|
252
253
|
]);
|
|
253
254
|
exports.SpecialAssertionTypesSchema = zod_1.z.enum(['select-best', 'human', 'max-score']);
|
|
package/dist/src/util/database.d.ts
CHANGED
|
@@ -174,6 +174,7 @@ export declare function getTestCases(limit?: number): Promise<{
|
|
|
174
174
|
OPENAI_API_KEY?: string | undefined;
|
|
175
175
|
OPENAI_BASE_URL?: string | undefined;
|
|
176
176
|
OPENAI_ORGANIZATION?: string | undefined;
|
|
177
|
+
CODEX_API_KEY?: string | undefined;
|
|
177
178
|
PALM_API_HOST?: string | undefined;
|
|
178
179
|
PALM_API_KEY?: string | undefined;
|
|
179
180
|
PORTKEY_API_KEY?: string | undefined;
|
|
@@ -263,7 +264,7 @@ export declare function getTestCases(limit?: number): Promise<{
|
|
|
263
264
|
description?: string | undefined;
|
|
264
265
|
providerOutput?: string | {} | undefined;
|
|
265
266
|
assert?: ({
|
|
266
|
-
type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | "not-similar:dot" | 
"not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-webhook" | "select-best" | "human" | "max-score";
|
|
267
|
+
type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "search-rubric" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | 
"not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-search-rubric" | "not-webhook" | "select-best" | "human" | "max-score";
|
|
267
268
|
value?: import("../types/index").AssertionValue | undefined;
|
|
268
269
|
provider?: any;
|
|
269
270
|
config?: Record<string, any> | undefined;
|
|
@@ -279,7 +280,7 @@ export declare function getTestCases(limit?: number): Promise<{
|
|
|
279
280
|
} | {
|
|
280
281
|
type: "assert-set";
|
|
281
282
|
assert: {
|
|
282
|
-
type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | "not-similar:dot" | 
"not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-webhook" | "select-best" | "human" | "max-score";
|
|
283
|
+
type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "search-rubric" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | 
"not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-search-rubric" | "not-webhook" | "select-best" | "human" | "max-score";
|
|
283
284
|
value?: import("../types/index").AssertionValue | undefined;
|
|
284
285
|
provider?: any;
|
|
285
286
|
config?: Record<string, any> | undefined;
|
|
@@ -469,6 +470,7 @@ export declare function getDatasetFromHash(hash: string): Promise<{
|
|
|
469
470
|
OPENAI_API_KEY?: string | undefined;
|
|
470
471
|
OPENAI_BASE_URL?: string | undefined;
|
|
471
472
|
OPENAI_ORGANIZATION?: string | undefined;
|
|
473
|
+
CODEX_API_KEY?: string | undefined;
|
|
472
474
|
PALM_API_HOST?: string | undefined;
|
|
473
475
|
PALM_API_KEY?: string | undefined;
|
|
474
476
|
PORTKEY_API_KEY?: string | undefined;
|
|
@@ -558,7 +560,7 @@ export declare function getDatasetFromHash(hash: string): Promise<{
|
|
|
558
560
|
description?: string | undefined;
|
|
559
561
|
providerOutput?: string | {} | undefined;
|
|
560
562
|
assert?: ({
|
|
561
|
-
type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | "not-similar:dot" | 
"not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-webhook" | "select-best" | "human" | "max-score";
|
|
563
|
+
type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "search-rubric" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | 
"not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-search-rubric" | "not-webhook" | "select-best" | "human" | "max-score";
|
|
562
564
|
value?: import("../types/index").AssertionValue | undefined;
|
|
563
565
|
provider?: any;
|
|
564
566
|
config?: Record<string, any> | undefined;
|
|
@@ -574,7 +576,7 @@ export declare function getDatasetFromHash(hash: string): Promise<{
|
|
|
574
576
|
} | {
|
|
575
577
|
type: "assert-set";
|
|
576
578
|
assert: {
|
|
577
|
-
type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | "not-similar:dot" | 
"not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-webhook" | "select-best" | "human" | "max-score";
|
|
579
|
+
type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "search-rubric" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | 
"not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-search-rubric" | "not-webhook" | "select-best" | "human" | "max-score";
|
|
578
580
|
value?: import("../types/index").AssertionValue | undefined;
|
|
579
581
|
provider?: any;
|
|
580
582
|
config?: Record<string, any> | undefined;
|
package/dist/src/util/file.js
CHANGED
|
@@ -104,10 +104,12 @@ function maybeLoadFromExternalFile(filePath, context) {
|
|
|
104
104
|
logger_1.default.debug(`Preserving Python/JS file reference in assertion context: ${renderedFilePath}`);
|
|
105
105
|
return renderedFilePath;
|
|
106
106
|
}
|
|
107
|
-
// In vars contexts, preserve file://
|
|
108
|
-
// This prevents premature
|
|
109
|
-
|
|
110
|
-
|
|
107
|
+
// In vars contexts, preserve all file:// references for test case expansion
|
|
108
|
+
// This prevents premature file loading - JS/Python files should be executed at runtime
|
|
109
|
+
// by renderPrompt in evaluatorHelpers.ts, and glob patterns should be expanded by
|
|
110
|
+
// generateVarCombinations in evaluator.ts
|
|
111
|
+
if (context === 'vars') {
|
|
112
|
+
logger_1.default.debug(`Preserving file reference in vars context: ${renderedFilePath}`);
|
|
111
113
|
return renderedFilePath;
|
|
112
114
|
}
|
|
113
115
|
// For Python/JS files with function names, return the original string unchanged
|
|
package/dist/src/util/modelAuditCliParser.d.ts
CHANGED
|
@@ -21,8 +21,8 @@ export declare const ModelAuditCliOptionsSchema: z.ZodObject<{
|
|
|
21
21
|
cache: z.ZodOptional<z.ZodBoolean>;
|
|
22
22
|
stream: z.ZodOptional<z.ZodBoolean>;
|
|
23
23
|
}, "strip", z.ZodTypeAny, {
|
|
24
|
-
strict?: boolean | undefined;
|
|
25
24
|
output?: string | undefined;
|
|
25
|
+
strict?: boolean | undefined;
|
|
26
26
|
cache?: boolean | undefined;
|
|
27
27
|
verbose?: boolean | undefined;
|
|
28
28
|
quiet?: boolean | undefined;
|
|
@@ -35,8 +35,8 @@ export declare const ModelAuditCliOptionsSchema: z.ZodObject<{
|
|
|
35
35
|
maxSize?: string | undefined;
|
|
36
36
|
dryRun?: boolean | undefined;
|
|
37
37
|
}, {
|
|
38
|
-
strict?: boolean | undefined;
|
|
39
38
|
output?: string | undefined;
|
|
39
|
+
strict?: boolean | undefined;
|
|
40
40
|
cache?: boolean | undefined;
|
|
41
41
|
verbose?: boolean | undefined;
|
|
42
42
|
quiet?: boolean | undefined;
|
|
@@ -96,8 +96,8 @@ export declare const validateModelAuditOptions: (options: unknown) => ModelAudit
|
|
|
96
96
|
export declare const safeValidateModelAuditOptions: (options: unknown) => {
|
|
97
97
|
success: true;
|
|
98
98
|
data: {
|
|
99
|
-
strict?: boolean | undefined;
|
|
100
99
|
output?: string | undefined;
|
|
100
|
+
strict?: boolean | undefined;
|
|
101
101
|
cache?: boolean | undefined;
|
|
102
102
|
verbose?: boolean | undefined;
|
|
103
103
|
quiet?: boolean | undefined;
|
|
@@ -114,8 +114,8 @@ export declare const safeValidateModelAuditOptions: (options: unknown) => {
|
|
|
114
114
|
} | {
|
|
115
115
|
success: false;
|
|
116
116
|
error: z.ZodError<{
|
|
117
|
-
strict?: boolean | undefined;
|
|
118
117
|
output?: string | undefined;
|
|
118
|
+
strict?: boolean | undefined;
|
|
119
119
|
cache?: boolean | undefined;
|
|
120
120
|
verbose?: boolean | undefined;
|
|
121
121
|
quiet?: boolean | undefined;
|
package/dist/src/util/xlsx.js
CHANGED
|
@@ -40,55 +40,80 @@ async function parseXlsxFile(filePath) {
|
|
|
40
40
|
// Parse file path and optional sheet name
|
|
41
41
|
// Supports syntax: file.xlsx#SheetName or file.xlsx#2 (1-based index)
|
|
42
42
|
const [actualFilePath, sheetSpecifier] = filePath.split('#');
|
|
43
|
-
// Try to import xlsx first to give proper error if not installed
|
|
44
|
-
const xlsx = await Promise.resolve().then(() => __importStar(require('xlsx')));
|
|
45
43
|
// Check if file exists before attempting to read it
|
|
46
44
|
if (!fs.existsSync(actualFilePath)) {
|
|
47
45
|
throw new Error(`File not found: ${actualFilePath}`);
|
|
48
46
|
}
|
|
49
|
-
|
|
47
|
+
// Try to import read-excel-file first to give proper error if not installed
|
|
48
|
+
let readXlsxFile;
|
|
49
|
+
let readSheetNames;
|
|
50
|
+
try {
|
|
51
|
+
const module = await Promise.resolve().then(() => __importStar(require('read-excel-file/node')));
|
|
52
|
+
readXlsxFile = module.default;
|
|
53
|
+
readSheetNames = module.readSheetNames;
|
|
54
|
+
}
|
|
55
|
+
catch {
|
|
56
|
+
throw new Error('read-excel-file is not installed. Please install it with: npm install read-excel-file\n' +
|
|
57
|
+
'Note: read-excel-file is an optional peer dependency for reading Excel files.');
|
|
58
|
+
}
|
|
59
|
+
// Get all sheet names to validate and determine which sheet to use
|
|
60
|
+
const sheetNames = await readSheetNames(actualFilePath);
|
|
50
61
|
// Validate that the workbook has at least one sheet
|
|
51
|
-
if (!
|
|
62
|
+
if (!sheetNames || sheetNames.length === 0) {
|
|
52
63
|
throw new Error('Excel file has no sheets');
|
|
53
64
|
}
|
|
54
65
|
// Determine which sheet to use
|
|
55
|
-
let
|
|
66
|
+
let sheetOption;
|
|
56
67
|
if (sheetSpecifier) {
|
|
57
68
|
// Check if it's a numeric index (1-based)
|
|
58
69
|
const sheetIndex = parseInt(sheetSpecifier, 10);
|
|
59
70
|
if (isNaN(sheetIndex)) {
|
|
60
71
|
// It's a sheet name
|
|
61
|
-
if (!
|
|
62
|
-
throw new Error(`Sheet "${sheetSpecifier}" not found. Available sheets: ${
|
|
72
|
+
if (!sheetNames.includes(sheetSpecifier)) {
|
|
73
|
+
throw new Error(`Sheet "${sheetSpecifier}" not found. Available sheets: ${sheetNames.join(', ')}`);
|
|
63
74
|
}
|
|
64
|
-
|
|
75
|
+
sheetOption = sheetSpecifier;
|
|
65
76
|
}
|
|
66
77
|
else {
|
|
67
|
-
//
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
throw new Error(`Sheet index ${sheetIndex} is out of range. Available sheets: ${workbook.SheetNames.length} (1-${workbook.SheetNames.length})`);
|
|
78
|
+
// Validate 1-based index
|
|
79
|
+
if (sheetIndex < 1 || sheetIndex > sheetNames.length) {
|
|
80
|
+
throw new Error(`Sheet index ${sheetIndex} is out of range. Available sheets: ${sheetNames.length} (1-${sheetNames.length})`);
|
|
71
81
|
}
|
|
72
|
-
|
|
82
|
+
sheetOption = sheetIndex;
|
|
73
83
|
}
|
|
74
84
|
}
|
|
75
85
|
else {
|
|
76
|
-
// Use the first sheet by default
|
|
77
|
-
|
|
86
|
+
// Use the first sheet by default (1-based index)
|
|
87
|
+
sheetOption = 1;
|
|
78
88
|
}
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
89
|
+
// Get the sheet name for error messages
|
|
90
|
+
const sheetName = typeof sheetOption === 'number' ? sheetNames[sheetOption - 1] : sheetOption;
|
|
91
|
+
// Read the sheet - returns array of arrays
|
|
92
|
+
const rows = await readXlsxFile(actualFilePath, { sheet: sheetOption });
|
|
82
93
|
// Check if the sheet is empty
|
|
83
|
-
if (
|
|
94
|
+
if (rows.length === 0) {
|
|
84
95
|
throw new Error(`Sheet "${sheetName}" is empty or contains no valid data rows`);
|
|
85
96
|
}
|
|
97
|
+
// First row should be headers
|
|
98
|
+
const headers = rows[0].map((cell) => (cell != null ? String(cell) : ''));
|
|
86
99
|
// Check if the first row has any headers
|
|
87
|
-
|
|
88
|
-
const headers = Object.keys(firstRow);
|
|
89
|
-
if (headers.length === 0) {
|
|
100
|
+
if (headers.length === 0 || headers.every((h) => h === '')) {
|
|
90
101
|
throw new Error(`Sheet "${sheetName}" has no valid column headers`);
|
|
91
102
|
}
|
|
103
|
+
// Check if there's only headers with no data rows
|
|
104
|
+
if (rows.length === 1) {
|
|
105
|
+
throw new Error(`Sheet "${sheetName}" is empty or contains no valid data rows`);
|
|
106
|
+
}
|
|
107
|
+
// Convert rows to array of objects (similar to xlsx's sheet_to_json with defval: '')
|
|
108
|
+
const data = rows.slice(1).map((row) => {
|
|
109
|
+
const obj = {};
|
|
110
|
+
headers.forEach((header, index) => {
|
|
111
|
+
// Use empty string as default value (like xlsx's defval: '')
|
|
112
|
+
const cellValue = row[index];
|
|
113
|
+
obj[header] = cellValue != null ? String(cellValue) : '';
|
|
114
|
+
});
|
|
115
|
+
return obj;
|
|
116
|
+
});
|
|
92
117
|
// Check for completely empty columns (all values are empty strings)
|
|
93
118
|
const hasValidData = data.some((row) => headers.some((header) => row[header] && row[header].toString().trim() !== ''));
|
|
94
119
|
if (!hasValidData) {
|
|
@@ -98,10 +123,10 @@ async function parseXlsxFile(filePath) {
|
|
|
98
123
|
}
|
|
99
124
|
catch (error) {
|
|
100
125
|
if (error instanceof Error) {
|
|
101
|
-
// Handle missing
|
|
102
|
-
if (error.message.includes("Cannot find module '
|
|
103
|
-
throw new Error('
|
|
104
|
-
'Note:
|
|
126
|
+
// Handle missing read-excel-file module
|
|
127
|
+
if (error.message.includes("Cannot find module 'read-excel-file")) {
|
|
128
|
+
throw new Error('read-excel-file is not installed. Please install it with: npm install read-excel-file\n' +
|
|
129
|
+
'Note: read-excel-file is an optional peer dependency for reading Excel files.');
|
|
105
130
|
}
|
|
106
131
|
// Re-throw our own validation errors without wrapping
|
|
107
132
|
// These already have descriptive messages
|
|
@@ -111,6 +136,7 @@ async function parseXlsxFile(filePath) {
|
|
|
111
136
|
'Sheet "',
|
|
112
137
|
'Sheet index',
|
|
113
138
|
'contains only empty data',
|
|
139
|
+
'read-excel-file is not installed',
|
|
114
140
|
];
|
|
115
141
|
if (knownErrors.some((prefix) => error.message.startsWith(prefix))) {
|
|
116
142
|
throw error;
|