npm - promptfoo - Versions diffs - 0.119.13 → 0.119.14 - Mend

promptfoo 0.119.13 → 0.119.14

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (131) hide show

package/dist/package.json +28 -26
package/dist/src/app/assets/index-eJ2lMe94.js +51 -0
package/dist/src/app/assets/{source-map-support-Bnh0UQ2S.js → source-map-support-1v4oeb7P.js} +1 -1
package/dist/src/app/assets/sync-CtLQRuC1.js +1 -0
package/dist/src/app/assets/{vendor-charts-T60Uk0Z3.js → vendor-charts-DnVv66VV.js} +1 -1
package/dist/src/app/assets/{vendor-markdown-DLig-KJh.js → vendor-markdown-DCpQIyMA.js} +1 -1
package/dist/src/app/assets/{vendor-mui-core-5BLaiG3c.js → vendor-mui-core-Boqnpf9f.js} +1 -1
package/dist/src/app/assets/{vendor-mui-icons-fn39Fu2e.js → vendor-mui-icons-B8MqoVbj.js} +1 -1
package/dist/src/app/assets/vendor-mui-x-CGSS6QHF.js +45 -0
package/dist/src/app/assets/{vendor-utils-DYBMEuwX.js → vendor-utils-DdfHIEy8.js} +1 -1
package/dist/src/app/index.html +7 -7
package/dist/src/assertions/guardrails.d.ts +1 -1
package/dist/src/assertions/guardrails.js +18 -9
package/dist/src/assertions/index.d.ts +1 -1
package/dist/src/assertions/index.js +9 -3
package/dist/src/assertions/searchRubric.d.ts +3 -0
package/dist/src/assertions/searchRubric.js +18 -0
package/dist/src/commands/eval.js +1 -1
package/dist/src/commands/modelScan.d.ts +7 -1
package/dist/src/commands/modelScan.js +121 -59
package/dist/src/database/index.d.ts +6 -0
package/dist/src/database/index.js +11 -0
package/dist/src/database/tables.d.ts +46 -24
package/dist/src/envars.d.ts +17 -0
package/dist/src/generated/constants.js +1 -1
package/dist/src/logger.d.ts +5 -0
package/dist/src/logger.js +28 -0
package/dist/src/main.js +17 -6
package/dist/src/matchers.d.ts +1 -0
package/dist/src/matchers.js +80 -0
package/dist/src/models/eval.d.ts +2 -1
package/dist/src/models/eval.js +44 -2
package/dist/src/prompts/grading.d.ts +1 -0
package/dist/src/prompts/grading.js +26 -1
package/dist/src/prompts/index.d.ts +1 -0
package/dist/src/prompts/index.js +4 -1
package/dist/src/providers/adaline.gateway.js +2 -2
package/dist/src/providers/anthropic/defaults.d.ts +1 -1
package/dist/src/providers/anthropic/defaults.js +15 -0
package/dist/src/providers/azure/chat.d.ts +3 -1
package/dist/src/providers/azure/chat.js +16 -3
package/dist/src/providers/azure/defaults.js +660 -141
package/dist/src/providers/azure/responses.d.ts +5 -0
package/dist/src/providers/azure/responses.js +33 -4
package/dist/src/providers/azure/types.d.ts +4 -0
package/dist/src/providers/bedrock/agents.d.ts +1 -1
package/dist/src/providers/bedrock/agents.js +2 -2
package/dist/src/providers/bedrock/base.d.ts +40 -0
package/dist/src/providers/bedrock/base.js +171 -0
package/dist/src/providers/bedrock/converse.d.ts +146 -0
package/dist/src/providers/bedrock/converse.js +1044 -0
package/dist/src/providers/bedrock/index.d.ts +1 -34
package/dist/src/providers/bedrock/index.js +4 -159
package/dist/src/providers/bedrock/knowledgeBase.d.ts +1 -1
package/dist/src/providers/bedrock/knowledgeBase.js +2 -2
package/dist/src/providers/bedrock/nova-sonic.d.ts +2 -1
package/dist/src/providers/bedrock/nova-sonic.js +2 -2
package/dist/src/providers/claude-agent-sdk.d.ts +58 -1
package/dist/src/providers/claude-agent-sdk.js +22 -1
package/dist/src/providers/defaults.js +4 -0
package/dist/src/providers/github/defaults.js +6 -6
package/dist/src/providers/google/types.d.ts +25 -0
package/dist/src/providers/google/util.d.ts +2 -0
package/dist/src/providers/google/vertex.js +78 -22
package/dist/src/providers/{groq.d.ts → groq/chat.d.ts} +26 -20
package/dist/src/providers/groq/chat.js +79 -0
package/dist/src/providers/groq/index.d.ts +5 -0
package/dist/src/providers/groq/index.js +24 -0
package/dist/src/providers/groq/responses.d.ts +106 -0
package/dist/src/providers/groq/responses.js +64 -0
package/dist/src/providers/groq/types.d.ts +44 -0
package/dist/src/providers/groq/types.js +3 -0
package/dist/src/providers/groq/util.d.ts +15 -0
package/dist/src/providers/groq/util.js +28 -0
package/dist/src/providers/mcp/client.d.ts +8 -0
package/dist/src/providers/mcp/client.js +60 -10
package/dist/src/providers/mcp/types.d.ts +21 -0
package/dist/src/providers/openai/chatkit-pool.d.ts +114 -0
package/dist/src/providers/openai/chatkit-pool.js +548 -0
package/dist/src/providers/openai/chatkit-types.d.ts +73 -0
package/dist/src/providers/openai/chatkit-types.js +3 -0
package/dist/src/providers/openai/chatkit.d.ts +76 -0
package/dist/src/providers/openai/chatkit.js +879 -0
package/dist/src/providers/openai/codex-sdk.d.ts +109 -0
package/dist/src/providers/openai/codex-sdk.js +346 -0
package/dist/src/providers/openai/defaults.d.ts +2 -0
package/dist/src/providers/openai/defaults.js +10 -4
package/dist/src/providers/registry.js +48 -9
package/dist/src/providers/responses/types.d.ts +1 -1
package/dist/src/providers/sagemaker.d.ts +2 -2
package/dist/src/providers/webSearchUtils.d.ts +17 -0
package/dist/src/providers/webSearchUtils.js +169 -0
package/dist/src/providers/xai/chat.d.ts +61 -0
package/dist/src/providers/xai/chat.js +68 -3
package/dist/src/providers/xai/responses.d.ts +189 -0
package/dist/src/providers/xai/responses.js +268 -0
package/dist/src/redteam/constants/plugins.d.ts +1 -1
package/dist/src/redteam/constants/plugins.js +1 -1
package/dist/src/redteam/constants/strategies.d.ts +1 -1
package/dist/src/redteam/constants/strategies.js +1 -0
package/dist/src/redteam/plugins/vlguard.d.ts +53 -4
package/dist/src/redteam/plugins/vlguard.js +362 -46
package/dist/src/redteam/providers/constants.d.ts +2 -2
package/dist/src/redteam/providers/constants.js +2 -2
package/dist/src/redteam/providers/crescendo/index.d.ts +1 -1
package/dist/src/redteam/providers/crescendo/index.js +5 -3
package/dist/src/redteam/providers/hydra/index.js +1 -1
package/dist/src/server/routes/modelAudit.js +4 -4
package/dist/src/share.js +4 -2
package/dist/src/telemetry.js +44 -8
package/dist/src/types/env.d.ts +3 -0
package/dist/src/types/env.js +1 -0
package/dist/src/types/index.d.ts +896 -615
package/dist/src/types/index.js +1 -0
package/dist/src/types/providers.d.ts +1 -0
package/dist/src/types/tracing.d.ts +3 -0
package/dist/src/util/database.d.ts +6 -4
package/dist/src/util/file.js +6 -4
package/dist/src/util/modelAuditCliParser.d.ts +4 -4
package/dist/src/util/xlsx.js +52 -26
package/dist/src/validators/providers.d.ts +142 -122
package/dist/src/validators/providers.js +4 -6
package/dist/src/validators/redteam.d.ts +36 -28
package/dist/src/validators/redteam.js +9 -3
package/dist/tsconfig.tsbuildinfo +1 -1
package/package.json +28 -26
package/dist/drizzle/CLAUDE.md +0 -65
package/dist/src/app/assets/index-DifT6VGT.js +0 -51
package/dist/src/app/assets/sync-Oo-W_Rbj.js +0 -1
package/dist/src/app/assets/vendor-mui-x-C2xF-yiO.js +0 -45
package/dist/src/providers/groq.js +0 -48

package/dist/src/types/index.js CHANGED Viewed

@@ -248,6 +248,7 @@ exports.BaseAssertionTypesSchema = zod_1.z.enum([
     'trace-error-spans',
     'trace-span-count',
     'trace-span-duration',
+    'search-rubric',
     'webhook',
 ]);
 exports.SpecialAssertionTypesSchema = zod_1.z.enum(['select-best', 'human', 'max-score']);

package/dist/src/types/providers.d.ts CHANGED Viewed

@@ -185,5 +185,6 @@ export interface DefaultProviders {
     moderationProvider: ApiProvider;
     suggestionsProvider: ApiProvider;
     synthesizeProvider: ApiProvider;
+    webSearchProvider?: ApiProvider;
 }
 //# sourceMappingURL=providers.d.ts.map

package/dist/src/types/tracing.d.ts CHANGED Viewed

@@ -10,6 +10,9 @@ export interface TraceSpan {
 }
 export interface TraceData {
     traceId: string;
+    evaluationId: string;
+    testCaseId: string;
+    metadata?: Record<string, any>;
     spans: TraceSpan[];
 }
 //# sourceMappingURL=tracing.d.ts.map

package/dist/src/util/database.d.ts CHANGED Viewed

@@ -174,6 +174,7 @@ export declare function getTestCases(limit?: number): Promise<{
                 OPENAI_API_KEY?: string | undefined;
                 OPENAI_BASE_URL?: string | undefined;
                 OPENAI_ORGANIZATION?: string | undefined;
+                CODEX_API_KEY?: string | undefined;
                 PALM_API_HOST?: string | undefined;
                 PALM_API_KEY?: string | undefined;
                 PORTKEY_API_KEY?: string | undefined;
@@ -263,7 +264,7 @@ export declare function getTestCases(limit?: number): Promise<{
         description?: string | undefined;
         providerOutput?: string | {} | undefined;
         assert?: ({
-            type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | "not-similar:dot" | 
"not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-webhook" | "select-best" | "human" | "max-score";
+            type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "search-rubric" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | 
"not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-search-rubric" | "not-webhook" | "select-best" | "human" | "max-score";
             value?: import("../types/index").AssertionValue | undefined;
             provider?: any;
             config?: Record<string, any> | undefined;
@@ -279,7 +280,7 @@ export declare function getTestCases(limit?: number): Promise<{
         } | {
             type: "assert-set";
             assert: {
-                type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | 
"not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-webhook" | "select-best" | "human" | "max-score";
+                type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "search-rubric" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" 
| "not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-search-rubric" | "not-webhook" | "select-best" | "human" | "max-score";
                 value?: import("../types/index").AssertionValue | undefined;
                 provider?: any;
                 config?: Record<string, any> | undefined;
@@ -469,6 +470,7 @@ export declare function getDatasetFromHash(hash: string): Promise<{
                 OPENAI_API_KEY?: string | undefined;
                 OPENAI_BASE_URL?: string | undefined;
                 OPENAI_ORGANIZATION?: string | undefined;
+                CODEX_API_KEY?: string | undefined;
                 PALM_API_HOST?: string | undefined;
                 PALM_API_KEY?: string | undefined;
                 PORTKEY_API_KEY?: string | undefined;
@@ -558,7 +560,7 @@ export declare function getDatasetFromHash(hash: string): Promise<{
         description?: string | undefined;
         providerOutput?: string | {} | undefined;
         assert?: ({
-            type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | "not-similar:dot" | 
"not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-webhook" | "select-best" | "human" | "max-score";
+            type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "search-rubric" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | 
"not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-search-rubric" | "not-webhook" | "select-best" | "human" | "max-score";
             value?: import("../types/index").AssertionValue | undefined;
             provider?: any;
             config?: Record<string, any> | undefined;
@@ -574,7 +576,7 @@ export declare function getDatasetFromHash(hash: string): Promise<{
         } | {
             type: "assert-set";
             assert: {
-                type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | 
"not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-webhook" | "select-best" | "human" | "max-score";
+                type: "moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "search-rubric" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" 
| "not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-search-rubric" | "not-webhook" | "select-best" | "human" | "max-score";
                 value?: import("../types/index").AssertionValue | undefined;
                 provider?: any;
                 config?: Record<string, any> | undefined;

package/dist/src/util/file.js CHANGED Viewed

@@ -104,10 +104,12 @@ function maybeLoadFromExternalFile(filePath, context) {
         logger_1.default.debug(`Preserving Python/JS file reference in assertion context: ${renderedFilePath}`);
         return renderedFilePath;
     }
-    // In vars contexts, preserve file:// glob patterns for test case expansion
-    // This prevents premature glob expansion that should be handled by generateVarCombinations
-    if (context === 'vars' && (0, glob_1.hasMagic)(renderedFilePath)) {
-        logger_1.default.debug(`Preserving glob pattern in vars context: ${renderedFilePath}`);
+    // In vars contexts, preserve all file:// references for test case expansion
+    // This prevents premature file loading - JS/Python files should be executed at runtime
+    // by renderPrompt in evaluatorHelpers.ts, and glob patterns should be expanded by
+    // generateVarCombinations in evaluator.ts
+    if (context === 'vars') {
+        logger_1.default.debug(`Preserving file reference in vars context: ${renderedFilePath}`);
         return renderedFilePath;
     }
     // For Python/JS files with function names, return the original string unchanged

package/dist/src/util/modelAuditCliParser.d.ts CHANGED Viewed

@@ -21,8 +21,8 @@ export declare const ModelAuditCliOptionsSchema: z.ZodObject<{
     cache: z.ZodOptional<z.ZodBoolean>;
     stream: z.ZodOptional<z.ZodBoolean>;
 }, "strip", z.ZodTypeAny, {
-    strict?: boolean | undefined;
     output?: string | undefined;
+    strict?: boolean | undefined;
     cache?: boolean | undefined;
     verbose?: boolean | undefined;
     quiet?: boolean | undefined;
@@ -35,8 +35,8 @@ export declare const ModelAuditCliOptionsSchema: z.ZodObject<{
     maxSize?: string | undefined;
     dryRun?: boolean | undefined;
 }, {
-    strict?: boolean | undefined;
     output?: string | undefined;
+    strict?: boolean | undefined;
     cache?: boolean | undefined;
     verbose?: boolean | undefined;
     quiet?: boolean | undefined;
@@ -96,8 +96,8 @@ export declare const validateModelAuditOptions: (options: unknown) => ModelAudit
 export declare const safeValidateModelAuditOptions: (options: unknown) => {
     success: true;
     data: {
-        strict?: boolean | undefined;
         output?: string | undefined;
+        strict?: boolean | undefined;
         cache?: boolean | undefined;
         verbose?: boolean | undefined;
         quiet?: boolean | undefined;
@@ -114,8 +114,8 @@ export declare const safeValidateModelAuditOptions: (options: unknown) => {
 } | {
     success: false;
     error: z.ZodError<{
-        strict?: boolean | undefined;
         output?: string | undefined;
+        strict?: boolean | undefined;
         cache?: boolean | undefined;
         verbose?: boolean | undefined;
         quiet?: boolean | undefined;

package/dist/src/util/xlsx.js CHANGED Viewed

@@ -40,55 +40,80 @@ async function parseXlsxFile(filePath) {
         // Parse file path and optional sheet name
         // Supports syntax: file.xlsx#SheetName or file.xlsx#2 (1-based index)
         const [actualFilePath, sheetSpecifier] = filePath.split('#');
-        // Try to import xlsx first to give proper error if not installed
-        const xlsx = await Promise.resolve().then(() => __importStar(require('xlsx')));
         // Check if file exists before attempting to read it
         if (!fs.existsSync(actualFilePath)) {
             throw new Error(`File not found: ${actualFilePath}`);
         }
-        const workbook = xlsx.readFile(actualFilePath);
+        // Try to import read-excel-file first to give proper error if not installed
+        let readXlsxFile;
+        let readSheetNames;
+        try {
+            const module = await Promise.resolve().then(() => __importStar(require('read-excel-file/node')));
+            readXlsxFile = module.default;
+            readSheetNames = module.readSheetNames;
+        }
+        catch {
+            throw new Error('read-excel-file is not installed. Please install it with: npm install read-excel-file\n' +
+                'Note: read-excel-file is an optional peer dependency for reading Excel files.');
+        }
+        // Get all sheet names to validate and determine which sheet to use
+        const sheetNames = await readSheetNames(actualFilePath);
         // Validate that the workbook has at least one sheet
-        if (!workbook.SheetNames || workbook.SheetNames.length === 0) {
+        if (!sheetNames || sheetNames.length === 0) {
             throw new Error('Excel file has no sheets');
         }
         // Determine which sheet to use
-        let sheetName;
+        let sheetOption;
         if (sheetSpecifier) {
             // Check if it's a numeric index (1-based)
             const sheetIndex = parseInt(sheetSpecifier, 10);
             if (isNaN(sheetIndex)) {
                 // It's a sheet name
-                if (!workbook.SheetNames.includes(sheetSpecifier)) {
-                    throw new Error(`Sheet "${sheetSpecifier}" not found. Available sheets: ${workbook.SheetNames.join(', ')}`);
+                if (!sheetNames.includes(sheetSpecifier)) {
+                    throw new Error(`Sheet "${sheetSpecifier}" not found. Available sheets: ${sheetNames.join(', ')}`);
                 }
-                sheetName = sheetSpecifier;
+                sheetOption = sheetSpecifier;
             }
             else {
-                // Convert to 0-based index
-                const zeroBasedIndex = sheetIndex - 1;
-                if (zeroBasedIndex < 0 || zeroBasedIndex >= workbook.SheetNames.length) {
-                    throw new Error(`Sheet index ${sheetIndex} is out of range. Available sheets: ${workbook.SheetNames.length} (1-${workbook.SheetNames.length})`);
+                // Validate 1-based index
+                if (sheetIndex < 1 || sheetIndex > sheetNames.length) {
+                    throw new Error(`Sheet index ${sheetIndex} is out of range. Available sheets: ${sheetNames.length} (1-${sheetNames.length})`);
                 }
-                sheetName = workbook.SheetNames[zeroBasedIndex];
+                sheetOption = sheetIndex;
             }
         }
         else {
-            // Use the first sheet by default
-            sheetName = workbook.SheetNames[0];
+            // Use the first sheet by default (1-based index)
+            sheetOption = 1;
         }
-        const sheet = workbook.Sheets[sheetName];
-        // Convert sheet to JSON and validate the result
-        const data = xlsx.utils.sheet_to_json(sheet, { defval: '' });
+        // Get the sheet name for error messages
+        const sheetName = typeof sheetOption === 'number' ? sheetNames[sheetOption - 1] : sheetOption;
+        // Read the sheet - returns array of arrays
+        const rows = await readXlsxFile(actualFilePath, { sheet: sheetOption });
         // Check if the sheet is empty
-        if (data.length === 0) {
+        if (rows.length === 0) {
             throw new Error(`Sheet "${sheetName}" is empty or contains no valid data rows`);
         }
+        // First row should be headers
+        const headers = rows[0].map((cell) => (cell != null ? String(cell) : ''));
         // Check if the first row has any headers
-        const firstRow = data[0];
-        const headers = Object.keys(firstRow);
-        if (headers.length === 0) {
+        if (headers.length === 0 || headers.every((h) => h === '')) {
             throw new Error(`Sheet "${sheetName}" has no valid column headers`);
         }
+        // Check if there's only headers with no data rows
+        if (rows.length === 1) {
+            throw new Error(`Sheet "${sheetName}" is empty or contains no valid data rows`);
+        }
+        // Convert rows to array of objects (similar to xlsx's sheet_to_json with defval: '')
+        const data = rows.slice(1).map((row) => {
+            const obj = {};
+            headers.forEach((header, index) => {
+                // Use empty string as default value (like xlsx's defval: '')
+                const cellValue = row[index];
+                obj[header] = cellValue != null ? String(cellValue) : '';
+            });
+            return obj;
+        });
         // Check for completely empty columns (all values are empty strings)
         const hasValidData = data.some((row) => headers.some((header) => row[header] && row[header].toString().trim() !== ''));
         if (!hasValidData) {
@@ -98,10 +123,10 @@ async function parseXlsxFile(filePath) {
     }
     catch (error) {
         if (error instanceof Error) {
-            // Handle missing xlsx module
-            if (error.message.includes("Cannot find module 'xlsx'")) {
-                throw new Error('xlsx is not installed. Please install it with: npm install xlsx\n' +
-                    'Note: xlsx is an optional peer dependency for reading Excel files.');
+            // Handle missing read-excel-file module
+            if (error.message.includes("Cannot find module 'read-excel-file")) {
+                throw new Error('read-excel-file is not installed. Please install it with: npm install read-excel-file\n' +
+                    'Note: read-excel-file is an optional peer dependency for reading Excel files.');
             }
             // Re-throw our own validation errors without wrapping
             // These already have descriptive messages
@@ -111,6 +136,7 @@ async function parseXlsxFile(filePath) {
                 'Sheet "',
                 'Sheet index',
                 'contains only empty data',
+                'read-excel-file is not installed',
             ];
             if (knownErrors.some((prefix) => error.message.startsWith(prefix))) {
                 throw error;