npm - promptfoo - Versions diffs - 0.102.2 → 0.102.4 - Mend

promptfoo 0.102.2 → 0.102.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84)

package/README.md +1 -0
package/dist/package.json +13 -13
package/dist/src/app/assets/{index-Ce_ypVwN.js → index-DVUcCcZX.js} +255 -255
package/dist/src/app/assets/{index.es-BhCb3aAk.js → index.es-Bxgo-NgH.js} +1 -1
package/dist/src/app/assets/{sync-BXsc6UV3.js → sync-BiF17zM_.js} +1 -1
package/dist/src/app/index.html +1 -1
package/dist/src/assertions/geval.d.ts +3 -0
package/dist/src/assertions/geval.d.ts.map +1 -0
package/dist/src/assertions/geval.js +39 -0
package/dist/src/assertions/geval.js.map +1 -0
package/dist/src/assertions/index.d.ts +1 -1
package/dist/src/assertions/index.d.ts.map +1 -1
package/dist/src/assertions/index.js +4 -0
package/dist/src/assertions/index.js.map +1 -1
package/dist/src/assertions/refusal.d.ts +3 -0
package/dist/src/assertions/refusal.d.ts.map +1 -0
package/dist/src/assertions/refusal.js +23 -0
package/dist/src/assertions/refusal.js.map +1 -0
package/dist/src/assertions/utils.d.ts +139 -1
package/dist/src/assertions/utils.d.ts.map +1 -1
package/dist/src/database/tables.d.ts +12 -12
package/dist/src/matchers.d.ts +1 -0
package/dist/src/matchers.d.ts.map +1 -1
package/dist/src/matchers.js +74 -0
package/dist/src/matchers.js.map +1 -1
package/dist/src/models/evalResult.d.ts.map +1 -1
package/dist/src/models/evalResult.js +8 -1
package/dist/src/models/evalResult.js.map +1 -1
package/dist/src/providers/bedrock.d.ts.map +1 -1
package/dist/src/providers/bedrock.js +10 -3
package/dist/src/providers/bedrock.js.map +1 -1
package/dist/src/providers/shared.js +2 -2
package/dist/src/providers/shared.js.map +1 -1
package/dist/src/redteam/constants.d.ts +5 -1
package/dist/src/redteam/constants.d.ts.map +1 -1
package/dist/src/redteam/constants.js +21 -0
package/dist/src/redteam/constants.js.map +1 -1
package/dist/src/redteam/index.d.ts.map +1 -1
package/dist/src/redteam/index.js +4 -0
package/dist/src/redteam/index.js.map +1 -1
package/dist/src/redteam/plugins/index.d.ts.map +1 -1
package/dist/src/redteam/plugins/index.js +2 -0
package/dist/src/redteam/plugins/index.js.map +1 -1
package/dist/src/redteam/plugins/pliny.d.ts +9 -0
package/dist/src/redteam/plugins/pliny.d.ts.map +1 -0
package/dist/src/redteam/plugins/pliny.js +68 -0
package/dist/src/redteam/plugins/pliny.js.map +1 -0
package/dist/src/redteam/providers/crescendo/index.d.ts +4 -7
package/dist/src/redteam/providers/crescendo/index.d.ts.map +1 -1
package/dist/src/redteam/providers/crescendo/index.js +1 -1
package/dist/src/redteam/providers/crescendo/index.js.map +1 -1
package/dist/src/redteam/providers/goat.d.ts.map +1 -1
package/dist/src/redteam/providers/goat.js +15 -10
package/dist/src/redteam/providers/goat.js.map +1 -1
package/dist/src/redteam/providers/shared.d.ts +5 -0
package/dist/src/redteam/providers/shared.d.ts.map +1 -1
package/dist/src/redteam/providers/shared.js +3 -1
package/dist/src/redteam/providers/shared.js.map +1 -1
package/dist/src/redteam/shared.d.ts.map +1 -1
package/dist/src/redteam/shared.js +1 -0
package/dist/src/redteam/shared.js.map +1 -1
package/dist/src/redteam/util.d.ts.map +1 -1
package/dist/src/redteam/util.js +19 -2
package/dist/src/redteam/util.js.map +1 -1
package/dist/src/server/routes/redteam.js +2 -1
package/dist/src/server/routes/redteam.js.map +1 -1
package/dist/src/types/index.d.ts +343 -343
package/dist/src/types/index.d.ts.map +1 -1
package/dist/src/types/index.js +2 -0
package/dist/src/types/index.js.map +1 -1
package/dist/src/util/index.d.ts +4 -4
package/dist/src/validators/redteam.js +2 -2
package/dist/src/validators/redteam.js.map +1 -1
package/dist/test/factories/evalFactory.d.ts +8 -8
package/dist/test/models/evalResult.test.d.ts +2 -0
package/dist/test/models/evalResult.test.d.ts.map +1 -0
package/dist/test/models/evalResult.test.js +217 -0
package/dist/test/models/evalResult.test.js.map +1 -0
package/dist/test/providers/bedrock.test.js +110 -0
package/dist/test/providers/bedrock.test.js.map +1 -1
package/dist/test/redteam/validators.test.js +2 -0
package/dist/test/redteam/validators.test.js.map +1 -1
package/dist/tsconfig.tsbuildinfo +1 -1
package/package.json +13 -13

package/dist/src/assertions/utils.d.ts CHANGED Viewed

@@ -1,5 +1,143 @@
 import { type Assertion, type TestCase } from '../types';
-export declare function getFinalTest(test: TestCase, assertion: Assertion): Readonly<TestCase>;
+export declare function getFinalTest(test: TestCase, assertion: Assertion): Readonly<{
+    options?: ({
+        prefix?: string | undefined;
+        suffix?: string | undefined;
+    } & {
+        transform?: string | undefined;
+        postprocess?: string | undefined;
+        transformVars?: string | undefined;
+        storeOutputAs?: string | undefined;
+    } & {
+        provider?: any;
+        rubricPrompt?: string | string[] | {
+            role: string;
+            content: string;
+        }[] | undefined;
+        factuality?: {
+            subset?: number | undefined;
+            superset?: number | undefined;
+            agree?: number | undefined;
+            disagree?: number | undefined;
+            differButFactual?: number | undefined;
+        } | undefined;
+    } & {
+        disableVarExpansion?: boolean | undefined;
+        disableConversationVar?: boolean | undefined;
+        runSerially?: boolean | undefined;
+    }) | undefined;
+    vars?: Record<string, string | any[] | string[] | {}> | undefined;
+    provider?: string | {
+        id?: string | undefined;
+        config?: any;
+        label?: string | undefined;
+        prompts?: string[] | undefined;
+        transform?: string | undefined;
+        delay?: number | undefined;
+        env?: {
+            PROMPTFOO_REMOTE_GENERATION_URL?: string | undefined;
+            AI21_API_BASE_URL?: string | undefined;
+            AI21_API_KEY?: string | undefined;
+            ANTHROPIC_API_KEY?: string | undefined;
+            AWS_BEDROCK_REGION?: string | undefined;
+            FAL_KEY?: string | undefined;
+            GROQ_API_KEY?: string | undefined;
+            LOCALAI_BASE_URL?: string | undefined;
+            WATSONX_AI_APIKEY?: string | undefined;
+            WATSONX_AI_PROJECT_ID?: string | undefined;
+            WATSONX_AI_BEARER_TOKEN?: string | undefined;
+            AZURE_CLIENT_SECRET?: string | undefined;
+            AZURE_CLIENT_ID?: string | undefined;
+            AZURE_TENANT_ID?: string | undefined;
+            AZURE_AUTHORITY_HOST?: string | undefined;
+            AZURE_TOKEN_SCOPE?: string | undefined;
+            AZURE_DEPLOYMENT_NAME?: string | undefined;
+            AZURE_EMBEDDING_DEPLOYMENT_NAME?: string | undefined;
+            AZURE_OPENAI_DEPLOYMENT_NAME?: string | undefined;
+            AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME?: string | undefined;
+            AZURE_OPENAI_API_BASE_URL?: string | undefined;
+            AZURE_OPENAI_API_HOST?: string | undefined;
+            AZURE_OPENAI_API_KEY?: string | undefined;
+            AZURE_API_BASE_URL?: string | undefined;
+            AZURE_API_HOST?: string | undefined;
+            AZURE_API_KEY?: string | undefined;
+            AZURE_OPENAI_BASE_URL?: string | undefined;
+            BAM_API_HOST?: string | undefined;
+            BAM_API_KEY?: string | undefined;
+            CLOUDFLARE_ACCOUNT_ID?: string | undefined;
+            CLOUDFLARE_API_KEY?: string | undefined;
+            COHERE_API_KEY?: string | undefined;
+            GOOGLE_API_HOST?: string | undefined;
+            GOOGLE_API_KEY?: string | undefined;
+            MISTRAL_API_BASE_URL?: string | undefined;
+            MISTRAL_API_HOST?: string | undefined;
+            MISTRAL_API_KEY?: string | undefined;
+            OPENAI_API_BASE_URL?: string | undefined;
+            OPENAI_API_HOST?: string | undefined;
+            OPENAI_API_KEY?: string | undefined;
+            OPENAI_BASE_URL?: string | undefined;
+            OPENAI_ORGANIZATION?: string | undefined;
+            PALM_API_HOST?: string | undefined;
+            PALM_API_KEY?: string | undefined;
+            REPLICATE_API_KEY?: string | undefined;
+            REPLICATE_API_TOKEN?: string | undefined;
+            VERTEX_API_HOST?: string | undefined;
+            VERTEX_API_KEY?: string | undefined;
+            VERTEX_PROJECT_ID?: string | undefined;
+            VERTEX_PUBLISHER?: string | undefined;
+            VERTEX_REGION?: string | undefined;
+        } | undefined;
+    } | {
+        callApi: import("../types").CallApiFunction;
+        id: (...args: unknown[]) => string;
+        config?: any;
+        label?: string | undefined;
+        transform?: string | undefined;
+        delay?: number | undefined;
+        callEmbeddingApi?: ((args_0: string, ...args: unknown[]) => Promise<import("../types").ProviderEmbeddingResponse>) | undefined;
+        callClassificationApi?: ((args_0: string, ...args: unknown[]) => Promise<import("../types").ProviderClassificationResponse>) | undefined;
+    } | undefined;
+    metadata?: (Record<string, any> & {
+        pluginConfig?: import("../types").PluginConfig | undefined;
+        strategyConfig?: import("../types").RedteamObjectConfig | undefined;
+    }) | undefined;
+    description?: string | undefined;
+    providerOutput?: string | {} | undefined;
+    assert?: ({
+        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+        value?: import("../types").AssertionValue | undefined;
+        config?: Record<string, any> | undefined;
+        provider?: any;
+        transform?: string | undefined;
+        rubricPrompt?: string | string[] | {
+            role: string;
+            content: string;
+        }[] | undefined;
+        threshold?: number | undefined;
+        weight?: number | undefined;
+        metric?: string | undefined;
+    } | {
+        type: "assert-set";
+        assert: {
+            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+            value?: import("../types").AssertionValue | undefined;
+            config?: Record<string, any> | undefined;
+            provider?: any;
+            transform?: string | undefined;
+            rubricPrompt?: string | string[] | {
+                role: string;
+                content: string;
+            }[] | undefined;
+            threshold?: number | undefined;
+            weight?: number | undefined;
+            metric?: string | undefined;
+        }[];
+        threshold?: number | undefined;
+        weight?: number | undefined;
+        metric?: string | undefined;
+    })[] | undefined;
+    threshold?: number | undefined;
+}>;
 export declare function processFileReference(fileRef: string): object | string;
 export declare function coerceString(value: string | object): string;
 //# sourceMappingURL=utils.d.ts.map

package/dist/src/assertions/utils.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/assertions/utils.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,KAAK,SAAS,EAAE,KAAK,QAAQ,EAAE,MAAM,UAAU,CAAC;AAIzD,wBAAgB,YAAY,CAAC,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,sBAUhE;AAED,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAYrE;AAED,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAK3D"}
1	+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/assertions/utils.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,KAAK,SAAS,EAAE,KAAK,QAAQ,EAAE,MAAM,UAAU,CAAC;AAIzD,wBAAgB,YAAY,CAAC,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAUhE;AAED,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAYrE;AAED,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAK3D"}

package/dist/src/database/tables.d.ts CHANGED Viewed

@@ -539,7 +539,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                     description?: string | undefined;
                     providerOutput?: string | {} | undefined;
                     assert?: ({
-                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                         value?: import("../types").AssertionValue | undefined;
                         config?: Record<string, any> | undefined;
                         provider?: any;
@@ -554,7 +554,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                     } | {
                         type: "assert-set";
                         assert: {
-                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                             value?: import("../types").AssertionValue | undefined;
                             config?: Record<string, any> | undefined;
                             provider?: any;
@@ -681,7 +681,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                         description?: string | undefined;
                         providerOutput?: string | {} | undefined;
                         assert?: ({
-                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                             value?: import("../types").AssertionValue | undefined;
                             config?: Record<string, any> | undefined;
                             provider?: any;
@@ -696,7 +696,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                         } | {
                             type: "assert-set";
                             assert: {
-                                type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                                type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                                 value?: import("../types").AssertionValue | undefined;
                                 config?: Record<string, any> | undefined;
                                 provider?: any;
@@ -820,7 +820,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                         description?: string | undefined;
                         providerOutput?: string | {} | undefined;
                         assert?: ({
-                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                             value?: import("../types").AssertionValue | undefined;
                             config?: Record<string, any> | undefined;
                             provider?: any;
@@ -835,7 +835,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                         } | {
                             type: "assert-set";
                             assert: {
-                                type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                                type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                                 value?: import("../types").AssertionValue | undefined;
                                 config?: Record<string, any> | undefined;
                                 provider?: any;
@@ -960,7 +960,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                     }) | undefined;
                     providerOutput?: string | {} | undefined;
                     assert?: ({
-                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                         value?: import("../types").AssertionValue | undefined;
                         config?: Record<string, any> | undefined;
                         provider?: any;
@@ -975,7 +975,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                     } | {
                         type: "assert-set";
                         assert: {
-                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                             value?: import("../types").AssertionValue | undefined;
                             config?: Record<string, any> | undefined;
                             provider?: any;
@@ -1493,7 +1493,7 @@ export declare const evalResultsTable: import("drizzle-orm/sqlite-core").SQLiteT
                 description?: string | undefined;
                 providerOutput?: string | {} | undefined;
                 assert?: ({
-                    type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                    type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                     value?: import("../types").AssertionValue | undefined;
                     config?: Record<string, any> | undefined;
                     provider?: any;
@@ -1508,7 +1508,7 @@ export declare const evalResultsTable: import("drizzle-orm/sqlite-core").SQLiteT
                 } | {
                     type: "assert-set";
                     assert: {
-                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                         value?: import("../types").AssertionValue | undefined;
                         config?: Record<string, any> | undefined;
                         provider?: any;
@@ -1966,7 +1966,7 @@ export declare const datasetsTable: import("drizzle-orm/sqlite-core").SQLiteTabl
                 description?: string | undefined;
                 providerOutput?: string | {} | undefined;
                 assert?: ({
-                    type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                    type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                     value?: import("../types").AssertionValue | undefined;
                     config?: Record<string, any> | undefined;
                     provider?: any;
@@ -1981,7 +1981,7 @@ export declare const datasetsTable: import("drizzle-orm/sqlite-core").SQLiteTabl
                 } | {
                     type: "assert-set";
                     assert: {
-                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                         value?: import("../types").AssertionValue | undefined;
                         config?: Record<string, any> | undefined;
                         provider?: any;

package/dist/src/matchers.d.ts CHANGED Viewed

@@ -15,6 +15,7 @@ export declare function renderLlmRubricPrompt(rubric: string, llmOutput: string,
 export declare function matchesLlmRubric(rubric: string, llmOutput: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
 export declare function matchesFactuality(input: string, expected: string, output: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
 export declare function matchesClosedQa(input: string, expected: string, output: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
+export declare function matchesGEval(criteria: string, input: string, output: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;
 export declare function matchesAnswerRelevance(input: string, output: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;
 export declare function matchesContextRecall(context: string, groundTruth: string, threshold: number, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
 export declare function matchesContextRelevance(question: string, context: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;

package/dist/src/matchers.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"matchers.d.ts","sourceRoot":"","sources":["../../src/matchers.ts"],"names":[],"mappings":"~~AAqBA~~,OAAO,KAAK,EAGV,WAAW,EAEX,aAAa,EACb,aAAa,EAIb,YAAY,EAEb,MAAM,SAAS,CAAC;AAiDjB,wBAAsB,kBAAkB,CACtC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,GAClC,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAqC7B;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,EACnC,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,WAAW,CAAC,CAmCtB;AAgBD,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,OAAe,EACxB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA8F3C;AAED;;;;;;;GAOG;AACH,wBAAsB,qBAAqB,CACzC,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoC3C;AAED,wBAAgB,qBAAqB,CACnC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,UAavC;AAED,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyD3C;AAED,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAwF3C;AAED,wBAAsB,eAAe,CACnC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoD3C;AAED,wBAAsB,sBAAsB,CAC1C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA6F3C;AAED,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,E
AAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0C3C;AAED,wBAAsB,uBAAuB,CAC3C,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyC3C;AAED,wBAAsB,0BAA0B,CAC9C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0E3C;AAED,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,EAAE,CAAC,CAyD7C;AAED,UAAU,sBAAsB;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AAED,wBAAsB,iBAAiB,CACrC,EAAE,UAAU,EAAE,iBAAiB,EAAE,UAAe,EAAE,EAAE,sBAAsB,EAC1E,OAAO,CAAC,EAAE,aAAa;;;;GAwDxB"}
1	+ {"version":3,"file":"matchers.d.ts","sourceRoot":"","sources":["../../src/matchers.ts"],"names":[],"mappings":"AAsBA,OAAO,KAAK,EAGV,WAAW,EAEX,aAAa,EACb,aAAa,EAIb,YAAY,EAEb,MAAM,SAAS,CAAC;AAiDjB,wBAAsB,kBAAkB,CACtC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,GAClC,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAqC7B;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,EACnC,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,WAAW,CAAC,CAmCtB;AAgBD,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,OAAe,EACxB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA8F3C;AAED;;;;;;;GAOG;AACH,wBAAsB,qBAAqB,CACzC,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoC3C;AAED,wBAAgB,qBAAqB,CACnC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,UAavC;AAED,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyD3C;AAED,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAwF3C;AAED,wBAAsB,eAAe,CACnC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoD3C;AAED,wBAAsB,YAAY,CAChC,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAmF3C;AAED,wBAAsB,sBAAsB,CAC1C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CA
AC,aAAa,EAAE,WAAW,CAAC,CAAC,CA6F3C;AAED,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0C3C;AAED,wBAAsB,uBAAuB,CAC3C,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyC3C;AAED,wBAAsB,0BAA0B,CAC9C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0E3C;AAED,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,EAAE,CAAC,CAyD7C;AAED,UAAU,sBAAsB;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AAED,wBAAsB,iBAAiB,CACrC,EAAE,UAAU,EAAE,iBAAiB,EAAE,UAAe,EAAE,EAAE,sBAAsB,EAC1E,OAAO,CAAC,EAAE,aAAa;;;;GAwDxB"}

package/dist/src/matchers.js CHANGED Viewed

@@ -11,12 +11,14 @@ exports.renderLlmRubricPrompt = renderLlmRubricPrompt;
 exports.matchesLlmRubric = matchesLlmRubric;
 exports.matchesFactuality = matchesFactuality;
 exports.matchesClosedQa = matchesClosedQa;
+exports.matchesGEval = matchesGEval;
 exports.matchesAnswerRelevance = matchesAnswerRelevance;
 exports.matchesContextRecall = matchesContextRecall;
 exports.matchesContextRelevance = matchesContextRelevance;
 exports.matchesContextFaithfulness = matchesContextFaithfulness;
 exports.matchesSelectBest = matchesSelectBest;
 exports.matchesModeration = matchesModeration;
+const dedent_1 = __importDefault(require("dedent"));
 const cliState_1 = __importDefault(require("./cliState"));
 const envars_1 = require("./envars");
 const logger_1 = __importDefault(require("./logger"));
@@ -428,6 +430,78 @@ async function matchesClosedQa(input, expected, output, grading, vars) {
         return fail(`Error parsing output: ${err.message}`, resp.tokenUsage);
     }
 }
+/**
+ * Grades `output` against `criteria` with a two-stage LLM check.
+ *
+ * Stage 1 asks the grading provider to turn `criteria` into a short list of
+ * evaluation steps. Stage 2 asks the same provider to rate `output` (the
+ * "Reply") against `input` (the "Source Text") on a 0-10 scale using those
+ * steps. The score is divided by 10 before being compared with `threshold`.
+ *
+ * @param criteria - Evaluation criteria interpolated into both prompts.
+ * @param input - Source text the reply is judged against; must be non-empty.
+ * @param output - Reply text being graded.
+ * @param threshold - Minimum normalised score (0..1) required to pass.
+ * @param grading - Optional grading config; `grading.provider` overrides the default grading provider.
+ * @returns A grading result with `pass`, `score` (0..1) and `reason`; provider
+ *   errors and unparsable LLM replies are reported through `fail(...)`.
+ * @throws Error when `input` is empty.
+ */
+async function matchesGEval(criteria, input, output, threshold, grading) {
+    if (!input) {
+        throw new Error('No source text to estimate reply');
+    }
+    const maxScore = 10;
+    // Guard for a non-string provider output so the regex extraction below always runs on text.
+    const asText = (value) => (typeof value === 'string' ? value : JSON.stringify(value));
+    const textProvider = await getAndCheckProvider('text', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).gradingProvider, 'reply geval check');
+    const promptSteps = (0, dedent_1.default) `
+    Given an evaluation criteria which outlines how you should judge some text, generate 3-4 concise evaluation steps for any text based on the criteria below.
+    Evaluation Criteria:
+    ${criteria}
+    **
+    IMPORTANT: Please make sure to only return in minified JSON format, with the "steps" key as a list of strings. No additional words, explanation or formatting is needed.
+    Example JSON:
+    {"steps": <list_of_strings>}
+    **
+    JSON:
+    `;
+    const respSteps = await textProvider.callApi(promptSteps);
+    // Report a provider failure as such instead of as a JSON-format problem.
+    if (respSteps.error || !respSteps.output) {
+        return fail(respSteps.error || 'No output', respSteps.tokenUsage);
+    }
+    const stepsText = asText(respSteps.output);
+    let steps;
+    try {
+        // NOTE: extract the JSON object with a regexp because the LLM sometimes wraps its
+        // response in a markdown fence (```json ... ```). `[\s\S]` is used instead of `.`
+        // so that pretty-printed, multi-line JSON is matched as well.
+        steps = JSON.parse(stepsText.match(/\{\s*"steps"[\s\S]*\}/)[0]).steps;
+    }
+    catch {
+        return fail(`LLM-proposed evaluation steps are not in JSON format: ${stepsText}`, respSteps.tokenUsage);
+    }
+    // A non-array value would make `steps.join` below throw outside any try block.
+    if (!Array.isArray(steps) || steps.length === 0) {
+        return fail('LLM does not propose any evaluation step', respSteps.tokenUsage);
+    }
+    const promptText = (0, dedent_1.default) `
+    You will be given one Reply for a Source Text below. Your task is to rate the Reply on one metric.
+    Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
+    Evaluation Criteria:
+    ${criteria}
+    Evaluation Steps:
+    - ${steps.join('\n- ')}
+    - Given the evaluation steps, return a JSON with two keys: 1) a "score" key ranging from 0 - ${maxScore}, with ${maxScore} being that it follows the Evaluation Criteria outlined in the Evaluation Steps and 0 being that it does not; 2) a "reason" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Source Text and Reply in your reason, but be very concise with it!
+    Source Text:
+    ${input}
+    Reply:
+    ${output}
+    **
+    IMPORTANT: Please make sure to only return in minified JSON format, with the "score" and "reason" key. No additional words, explanation or formatting is needed.
+    Example JSON:
+    {"score":0,"reason":"The text does not follow the evaluation steps provided."}
+    **
+    JSON:
+    `;
+    const resp = await textProvider.callApi(promptText);
+    if (resp.error || !resp.output) {
+        return fail(resp.error || 'No output', resp.tokenUsage);
+    }
+    const resultText = asText(resp.output);
+    let result;
+    try {
+        // Same extraction as above: first "{" through last "}", newlines included.
+        result = JSON.parse(resultText.match(/\{[\s\S]*\}/)[0]);
+    }
+    catch {
+        return fail(`LLM-proposed evaluation result is not in JSON format: ${resultText}`, resp.tokenUsage);
+    }
+    // Accept numeric strings such as "7", but reject missing or non-numeric scores
+    // rather than letting NaN leak into the grading result.
+    const rawScore = typeof result.score === 'number' ? result.score : Number.parseFloat(result.score);
+    if (!Number.isFinite(rawScore)) {
+        return fail(`LLM-proposed evaluation result has no numeric score: ${resultText}`, resp.tokenUsage);
+    }
+    // Clamp to the advertised 0..maxScore range so the normalised score stays within 0..1.
+    const score = Math.min(Math.max(rawScore, 0), maxScore) / maxScore;
+    return {
+        pass: score >= threshold,
+        score,
+        reason: result.reason,
+    };
+}
 async function matchesAnswerRelevance(input, output, threshold, grading) {
     const embeddingProvider = await getAndCheckProvider('embedding', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).embeddingProvider, 'answer relevancy check');
     const textProvider = await getAndCheckProvider('text', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).gradingProvider, 'answer relevancy check');