npm - promptfoo - Versions diffs - 0.102.3 → 0.103.0 - Mend

promptfoo 0.102.3 → 0.103.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (150) hide show

package/dist/package.json +1 -1
package/dist/src/app/assets/{index-isMhDyh1.js → index-hVGk-Oul.js} +276 -277
package/dist/src/app/assets/{index.es-BNF9PUeN.js → index.es-CcK3JjZn.js} +1 -1
package/dist/src/app/assets/{sync-DaNyhPy-.js → sync-BaigR5eq.js} +1 -1
package/dist/src/app/index.html +1 -1
package/dist/src/assertions/geval.d.ts +3 -0
package/dist/src/assertions/geval.d.ts.map +1 -0
package/dist/src/assertions/geval.js +39 -0
package/dist/src/assertions/geval.js.map +1 -0
package/dist/src/assertions/index.d.ts +1 -1
package/dist/src/assertions/index.d.ts.map +1 -1
package/dist/src/assertions/index.js +2 -0
package/dist/src/assertions/index.js.map +1 -1
package/dist/src/assertions/utils.d.ts +2 -2
package/dist/src/database/tables.d.ts +12 -12
package/dist/src/googleSheets.d.ts +7 -0
package/dist/src/googleSheets.d.ts.map +1 -1
package/dist/src/googleSheets.js +32 -1
package/dist/src/googleSheets.js.map +1 -1
package/dist/src/integrations/huggingfaceDatasets.d.ts +3 -0
package/dist/src/integrations/huggingfaceDatasets.d.ts.map +1 -0
package/dist/src/integrations/huggingfaceDatasets.js +87 -0
package/dist/src/integrations/huggingfaceDatasets.js.map +1 -0
package/dist/src/matchers.d.ts +1 -0
package/dist/src/matchers.d.ts.map +1 -1
package/dist/src/matchers.js +74 -0
package/dist/src/matchers.js.map +1 -1
package/dist/src/models/evalResult.d.ts.map +1 -1
package/dist/src/models/evalResult.js +8 -1
package/dist/src/models/evalResult.js.map +1 -1
package/dist/src/providers/bedrock.d.ts.map +1 -1
package/dist/src/providers/bedrock.js +10 -3
package/dist/src/providers/bedrock.js.map +1 -1
package/dist/src/providers/simulatedUser.d.ts.map +1 -1
package/dist/src/providers/simulatedUser.js +5 -0
package/dist/src/providers/simulatedUser.js.map +1 -1
package/dist/src/providers.d.ts.map +1 -1
package/dist/src/providers.js +4 -0
package/dist/src/providers.js.map +1 -1
package/dist/src/redteam/constants.d.ts +7 -3
package/dist/src/redteam/constants.d.ts.map +1 -1
package/dist/src/redteam/constants.js +27 -1
package/dist/src/redteam/constants.js.map +1 -1
package/dist/src/redteam/graders.d.ts.map +1 -1
package/dist/src/redteam/graders.js +2 -0
package/dist/src/redteam/graders.js.map +1 -1
package/dist/src/redteam/index.d.ts +1 -1
package/dist/src/redteam/index.d.ts.map +1 -1
package/dist/src/redteam/index.js +10 -2
package/dist/src/redteam/index.js.map +1 -1
package/dist/src/redteam/plugins/beavertails.d.ts +17 -0
package/dist/src/redteam/plugins/beavertails.d.ts.map +1 -0
package/dist/src/redteam/plugins/beavertails.js +104 -0
package/dist/src/redteam/plugins/beavertails.js.map +1 -0
package/dist/src/redteam/plugins/index.d.ts.map +1 -1
package/dist/src/redteam/plugins/index.js +2 -0
package/dist/src/redteam/plugins/index.js.map +1 -1
package/dist/src/redteam/plugins/intent.d.ts +6 -3
package/dist/src/redteam/plugins/intent.d.ts.map +1 -1
package/dist/src/redteam/plugins/intent.js +32 -13
package/dist/src/redteam/plugins/intent.js.map +1 -1
package/dist/src/redteam/plugins/pliny.d.ts.map +1 -1
package/dist/src/redteam/plugins/pliny.js +1 -1
package/dist/src/redteam/plugins/pliny.js.map +1 -1
package/dist/src/redteam/providers/bestOfN.d.ts +16 -0
package/dist/src/redteam/providers/bestOfN.d.ts.map +1 -0
package/dist/src/redteam/providers/bestOfN.js +103 -0
package/dist/src/redteam/providers/bestOfN.js.map +1 -0
package/dist/src/redteam/providers/crescendo/index.d.ts.map +1 -1
package/dist/src/redteam/providers/crescendo/index.js +20 -0
package/dist/src/redteam/providers/crescendo/index.js.map +1 -1
package/dist/src/redteam/providers/goat.d.ts.map +1 -1
package/dist/src/redteam/providers/goat.js +7 -0
package/dist/src/redteam/providers/goat.js.map +1 -1
package/dist/src/redteam/providers/iterative.d.ts +3 -0
package/dist/src/redteam/providers/iterative.d.ts.map +1 -1
package/dist/src/redteam/providers/iterative.js +63 -26
package/dist/src/redteam/providers/iterative.js.map +1 -1
package/dist/src/redteam/providers/iterativeImage.d.ts.map +1 -1
package/dist/src/redteam/providers/iterativeImage.js +17 -0
package/dist/src/redteam/providers/iterativeImage.js.map +1 -1
package/dist/src/redteam/providers/iterativeTree.d.ts +7 -17
package/dist/src/redteam/providers/iterativeTree.d.ts.map +1 -1
package/dist/src/redteam/providers/iterativeTree.js +59 -9
package/dist/src/redteam/providers/iterativeTree.js.map +1 -1
package/dist/src/redteam/providers/prompts.d.ts.map +1 -1
package/dist/src/redteam/providers/prompts.js +25 -3
package/dist/src/redteam/providers/prompts.js.map +1 -1
package/dist/src/redteam/providers/shared.d.ts +1 -0
package/dist/src/redteam/providers/shared.d.ts.map +1 -1
package/dist/src/redteam/providers/shared.js +8 -0
package/dist/src/redteam/providers/shared.js.map +1 -1
package/dist/src/redteam/shared.d.ts.map +1 -1
package/dist/src/redteam/shared.js +1 -0
package/dist/src/redteam/shared.js.map +1 -1
package/dist/src/redteam/strategies/bestOfN.d.ts +3 -0
package/dist/src/redteam/strategies/bestOfN.d.ts.map +1 -0
package/dist/src/redteam/strategies/bestOfN.js +35 -0
package/dist/src/redteam/strategies/bestOfN.js.map +1 -0
package/dist/src/redteam/strategies/index.d.ts.map +1 -1
package/dist/src/redteam/strategies/index.js +22 -12
package/dist/src/redteam/strategies/index.js.map +1 -1
package/dist/src/redteam/util.d.ts.map +1 -1
package/dist/src/redteam/util.js +14 -3
package/dist/src/redteam/util.js.map +1 -1
package/dist/src/server/routes/redteam.js +2 -1
package/dist/src/server/routes/redteam.js.map +1 -1
package/dist/src/telemetry.d.ts +6 -0
package/dist/src/telemetry.d.ts.map +1 -1
package/dist/src/telemetry.js +6 -0
package/dist/src/telemetry.js.map +1 -1
package/dist/src/testCases.d.ts.map +1 -1
package/dist/src/testCases.js +7 -0
package/dist/src/testCases.js.map +1 -1
package/dist/src/types/index.d.ts +343 -343
package/dist/src/types/index.d.ts.map +1 -1
package/dist/src/types/index.js +1 -0
package/dist/src/types/index.js.map +1 -1
package/dist/src/util/index.d.ts +4 -4
package/dist/src/validators/redteam.js +2 -2
package/dist/src/validators/redteam.js.map +1 -1
package/dist/test/factories/evalFactory.d.ts +8 -8
package/dist/test/googleSheets.test.d.ts +2 -0
package/dist/test/googleSheets.test.d.ts.map +1 -0
package/dist/test/googleSheets.test.js +240 -0
package/dist/test/googleSheets.test.js.map +1 -0
package/dist/test/integrations/huggingfaceDatasets.test.d.ts +2 -0
package/dist/test/integrations/huggingfaceDatasets.test.d.ts.map +1 -0
package/dist/test/integrations/huggingfaceDatasets.test.js +147 -0
package/dist/test/integrations/huggingfaceDatasets.test.js.map +1 -0
package/dist/test/models/evalResult.test.d.ts +2 -0
package/dist/test/models/evalResult.test.d.ts.map +1 -0
package/dist/test/models/evalResult.test.js +217 -0
package/dist/test/models/evalResult.test.js.map +1 -0
package/dist/test/providers/bedrock.test.js +110 -0
package/dist/test/providers/bedrock.test.js.map +1 -1
package/dist/test/redteam/plugins/intent.test.d.ts +2 -0
package/dist/test/redteam/plugins/intent.test.d.ts.map +1 -0
package/dist/test/redteam/plugins/intent.test.js +172 -0
package/dist/test/redteam/plugins/intent.test.js.map +1 -0
package/dist/test/redteam/providers/iterative.test.d.ts +2 -0
package/dist/test/redteam/providers/iterative.test.d.ts.map +1 -0
package/dist/test/redteam/providers/iterative.test.js +61 -0
package/dist/test/redteam/providers/iterative.test.js.map +1 -0
package/dist/test/redteam/providers/iterativeTree.test.js +46 -8
package/dist/test/redteam/providers/iterativeTree.test.js.map +1 -1
package/dist/test/redteam/validators.test.js +2 -0
package/dist/test/redteam/validators.test.js.map +1 -1
package/dist/tsconfig.tsbuildinfo +1 -1
package/package.json +1 -1

package/dist/src/database/tables.d.ts CHANGED Viewed

@@ -539,7 +539,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                     description?: string | undefined;
                     providerOutput?: string | {} | undefined;
                     assert?: ({
-                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                         value?: import("../types").AssertionValue | undefined;
                         config?: Record<string, any> | undefined;
                         provider?: any;
@@ -554,7 +554,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                     } | {
                         type: "assert-set";
                         assert: {
-                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                             value?: import("../types").AssertionValue | undefined;
                             config?: Record<string, any> | undefined;
                             provider?: any;
@@ -681,7 +681,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                         description?: string | undefined;
                         providerOutput?: string | {} | undefined;
                         assert?: ({
-                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                             value?: import("../types").AssertionValue | undefined;
                             config?: Record<string, any> | undefined;
                             provider?: any;
@@ -696,7 +696,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                         } | {
                             type: "assert-set";
                             assert: {
-                                type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                                type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                                 value?: import("../types").AssertionValue | undefined;
                                 config?: Record<string, any> | undefined;
                                 provider?: any;
@@ -820,7 +820,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                         description?: string | undefined;
                         providerOutput?: string | {} | undefined;
                         assert?: ({
-                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                             value?: import("../types").AssertionValue | undefined;
                             config?: Record<string, any> | undefined;
                             provider?: any;
@@ -835,7 +835,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                         } | {
                             type: "assert-set";
                             assert: {
-                                type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                                type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                                 value?: import("../types").AssertionValue | undefined;
                                 config?: Record<string, any> | undefined;
                                 provider?: any;
@@ -960,7 +960,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                     }) | undefined;
                     providerOutput?: string | {} | undefined;
                     assert?: ({
-                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                         value?: import("../types").AssertionValue | undefined;
                         config?: Record<string, any> | undefined;
                         provider?: any;
@@ -975,7 +975,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
                     } | {
                         type: "assert-set";
                         assert: {
-                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                            type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                             value?: import("../types").AssertionValue | undefined;
                             config?: Record<string, any> | undefined;
                             provider?: any;
@@ -1493,7 +1493,7 @@ export declare const evalResultsTable: import("drizzle-orm/sqlite-core").SQLiteT
                 description?: string | undefined;
                 providerOutput?: string | {} | undefined;
                 assert?: ({
-                    type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                    type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                     value?: import("../types").AssertionValue | undefined;
                     config?: Record<string, any> | undefined;
                     provider?: any;
@@ -1508,7 +1508,7 @@ export declare const evalResultsTable: import("drizzle-orm/sqlite-core").SQLiteT
                 } | {
                     type: "assert-set";
                     assert: {
-                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                         value?: import("../types").AssertionValue | undefined;
                         config?: Record<string, any> | undefined;
                         provider?: any;
@@ -1966,7 +1966,7 @@ export declare const datasetsTable: import("drizzle-orm/sqlite-core").SQLiteTabl
                 description?: string | undefined;
                 providerOutput?: string | {} | undefined;
                 assert?: ({
-                    type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                    type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                     value?: import("../types").AssertionValue | undefined;
                     config?: Record<string, any> | undefined;
                     provider?: any;
@@ -1981,7 +1981,7 @@ export declare const datasetsTable: import("drizzle-orm/sqlite-core").SQLiteTabl
                 } | {
                     type: "assert-set";
                     assert: {
-                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
+                        type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
                         value?: import("../types").AssertionValue | undefined;
                         config?: Record<string, any> | undefined;
                         provider?: any;

package/dist/src/googleSheets.d.ts CHANGED Viewed

@@ -1,4 +1,11 @@
 import type { CsvRow } from './types';
+export declare function checkGoogleSheetAccess(url: string): Promise<{
+    public: boolean;
+    status: number;
+} | {
+    public: boolean;
+    status?: undefined;
+}>;
 export declare function fetchCsvFromGoogleSheetUnauthenticated(url: string): Promise<CsvRow[]>;
 export declare function fetchCsvFromGoogleSheetAuthenticated(url: string): Promise<CsvRow[]>;
 export declare function fetchCsvFromGoogleSheet(url: string): Promise<CsvRow[]>;

package/dist/src/googleSheets.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"googleSheets.d.ts","sourceRoot":"","sources":["../../src/googleSheets.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;~~AAgBtC~~,wBAAsB,sCAAsC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAa3F;AAED,wBAAsB,oCAAoC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CA0CzF;AAED,wBAAsB,uBAAuB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAO5E;AAED,wBAAsB,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,~~CA+BtF~~"}
1	+ {"version":3,"file":"googleSheets.d.ts","sourceRoot":"","sources":["../../src/googleSheets.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAEtC,wBAAsB,sBAAsB,CAAC,GAAG,EAAE,MAAM;;;;;;GAYvD;AAED,wBAAsB,sCAAsC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAa3F;AAED,wBAAsB,oCAAoC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CA0CzF;AAED,wBAAsB,uBAAuB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAO5E;AAED,wBAAsB,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CA6DtF"}

package/dist/src/googleSheets.js CHANGED Viewed

@@ -36,6 +36,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
+exports.checkGoogleSheetAccess = checkGoogleSheetAccess;
 exports.fetchCsvFromGoogleSheetUnauthenticated = fetchCsvFromGoogleSheetUnauthenticated;
 exports.fetchCsvFromGoogleSheetAuthenticated = fetchCsvFromGoogleSheetAuthenticated;
 exports.fetchCsvFromGoogleSheet = fetchCsvFromGoogleSheet;
@@ -125,7 +126,37 @@ async function writeCsvToGoogleSheet(rows, url) {
         throw new Error(`Invalid Google Sheets URL: ${url}`);
     }
     const spreadsheetId = match[1];
-    const range = 'A1:ZZZ';
+    let range = 'A1:ZZZ';
+    const gid = Number(new URL(url).searchParams.get('gid'));
+    if (gid) {
+        const spreadsheet = await sheets.spreadsheets.get({ spreadsheetId, auth });
+        const sheetName = spreadsheet.data.sheets?.find((sheet) => sheet.properties?.sheetId === gid)
+            ?.properties?.title;
+        if (!sheetName) {
+            throw new Error(`Sheet not found for gid: ${gid}`);
+        }
+        range = `${sheetName}!${range}`;
+    }
+    else {
+        // Create a new sheet if no gid is provided
+        const newSheetTitle = `Sheet${Date.now()}`;
+        await sheets.spreadsheets.batchUpdate({
+            spreadsheetId,
+            auth,
+            requestBody: {
+                requests: [
+                    {
+                        addSheet: {
+                            properties: {
+                                title: newSheetTitle,
+                            },
+                        },
+                    },
+                ],
+            },
+        });
+        range = `${newSheetTitle}!${range}`;
+    }
     // Extract headers from the first row
     const headers = Object.keys(rows[0]);
     // Convert rows to a 2D array

package/dist/src/googleSheets.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"googleSheets.js","sourceRoot":"","sources":["../../src/googleSheets.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;~~AAiBA~~,wFAaC;AAED,oFA0CC;AAED,0DAOC;AAED,~~sDA+BC~~;~~AApHD~~,sDAA8B;~~AAG9B~~,KAAK,UAAU,sBAAsB,CAAC,GAAW;~~IAC/C~~,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,QAAQ,CAAC,EAAE,EAAE,CAAC;YAChB,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;QACnD,CAAC;aAAM,CAAC;YACN,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;QACpD,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,gBAAM,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC,CAAC;QACpD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IAC3B,CAAC;AACH,CAAC;AAEM,KAAK,UAAU,sCAAsC,CAAC,GAAW;IACtE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,wDAAa,gBAAgB,GAAC,CAAC;IAC3D,MAAM,EAAE,cAAc,EAAE,GAAG,wDAAa,SAAS,GAAC,CAAC;IAEnD,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IACjD,MAAM,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,SAAS,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IAE9F,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,MAAM,CAAC,CAAC;IAC9C,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;QAC5B,MAAM,IAAI,KAAK,CAAC,+CAA+C,GAAG,EAAE,CAAC,CAAC;IACxE,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IACtC,OAAO,QAAQ,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;AAC9C,CAAC;AAEM,KAAK,UAAU,oCAAoC,CAAC,GAAW;IACpE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,wDAAa,oBAAoB,GAAC,CAAC;IACtF,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC;QACrC,MAAM,EAAE,CAAC,uDAAuD,CAAC;KAClE,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,MAAM,aAAa,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,IAAI,KAAK,GAAG,QAAQ,CAAC;IACrB,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;IACzD,IAAI,GAAG,EAAE,CAAC;QACR,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,Y
AAY,CAAC,GAAG,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3E,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,OAAO,KAAK,GAAG,CAAC;YAC3F,EAAE,UAAU,EAAE,KAAK,CAAC;QACtB,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,KAAK,GAAG,GAAG,SAAS,IAAI,KAAK,EAAE,CAAC;IAClC,CAAC;IACD,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAEtF,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,uCAAuC,GAAG,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,0CAA0C;IAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACxB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC1B,MAAM,MAAM,GAAW,EAAE,CAAC;QAC1B,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;YAChC,MAAM,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;QACH,OAAO,MAAM,CAAC;IAChB,CAAC,CAAC,CAAC;AACL,CAAC;AAEM,KAAK,UAAU,uBAAuB,CAAC,GAAW;IACvD,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,sBAAsB,CAAC,GAAG,CAAC,CAAC;IAC/D,gBAAM,CAAC,KAAK,CAAC,sBAAsB,GAAG,eAAe,QAAQ,EAAE,CAAC,CAAC;IACjE,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,sCAAsC,CAAC,GAAG,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,oCAAoC,CAAC,GAAG,CAAC,CAAC;AACnD,CAAC;AAEM,KAAK,UAAU,qBAAqB,CAAC,IAAc,EAAE,GAAW;IACrE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,wDAAa,oBAAoB,GAAC,CAAC;IACtF,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC;QACrC,MAAM,EAAE,CAAC,8CAA8C,CAAC;KACzD,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,MAAM,aAAa,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;~~IAC~~/B,MAAM,KAAK,GAAG,QAAQ,CAAC;~~IAEvB~~,qCAAqC;IACrC,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAErC,6BAA6B;IAC7B,MAAM,MAAM,GAAG,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC
,CAAC,MAAM,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAErF,0BAA0B;IAC1B,gBAAM,CAAC,KAAK,CAAC,qCAAqC,GAAG,SAAS,MAAM,CAAC,MAAM,OAAO,CAAC,CAAC;IACpF,MAAM,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC;QACtC,aAAa;QACb,KAAK;QACL,gBAAgB,EAAE,cAAc;QAChC,IAAI;QACJ,WAAW,EAAE;YACX,MAAM;SACP;KACF,CAAC,CAAC;AACL,CAAC"}
1	+ {"version":3,"file":"googleSheets.js","sourceRoot":"","sources":["../../src/googleSheets.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAGA,wDAYC;AAED,wFAaC;AAED,oFA0CC;AAED,0DAOC;AAED,sDA6DC;AAlJD,sDAA8B;AAGvB,KAAK,UAAU,sBAAsB,CAAC,GAAW;IACtD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,QAAQ,CAAC,EAAE,EAAE,CAAC;YAChB,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;QACnD,CAAC;aAAM,CAAC;YACN,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;QACpD,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,gBAAM,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC,CAAC;QACpD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IAC3B,CAAC;AACH,CAAC;AAEM,KAAK,UAAU,sCAAsC,CAAC,GAAW;IACtE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,wDAAa,gBAAgB,GAAC,CAAC;IAC3D,MAAM,EAAE,cAAc,EAAE,GAAG,wDAAa,SAAS,GAAC,CAAC;IAEnD,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IACjD,MAAM,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,SAAS,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IAE9F,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,MAAM,CAAC,CAAC;IAC9C,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;QAC5B,MAAM,IAAI,KAAK,CAAC,+CAA+C,GAAG,EAAE,CAAC,CAAC;IACxE,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IACtC,OAAO,QAAQ,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;AAC9C,CAAC;AAEM,KAAK,UAAU,oCAAoC,CAAC,GAAW;IACpE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,wDAAa,oBAAoB,GAAC,CAAC;IACtF,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC;QACrC,MAAM,EAAE,CAAC,uDAAuD,CAAC;KAClE,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,MAAM,aAAa,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,IAAI,KAAK,GAAG,QAAQ,CAAC;IACrB,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;IACzD,IAAI,GAAG,EAAE,CAAC;QACR,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,G
AAG,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3E,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,OAAO,KAAK,GAAG,CAAC;YAC3F,EAAE,UAAU,EAAE,KAAK,CAAC;QACtB,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,KAAK,GAAG,GAAG,SAAS,IAAI,KAAK,EAAE,CAAC;IAClC,CAAC;IACD,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAEtF,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,uCAAuC,GAAG,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,0CAA0C;IAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACxB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC1B,MAAM,MAAM,GAAW,EAAE,CAAC;QAC1B,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;YAChC,MAAM,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;QACH,OAAO,MAAM,CAAC;IAChB,CAAC,CAAC,CAAC;AACL,CAAC;AAEM,KAAK,UAAU,uBAAuB,CAAC,GAAW;IACvD,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,sBAAsB,CAAC,GAAG,CAAC,CAAC;IAC/D,gBAAM,CAAC,KAAK,CAAC,sBAAsB,GAAG,eAAe,QAAQ,EAAE,CAAC,CAAC;IACjE,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,sCAAsC,CAAC,GAAG,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,oCAAoC,CAAC,GAAG,CAAC,CAAC;AACnD,CAAC;AAEM,KAAK,UAAU,qBAAqB,CAAC,IAAc,EAAE,GAAW;IACrE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,wDAAa,oBAAoB,GAAC,CAAC;IACtF,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC;QACrC,MAAM,EAAE,CAAC,8CAA8C,CAAC;KACzD,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,MAAM,aAAa,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,IAAI,KAAK,GAAG,QAAQ,CAAC;IACrB,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;IACzD,IAAI,GAAG,EAAE,CAAC;QACR,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAC
3E,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,OAAO,KAAK,GAAG,CAAC;YAC3F,EAAE,UAAU,EAAE,KAAK,CAAC;QACtB,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,KAAK,GAAG,GAAG,SAAS,IAAI,KAAK,EAAE,CAAC;IAClC,CAAC;SAAM,CAAC;QACN,2CAA2C;QAC3C,MAAM,aAAa,GAAG,QAAQ,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;QAC3C,MAAM,MAAM,CAAC,YAAY,CAAC,WAAW,CAAC;YACpC,aAAa;YACb,IAAI;YACJ,WAAW,EAAE;gBACX,QAAQ,EAAE;oBACR;wBACE,QAAQ,EAAE;4BACR,UAAU,EAAE;gCACV,KAAK,EAAE,aAAa;6BACrB;yBACF;qBACF;iBACF;aACF;SACF,CAAC,CAAC;QACH,KAAK,GAAG,GAAG,aAAa,IAAI,KAAK,EAAE,CAAC;IACtC,CAAC;IAED,qCAAqC;IACrC,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAErC,6BAA6B;IAC7B,MAAM,MAAM,GAAG,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAErF,0BAA0B;IAC1B,gBAAM,CAAC,KAAK,CAAC,qCAAqC,GAAG,SAAS,MAAM,CAAC,MAAM,OAAO,CAAC,CAAC;IACpF,MAAM,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC;QACtC,aAAa;QACb,KAAK;QACL,gBAAgB,EAAE,cAAc;QAChC,IAAI;QACJ,WAAW,EAAE;YACX,MAAM;SACP;KACF,CAAC,CAAC;AACL,CAAC"}

package/dist/src/integrations/huggingfaceDatasets.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { TestCase } from '../types';
+export declare function fetchHuggingFaceDataset(datasetPath: string, limit?: number): Promise<TestCase[]>;
+//# sourceMappingURL=huggingfaceDatasets.d.ts.map

package/dist/src/integrations/huggingfaceDatasets.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"huggingfaceDatasets.d.ts","sourceRoot":"","sources":["../../../src/integrations/huggingfaceDatasets.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AA+CzC,wBAAsB,uBAAuB,CAC3C,WAAW,EAAE,MAAM,EACnB,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,QAAQ,EAAE,CAAC,CA6ErB"}

package/dist/src/integrations/huggingfaceDatasets.js ADDED Viewed

@@ -0,0 +1,87 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) { // Interop helper: reuses this.__importDefault when one is already defined, otherwise uses the function below.
+    return (mod && mod.__esModule) ? mod : { "default": mod }; // Values flagged __esModule pass through unchanged; anything else is wrapped as { default: mod }.
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.fetchHuggingFaceDataset = fetchHuggingFaceDataset;
+const fetch_1 = require("../fetch");
+const logger_1 = __importDefault(require("../logger"));
+function parseDatasetPath(path) { // Splits a "huggingface://datasets/<owner>/<repo>?<query>" string into { owner, repo, queryParams }.
+    // Remove the huggingface://datasets/ prefix (first occurrence only) and split into path and query; text after a second '?' is discarded
+    const [pathPart, queryPart] = path.replace('huggingface://datasets/', '').split('?');
+    const [owner, repo] = pathPart.split('/'); // only the first two segments are used; repo is undefined when pathPart has no '/'
+    // Start with default parameters (split=test, config=default)
+    const defaultParams = new URLSearchParams({
+        split: 'test',
+        config: 'default',
+    });
+    // Parse user query parameters (a missing query string yields an empty set)
+    const userParams = new URLSearchParams(queryPart || '');
+    // Merge user params into defaults (user params override defaults; for a repeated user key the last value wins because set() replaces)
+    const queryParams = new URLSearchParams();
+    for (const [key, value] of defaultParams) {
+        queryParams.set(key, value);
+    }
+    for (const [key, value] of userParams) {
+        queryParams.set(key, value);
+    }
+    return { owner, repo, queryParams }; // queryParams keeps every user-supplied key, including limit, which the caller in this file also reads
+}
+async function fetchHuggingFaceDataset(datasetPath, limit) { // Pages through the datasets-server rows endpoint and returns one { vars: row } test case per row, capped at the limit when one is given.
+    const baseUrl = 'https://datasets-server.huggingface.co/rows';
+    const { owner, repo, queryParams } = parseDatasetPath(datasetPath);
+    logger_1.default.info(`[Huggingface Dataset] Fetching dataset: ${owner}/${repo} ...`);
+    const tests = []; // accumulated across all pages
+    let offset = 0; // index of the first row to request on the next page
+    const pageSize = 100; // Maximum number of rows per request
+    const queryParamLimit = queryParams.get('limit'); // string, or null when the path carried no limit parameter
+    const userLimit = limit ?? (queryParamLimit ? Number.parseInt(queryParamLimit, 10) : undefined); // the limit argument wins over ?limit=...; NOTE(review): parseInt can yield NaN, which the truthiness checks below treat as "no limit" (as they do 0)
+    while (true) {
+        // Create a new URLSearchParams for this request (a copy, so queryParams is never mutated; a user-supplied limit key is sent along unchanged)
+        const requestParams = new URLSearchParams(queryParams);
+        requestParams.set('offset', offset.toString());
+        requestParams.set('length', Math.min(pageSize, userLimit ? userLimit - offset : pageSize).toString()); // never ask for more rows than remain before userLimit
+        const url = `${baseUrl}?dataset=${encodeURIComponent(`${owner}/${repo}`)}&${requestParams.toString()}`;
+        logger_1.default.debug(`[Huggingface Dataset] Fetching page from ${url}`);
+        const response = await (0, fetch_1.fetchWithProxy)(url);
+        if (!response.ok) { // any non-OK response aborts the whole fetch: the message is logged, then thrown
+            const error = `[Huggingface Dataset] Failed to fetch dataset: ${response.statusText}.\nFetched ${url}`;
+            logger_1.default.error(error);
+            throw new Error(error);
+        }
+        const data = (await response.json()); // fields read below: rows, num_rows_total, features
+        logger_1.default.debug(`[Huggingface Dataset] Received ${data.rows.length} rows (total: ${data.num_rows_total})`);
+        if (offset === 0) {
+            // Log schema information on first request
+            logger_1.default.debug('[Huggingface Dataset] Dataset features:', data.features);
+            logger_1.default.debug('[Huggingface Dataset] Using query parameters:', Object.fromEntries(queryParams));
+        }
+        // Convert HuggingFace rows to test cases (each entry's row object is shallow-copied into vars)
+        for (const { row } of data.rows) {
+            const test = {
+                vars: {
+                    ...row,
+                },
+            };
+            tests.push(test);
+        }
+        logger_1.default.debug(`[Huggingface Dataset] Processed ${tests.length} total test cases so far`);
+        // Check if we've reached the user's limit (the end-of-dataset check is the separate test below)
+        if (userLimit && tests.length >= userLimit) {
+            logger_1.default.debug(`[Huggingface Dataset] Reached user-specified limit of ${userLimit}`);
+            break;
+        }
+        // Check if we've fetched all rows. NOTE(review): if a page comes back empty while offset is still below num_rows_total, no break fires and offset does not advance, so the same request looks like it would repeat indefinitely - confirm the API cannot do this
+        if (offset + data.rows.length >= data.num_rows_total) {
+            logger_1.default.debug('[Huggingface Dataset] Finished fetching all rows');
+            break;
+        }
+        offset += data.rows.length;
+        logger_1.default.debug(`[Huggingface Dataset] Fetching next page starting at offset ${offset}`);
+    }
+    // If user specified a limit, ensure we don't return more than that (defensive: matters only if a page holds more rows than were requested)
+    const finalTests = userLimit ? tests.slice(0, userLimit) : tests;
+    logger_1.default.debug(`[Huggingface Dataset] Successfully loaded ${finalTests.length} test cases`);
+    return finalTests;
+}
+//# sourceMappingURL=huggingfaceDatasets.js.map

package/dist/src/integrations/huggingfaceDatasets.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"huggingfaceDatasets.js","sourceRoot":"","sources":["../../../src/integrations/huggingfaceDatasets.ts"],"names":[],"mappings":";;;;;AAiDA,0DAgFC;AAjID,oCAA0C;AAC1C,uDAA+B;AAkB/B,SAAS,gBAAgB,CAAC,IAAY;IAKpC,0EAA0E;IAC1E,MAAM,CAAC,QAAQ,EAAE,SAAS,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,yBAAyB,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACrF,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAE1C,gCAAgC;IAChC,MAAM,aAAa,GAAG,IAAI,eAAe,CAAC;QACxC,KAAK,EAAE,MAAM;QACb,MAAM,EAAE,SAAS;KAClB,CAAC,CAAC;IAEH,8BAA8B;IAC9B,MAAM,UAAU,GAAG,IAAI,eAAe,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC;IAExD,kEAAkE;IAClE,MAAM,WAAW,GAAG,IAAI,eAAe,EAAE,CAAC;IAC1C,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,aAAa,EAAE,CAAC;QACzC,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC9B,CAAC;IACD,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,UAAU,EAAE,CAAC;QACtC,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;AACtC,CAAC;AAEM,KAAK,UAAU,uBAAuB,CAC3C,WAAmB,EACnB,KAAc;IAEd,MAAM,OAAO,GAAG,6CAA6C,CAAC;IAC9D,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,GAAG,gBAAgB,CAAC,WAAW,CAAC,CAAC;IAEnE,gBAAM,CAAC,IAAI,CAAC,2CAA2C,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC;IAE5E,MAAM,KAAK,GAAe,EAAE,CAAC;IAC7B,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,QAAQ,GAAG,GAAG,CAAC,CAAC,6BAA6B;IACnD,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACjD,MAAM,SAAS,GAAG,KAAK,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;IAChG,OAAO,IAAI,EAAE,CAAC;QACZ,gDAAgD;QAChD,MAAM,aAAa,GAAG,IAAI,eAAe,CAAC,WAAW,CAAC,CAAC;QACvD,aAAa,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QAC/C,aAAa,CAAC,GAAG,CACf,QAAQ,EACR,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,QAAQ,EAAE,CACzE,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,OAAO,YAAY,kBAAkB,CAAC,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC,IAAI,aAAa,CAAC,QAAQ,EAAE,EAAE,CAAC;QACvG,gBAAM,CAAC,KAAK,CAAC,4CAA4C,GAAG,EAAE,CAAC,CAAC;QAEhE,MAAM,QAAQ,GAAG,MAAM,IAAA,sBAAc,EAAC,GAAG,CAAC,CAAC;QA
C3C,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,KAAK,GAAG,kDAAkD,QAAQ,CAAC,UAAU,cAAc,GAAG,EAAE,CAAC;YACvG,gBAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC;QACzB,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAwB,CAAC;QAC5D,gBAAM,CAAC,KAAK,CACV,kCAAkC,IAAI,CAAC,IAAI,CAAC,MAAM,iBAAiB,IAAI,CAAC,cAAc,GAAG,CAC1F,CAAC;QAEF,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;YACjB,0CAA0C;YAC1C,gBAAM,CAAC,KAAK,CAAC,yCAAyC,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;YACvE,gBAAM,CAAC,KAAK,CACV,+CAA+C,EAC/C,MAAM,CAAC,WAAW,CAAC,WAAW,CAAC,CAChC,CAAC;QACJ,CAAC;QAED,yCAAyC;QACzC,KAAK,MAAM,EAAE,GAAG,EAAE,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAChC,MAAM,IAAI,GAAa;gBACrB,IAAI,EAAE;oBACJ,GAAG,GAAG;iBACP;aACF,CAAC;YACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;QAED,gBAAM,CAAC,KAAK,CAAC,mCAAmC,KAAK,CAAC,MAAM,0BAA0B,CAAC,CAAC;QAExF,wDAAwD;QACxD,IAAI,SAAS,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;YAC3C,gBAAM,CAAC,KAAK,CAAC,yDAAyD,SAAS,EAAE,CAAC,CAAC;YACnF,MAAM;QACR,CAAC;QAED,kCAAkC;QAClC,IAAI,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACrD,gBAAM,CAAC,KAAK,CAAC,kDAAkD,CAAC,CAAC;YACjE,MAAM;QACR,CAAC;QAED,MAAM,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;QAC3B,gBAAM,CAAC,KAAK,CAAC,+DAA+D,MAAM,EAAE,CAAC,CAAC;IACxF,CAAC;IAED,mEAAmE;IACnE,MAAM,UAAU,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IAEjE,gBAAM,CAAC,KAAK,CAAC,6CAA6C,UAAU,CAAC,MAAM,aAAa,CAAC,CAAC;IAC1F,OAAO,UAAU,CAAC;AACpB,CAAC"}

package/dist/src/matchers.d.ts CHANGED Viewed

@@ -15,6 +15,7 @@ export declare function renderLlmRubricPrompt(rubric: string, llmOutput: string,
 export declare function matchesLlmRubric(rubric: string, llmOutput: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
 export declare function matchesFactuality(input: string, expected: string, output: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
 export declare function matchesClosedQa(input: string, expected: string, output: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
+export declare function matchesGEval(criteria: string, input: string, output: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>; // LLM-graded check of `output` against `criteria`; passes when the normalized score is at least `threshold`.
 export declare function matchesAnswerRelevance(input: string, output: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;
 export declare function matchesContextRecall(context: string, groundTruth: string, threshold: number, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
 export declare function matchesContextRelevance(question: string, context: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;

package/dist/src/matchers.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"matchers.d.ts","sourceRoot":"","sources":["../../src/matchers.ts"],"names":[],"mappings":"~~AAqBA~~,OAAO,KAAK,EAGV,WAAW,EAEX,aAAa,EACb,aAAa,EAIb,YAAY,EAEb,MAAM,SAAS,CAAC;AAiDjB,wBAAsB,kBAAkB,CACtC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,GAClC,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAqC7B;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,EACnC,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,WAAW,CAAC,CAmCtB;AAgBD,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,OAAe,EACxB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA8F3C;AAED;;;;;;;GAOG;AACH,wBAAsB,qBAAqB,CACzC,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoC3C;AAED,wBAAgB,qBAAqB,CACnC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,UAavC;AAED,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyD3C;AAED,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAwF3C;AAED,wBAAsB,eAAe,CACnC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoD3C;AAED,wBAAsB,sBAAsB,CAC1C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA6F3C;AAED,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,E
AAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0C3C;AAED,wBAAsB,uBAAuB,CAC3C,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyC3C;AAED,wBAAsB,0BAA0B,CAC9C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0E3C;AAED,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,EAAE,CAAC,CAyD7C;AAED,UAAU,sBAAsB;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AAED,wBAAsB,iBAAiB,CACrC,EAAE,UAAU,EAAE,iBAAiB,EAAE,UAAe,EAAE,EAAE,sBAAsB,EAC1E,OAAO,CAAC,EAAE,aAAa;;;;GAwDxB"}
1	+ {"version":3,"file":"matchers.d.ts","sourceRoot":"","sources":["../../src/matchers.ts"],"names":[],"mappings":"AAsBA,OAAO,KAAK,EAGV,WAAW,EAEX,aAAa,EACb,aAAa,EAIb,YAAY,EAEb,MAAM,SAAS,CAAC;AAiDjB,wBAAsB,kBAAkB,CACtC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,GAClC,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAqC7B;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,EACnC,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,WAAW,CAAC,CAmCtB;AAgBD,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,OAAe,EACxB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA8F3C;AAED;;;;;;;GAOG;AACH,wBAAsB,qBAAqB,CACzC,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoC3C;AAED,wBAAgB,qBAAqB,CACnC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,UAavC;AAED,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyD3C;AAED,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAwF3C;AAED,wBAAsB,eAAe,CACnC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoD3C;AAED,wBAAsB,YAAY,CAChC,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAmF3C;AAED,wBAAsB,sBAAsB,CAC1C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CA
AC,aAAa,EAAE,WAAW,CAAC,CAAC,CA6F3C;AAED,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0C3C;AAED,wBAAsB,uBAAuB,CAC3C,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyC3C;AAED,wBAAsB,0BAA0B,CAC9C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0E3C;AAED,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,EAAE,CAAC,CAyD7C;AAED,UAAU,sBAAsB;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AAED,wBAAsB,iBAAiB,CACrC,EAAE,UAAU,EAAE,iBAAiB,EAAE,UAAe,EAAE,EAAE,sBAAsB,EAC1E,OAAO,CAAC,EAAE,aAAa;;;;GAwDxB"}

package/dist/src/matchers.js CHANGED Viewed

@@ -11,12 +11,14 @@ exports.renderLlmRubricPrompt = renderLlmRubricPrompt;
 exports.matchesLlmRubric = matchesLlmRubric;
 exports.matchesFactuality = matchesFactuality;
 exports.matchesClosedQa = matchesClosedQa;
+exports.matchesGEval = matchesGEval;
 exports.matchesAnswerRelevance = matchesAnswerRelevance;
 exports.matchesContextRecall = matchesContextRecall;
 exports.matchesContextRelevance = matchesContextRelevance;
 exports.matchesContextFaithfulness = matchesContextFaithfulness;
 exports.matchesSelectBest = matchesSelectBest;
 exports.matchesModeration = matchesModeration;
+const dedent_1 = __importDefault(require("dedent"));
 const cliState_1 = __importDefault(require("./cliState"));
 const envars_1 = require("./envars");
 const logger_1 = __importDefault(require("./logger"));
@@ -428,6 +430,78 @@ async function matchesClosedQa(input, expected, output, grading, vars) {
         return fail(`Error parsing output: ${err.message}`, resp.tokenUsage);
     }
 }
+async function matchesGEval(criteria, input, output, threshold, grading) { // Two-step LLM grading: first asks the provider for evaluation steps derived from criteria, then asks it to score output against input using those steps.
+    if (!input) { // a missing or empty source text is thrown, unlike the parse failures below, which return fail(...)
+        throw Error('No source text to estimate reply');
+    }
+    const maxScore = 10; // top of the score scale named in the prompt, and the divisor used to normalize the result
+    const textProvider = await getAndCheckProvider('text', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).gradingProvider, 'reply geval check'); // presumably prefers grading.provider and falls back to the default grading provider - confirm in getAndCheckProvider
+    const promptSteps = (0, dedent_1.default) `
+    Given an evaluation criteria which outlines how you should judge some text, generate 3-4 concise evaluation steps for any text based on the criteria below.
+    Evaluation Criteria:
+    ${criteria}
+    **
+    IMPORTANT: Please make sure to only return in minified JSON format, with the "steps" key as a list of strings. No additional words, explanation or formatting is needed.
+    Example JSON:
+    {"steps": <list_of_strings>}
+    **
+    JSON:
+    `;
+    const respSteps = await textProvider.callApi(promptSteps); // call 1: ask for the evaluation steps
+    let steps;
+    try {
+        // NOTE: a regexp extracts the JSON object because the LLM sometimes wraps its response in markdown format ```json...```
+        steps = JSON.parse(respSteps.output.match(/\{"steps".+\}/g)[0]).steps; // '.' does not match newlines, so the object must sit on one line; a null match throws and lands in the catch below
+        if (!steps.length) { // an empty list (or a value without a length) counts as "no steps"
+            return fail('LLM does not propose any evaluation step');
+        }
+    }
+    catch { // reached e.g. when output is not a string, nothing matches the regexp, or JSON.parse rejects the text
+        return fail(`LLM-proposed evaluation steps are not in JSON format: ${respSteps.output}`);
+    }
+    const promptText = (0, dedent_1.default) `
+    You will be given one Reply for a Source Text below. Your task is to rate the Reply on one metric.
+    Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
+    Evaluation Criteria:
+    ${criteria}
+    Evaluation Steps:
+    - ${steps.join('\n- ')}
+    - Given the evaluation steps, return a JSON with two keys: 1) a "score" key ranging from 0 - ${maxScore}, with ${maxScore} being that it follows the Evaluation Criteria outlined in the Evaluation Steps and 0 being that it does not; 2) a "reason" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Source Text and Reply in your reason, but be very concise with it!
+    Source Text:
+    ${input}
+    Reply:
+    ${output}
+    **
+    IMPORTANT: Please make sure to only return in minified JSON format, with the "score" and "reason" key. No additional words, explanation or formatting is needed.
+    Example JSON:
+    {"score":0,"reason":"The text does not follow the evaluation steps provided."}
+    **
+    JSON:
+    `;
+    const resp = await textProvider.callApi(promptText); // call 2: ask for the score and the reason
+    let result;
+    try {
+        result = JSON.parse(resp.output.match(/\{.+\}/g)[0]); // greedy single-line match: first '{' through the last '}' on the first line that contains one
+    }
+    catch { // same failure modes as the steps parse above
+        return fail(`LLM-proposed evaluation result is not in JSON format: ${resp.output}`);
+    }
+    return {
+        pass: result.score / maxScore >= threshold, // NOTE(review): score is not validated; a missing or non-coercible score gives NaN, so pass is false and score is NaN
+        score: result.score / maxScore, // falls in the 0-1 range when the model stays within 0-maxScore
+        reason: result.reason,
+    };
+}
 async function matchesAnswerRelevance(input, output, threshold, grading) {
     const embeddingProvider = await getAndCheckProvider('embedding', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).embeddingProvider, 'answer relevancy check');
     const textProvider = await getAndCheckProvider('text', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).gradingProvider, 'answer relevancy check');