promptfoo 0.102.3 → 0.103.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +1 -1
- package/dist/src/app/assets/{index-isMhDyh1.js → index-hVGk-Oul.js} +276 -277
- package/dist/src/app/assets/{index.es-BNF9PUeN.js → index.es-CcK3JjZn.js} +1 -1
- package/dist/src/app/assets/{sync-DaNyhPy-.js → sync-BaigR5eq.js} +1 -1
- package/dist/src/app/index.html +1 -1
- package/dist/src/assertions/geval.d.ts +3 -0
- package/dist/src/assertions/geval.d.ts.map +1 -0
- package/dist/src/assertions/geval.js +39 -0
- package/dist/src/assertions/geval.js.map +1 -0
- package/dist/src/assertions/index.d.ts +1 -1
- package/dist/src/assertions/index.d.ts.map +1 -1
- package/dist/src/assertions/index.js +2 -0
- package/dist/src/assertions/index.js.map +1 -1
- package/dist/src/assertions/utils.d.ts +2 -2
- package/dist/src/database/tables.d.ts +12 -12
- package/dist/src/googleSheets.d.ts +7 -0
- package/dist/src/googleSheets.d.ts.map +1 -1
- package/dist/src/googleSheets.js +32 -1
- package/dist/src/googleSheets.js.map +1 -1
- package/dist/src/integrations/huggingfaceDatasets.d.ts +3 -0
- package/dist/src/integrations/huggingfaceDatasets.d.ts.map +1 -0
- package/dist/src/integrations/huggingfaceDatasets.js +87 -0
- package/dist/src/integrations/huggingfaceDatasets.js.map +1 -0
- package/dist/src/matchers.d.ts +1 -0
- package/dist/src/matchers.d.ts.map +1 -1
- package/dist/src/matchers.js +74 -0
- package/dist/src/matchers.js.map +1 -1
- package/dist/src/models/evalResult.d.ts.map +1 -1
- package/dist/src/models/evalResult.js +8 -1
- package/dist/src/models/evalResult.js.map +1 -1
- package/dist/src/providers/bedrock.d.ts.map +1 -1
- package/dist/src/providers/bedrock.js +10 -3
- package/dist/src/providers/bedrock.js.map +1 -1
- package/dist/src/providers/simulatedUser.d.ts.map +1 -1
- package/dist/src/providers/simulatedUser.js +5 -0
- package/dist/src/providers/simulatedUser.js.map +1 -1
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +4 -0
- package/dist/src/providers.js.map +1 -1
- package/dist/src/redteam/constants.d.ts +7 -3
- package/dist/src/redteam/constants.d.ts.map +1 -1
- package/dist/src/redteam/constants.js +27 -1
- package/dist/src/redteam/constants.js.map +1 -1
- package/dist/src/redteam/graders.d.ts.map +1 -1
- package/dist/src/redteam/graders.js +2 -0
- package/dist/src/redteam/graders.js.map +1 -1
- package/dist/src/redteam/index.d.ts +1 -1
- package/dist/src/redteam/index.d.ts.map +1 -1
- package/dist/src/redteam/index.js +10 -2
- package/dist/src/redteam/index.js.map +1 -1
- package/dist/src/redteam/plugins/beavertails.d.ts +17 -0
- package/dist/src/redteam/plugins/beavertails.d.ts.map +1 -0
- package/dist/src/redteam/plugins/beavertails.js +104 -0
- package/dist/src/redteam/plugins/beavertails.js.map +1 -0
- package/dist/src/redteam/plugins/index.d.ts.map +1 -1
- package/dist/src/redteam/plugins/index.js +2 -0
- package/dist/src/redteam/plugins/index.js.map +1 -1
- package/dist/src/redteam/plugins/intent.d.ts +6 -3
- package/dist/src/redteam/plugins/intent.d.ts.map +1 -1
- package/dist/src/redteam/plugins/intent.js +32 -13
- package/dist/src/redteam/plugins/intent.js.map +1 -1
- package/dist/src/redteam/plugins/pliny.d.ts.map +1 -1
- package/dist/src/redteam/plugins/pliny.js +1 -1
- package/dist/src/redteam/plugins/pliny.js.map +1 -1
- package/dist/src/redteam/providers/bestOfN.d.ts +16 -0
- package/dist/src/redteam/providers/bestOfN.d.ts.map +1 -0
- package/dist/src/redteam/providers/bestOfN.js +103 -0
- package/dist/src/redteam/providers/bestOfN.js.map +1 -0
- package/dist/src/redteam/providers/crescendo/index.d.ts.map +1 -1
- package/dist/src/redteam/providers/crescendo/index.js +20 -0
- package/dist/src/redteam/providers/crescendo/index.js.map +1 -1
- package/dist/src/redteam/providers/goat.d.ts.map +1 -1
- package/dist/src/redteam/providers/goat.js +7 -0
- package/dist/src/redteam/providers/goat.js.map +1 -1
- package/dist/src/redteam/providers/iterative.d.ts +3 -0
- package/dist/src/redteam/providers/iterative.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterative.js +63 -26
- package/dist/src/redteam/providers/iterative.js.map +1 -1
- package/dist/src/redteam/providers/iterativeImage.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterativeImage.js +17 -0
- package/dist/src/redteam/providers/iterativeImage.js.map +1 -1
- package/dist/src/redteam/providers/iterativeTree.d.ts +7 -17
- package/dist/src/redteam/providers/iterativeTree.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterativeTree.js +59 -9
- package/dist/src/redteam/providers/iterativeTree.js.map +1 -1
- package/dist/src/redteam/providers/prompts.d.ts.map +1 -1
- package/dist/src/redteam/providers/prompts.js +25 -3
- package/dist/src/redteam/providers/prompts.js.map +1 -1
- package/dist/src/redteam/providers/shared.d.ts +1 -0
- package/dist/src/redteam/providers/shared.d.ts.map +1 -1
- package/dist/src/redteam/providers/shared.js +8 -0
- package/dist/src/redteam/providers/shared.js.map +1 -1
- package/dist/src/redteam/shared.d.ts.map +1 -1
- package/dist/src/redteam/shared.js +1 -0
- package/dist/src/redteam/shared.js.map +1 -1
- package/dist/src/redteam/strategies/bestOfN.d.ts +3 -0
- package/dist/src/redteam/strategies/bestOfN.d.ts.map +1 -0
- package/dist/src/redteam/strategies/bestOfN.js +35 -0
- package/dist/src/redteam/strategies/bestOfN.js.map +1 -0
- package/dist/src/redteam/strategies/index.d.ts.map +1 -1
- package/dist/src/redteam/strategies/index.js +22 -12
- package/dist/src/redteam/strategies/index.js.map +1 -1
- package/dist/src/redteam/util.d.ts.map +1 -1
- package/dist/src/redteam/util.js +14 -3
- package/dist/src/redteam/util.js.map +1 -1
- package/dist/src/server/routes/redteam.js +2 -1
- package/dist/src/server/routes/redteam.js.map +1 -1
- package/dist/src/telemetry.d.ts +6 -0
- package/dist/src/telemetry.d.ts.map +1 -1
- package/dist/src/telemetry.js +6 -0
- package/dist/src/telemetry.js.map +1 -1
- package/dist/src/testCases.d.ts.map +1 -1
- package/dist/src/testCases.js +7 -0
- package/dist/src/testCases.js.map +1 -1
- package/dist/src/types/index.d.ts +343 -343
- package/dist/src/types/index.d.ts.map +1 -1
- package/dist/src/types/index.js +1 -0
- package/dist/src/types/index.js.map +1 -1
- package/dist/src/util/index.d.ts +4 -4
- package/dist/src/validators/redteam.js +2 -2
- package/dist/src/validators/redteam.js.map +1 -1
- package/dist/test/factories/evalFactory.d.ts +8 -8
- package/dist/test/googleSheets.test.d.ts +2 -0
- package/dist/test/googleSheets.test.d.ts.map +1 -0
- package/dist/test/googleSheets.test.js +240 -0
- package/dist/test/googleSheets.test.js.map +1 -0
- package/dist/test/integrations/huggingfaceDatasets.test.d.ts +2 -0
- package/dist/test/integrations/huggingfaceDatasets.test.d.ts.map +1 -0
- package/dist/test/integrations/huggingfaceDatasets.test.js +147 -0
- package/dist/test/integrations/huggingfaceDatasets.test.js.map +1 -0
- package/dist/test/models/evalResult.test.d.ts +2 -0
- package/dist/test/models/evalResult.test.d.ts.map +1 -0
- package/dist/test/models/evalResult.test.js +217 -0
- package/dist/test/models/evalResult.test.js.map +1 -0
- package/dist/test/providers/bedrock.test.js +110 -0
- package/dist/test/providers/bedrock.test.js.map +1 -1
- package/dist/test/redteam/plugins/intent.test.d.ts +2 -0
- package/dist/test/redteam/plugins/intent.test.d.ts.map +1 -0
- package/dist/test/redteam/plugins/intent.test.js +172 -0
- package/dist/test/redteam/plugins/intent.test.js.map +1 -0
- package/dist/test/redteam/providers/iterative.test.d.ts +2 -0
- package/dist/test/redteam/providers/iterative.test.d.ts.map +1 -0
- package/dist/test/redteam/providers/iterative.test.js +61 -0
- package/dist/test/redteam/providers/iterative.test.js.map +1 -0
- package/dist/test/redteam/providers/iterativeTree.test.js +46 -8
- package/dist/test/redteam/providers/iterativeTree.test.js.map +1 -1
- package/dist/test/redteam/validators.test.js +2 -0
- package/dist/test/redteam/validators.test.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
|
@@ -539,7 +539,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
539
539
|
description?: string | undefined;
|
|
540
540
|
providerOutput?: string | {} | undefined;
|
|
541
541
|
assert?: ({
|
|
542
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
542
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
543
543
|
value?: import("../types").AssertionValue | undefined;
|
|
544
544
|
config?: Record<string, any> | undefined;
|
|
545
545
|
provider?: any;
|
|
@@ -554,7 +554,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
554
554
|
} | {
|
|
555
555
|
type: "assert-set";
|
|
556
556
|
assert: {
|
|
557
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
557
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
558
558
|
value?: import("../types").AssertionValue | undefined;
|
|
559
559
|
config?: Record<string, any> | undefined;
|
|
560
560
|
provider?: any;
|
|
@@ -681,7 +681,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
681
681
|
description?: string | undefined;
|
|
682
682
|
providerOutput?: string | {} | undefined;
|
|
683
683
|
assert?: ({
|
|
684
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
684
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
685
685
|
value?: import("../types").AssertionValue | undefined;
|
|
686
686
|
config?: Record<string, any> | undefined;
|
|
687
687
|
provider?: any;
|
|
@@ -696,7 +696,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
696
696
|
} | {
|
|
697
697
|
type: "assert-set";
|
|
698
698
|
assert: {
|
|
699
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
699
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
700
700
|
value?: import("../types").AssertionValue | undefined;
|
|
701
701
|
config?: Record<string, any> | undefined;
|
|
702
702
|
provider?: any;
|
|
@@ -820,7 +820,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
820
820
|
description?: string | undefined;
|
|
821
821
|
providerOutput?: string | {} | undefined;
|
|
822
822
|
assert?: ({
|
|
823
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
823
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
824
824
|
value?: import("../types").AssertionValue | undefined;
|
|
825
825
|
config?: Record<string, any> | undefined;
|
|
826
826
|
provider?: any;
|
|
@@ -835,7 +835,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
835
835
|
} | {
|
|
836
836
|
type: "assert-set";
|
|
837
837
|
assert: {
|
|
838
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
838
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
839
839
|
value?: import("../types").AssertionValue | undefined;
|
|
840
840
|
config?: Record<string, any> | undefined;
|
|
841
841
|
provider?: any;
|
|
@@ -960,7 +960,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
960
960
|
}) | undefined;
|
|
961
961
|
providerOutput?: string | {} | undefined;
|
|
962
962
|
assert?: ({
|
|
963
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
963
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
964
964
|
value?: import("../types").AssertionValue | undefined;
|
|
965
965
|
config?: Record<string, any> | undefined;
|
|
966
966
|
provider?: any;
|
|
@@ -975,7 +975,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
975
975
|
} | {
|
|
976
976
|
type: "assert-set";
|
|
977
977
|
assert: {
|
|
978
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
978
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
979
979
|
value?: import("../types").AssertionValue | undefined;
|
|
980
980
|
config?: Record<string, any> | undefined;
|
|
981
981
|
provider?: any;
|
|
@@ -1493,7 +1493,7 @@ export declare const evalResultsTable: import("drizzle-orm/sqlite-core").SQLiteT
|
|
|
1493
1493
|
description?: string | undefined;
|
|
1494
1494
|
providerOutput?: string | {} | undefined;
|
|
1495
1495
|
assert?: ({
|
|
1496
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1496
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1497
1497
|
value?: import("../types").AssertionValue | undefined;
|
|
1498
1498
|
config?: Record<string, any> | undefined;
|
|
1499
1499
|
provider?: any;
|
|
@@ -1508,7 +1508,7 @@ export declare const evalResultsTable: import("drizzle-orm/sqlite-core").SQLiteT
|
|
|
1508
1508
|
} | {
|
|
1509
1509
|
type: "assert-set";
|
|
1510
1510
|
assert: {
|
|
1511
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1511
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1512
1512
|
value?: import("../types").AssertionValue | undefined;
|
|
1513
1513
|
config?: Record<string, any> | undefined;
|
|
1514
1514
|
provider?: any;
|
|
@@ -1966,7 +1966,7 @@ export declare const datasetsTable: import("drizzle-orm/sqlite-core").SQLiteTabl
|
|
|
1966
1966
|
description?: string | undefined;
|
|
1967
1967
|
providerOutput?: string | {} | undefined;
|
|
1968
1968
|
assert?: ({
|
|
1969
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1969
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1970
1970
|
value?: import("../types").AssertionValue | undefined;
|
|
1971
1971
|
config?: Record<string, any> | undefined;
|
|
1972
1972
|
provider?: any;
|
|
@@ -1981,7 +1981,7 @@ export declare const datasetsTable: import("drizzle-orm/sqlite-core").SQLiteTabl
|
|
|
1981
1981
|
} | {
|
|
1982
1982
|
type: "assert-set";
|
|
1983
1983
|
assert: {
|
|
1984
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1984
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1985
1985
|
value?: import("../types").AssertionValue | undefined;
|
|
1986
1986
|
config?: Record<string, any> | undefined;
|
|
1987
1987
|
provider?: any;
|
|
@@ -1,4 +1,11 @@
|
|
|
1
1
|
import type { CsvRow } from './types';
|
|
2
|
+
export declare function checkGoogleSheetAccess(url: string): Promise<{
|
|
3
|
+
public: boolean;
|
|
4
|
+
status: number;
|
|
5
|
+
} | {
|
|
6
|
+
public: boolean;
|
|
7
|
+
status?: undefined;
|
|
8
|
+
}>;
|
|
2
9
|
export declare function fetchCsvFromGoogleSheetUnauthenticated(url: string): Promise<CsvRow[]>;
|
|
3
10
|
export declare function fetchCsvFromGoogleSheetAuthenticated(url: string): Promise<CsvRow[]>;
|
|
4
11
|
export declare function fetchCsvFromGoogleSheet(url: string): Promise<CsvRow[]>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"googleSheets.d.ts","sourceRoot":"","sources":["../../src/googleSheets.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"googleSheets.d.ts","sourceRoot":"","sources":["../../src/googleSheets.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAEtC,wBAAsB,sBAAsB,CAAC,GAAG,EAAE,MAAM;;;;;;GAYvD;AAED,wBAAsB,sCAAsC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAa3F;AAED,wBAAsB,oCAAoC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CA0CzF;AAED,wBAAsB,uBAAuB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAO5E;AAED,wBAAsB,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CA6DtF"}
|
package/dist/src/googleSheets.js
CHANGED
|
@@ -36,6 +36,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
36
36
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
37
|
};
|
|
38
38
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.checkGoogleSheetAccess = checkGoogleSheetAccess;
|
|
39
40
|
exports.fetchCsvFromGoogleSheetUnauthenticated = fetchCsvFromGoogleSheetUnauthenticated;
|
|
40
41
|
exports.fetchCsvFromGoogleSheetAuthenticated = fetchCsvFromGoogleSheetAuthenticated;
|
|
41
42
|
exports.fetchCsvFromGoogleSheet = fetchCsvFromGoogleSheet;
|
|
@@ -125,7 +126,37 @@ async function writeCsvToGoogleSheet(rows, url) {
|
|
|
125
126
|
throw new Error(`Invalid Google Sheets URL: ${url}`);
|
|
126
127
|
}
|
|
127
128
|
const spreadsheetId = match[1];
|
|
128
|
-
|
|
129
|
+
let range = 'A1:ZZZ';
|
|
130
|
+
const gid = Number(new URL(url).searchParams.get('gid'));
|
|
131
|
+
if (gid) {
|
|
132
|
+
const spreadsheet = await sheets.spreadsheets.get({ spreadsheetId, auth });
|
|
133
|
+
const sheetName = spreadsheet.data.sheets?.find((sheet) => sheet.properties?.sheetId === gid)
|
|
134
|
+
?.properties?.title;
|
|
135
|
+
if (!sheetName) {
|
|
136
|
+
throw new Error(`Sheet not found for gid: ${gid}`);
|
|
137
|
+
}
|
|
138
|
+
range = `${sheetName}!${range}`;
|
|
139
|
+
}
|
|
140
|
+
else {
|
|
141
|
+
// Create a new sheet if no gid is provided
|
|
142
|
+
const newSheetTitle = `Sheet${Date.now()}`;
|
|
143
|
+
await sheets.spreadsheets.batchUpdate({
|
|
144
|
+
spreadsheetId,
|
|
145
|
+
auth,
|
|
146
|
+
requestBody: {
|
|
147
|
+
requests: [
|
|
148
|
+
{
|
|
149
|
+
addSheet: {
|
|
150
|
+
properties: {
|
|
151
|
+
title: newSheetTitle,
|
|
152
|
+
},
|
|
153
|
+
},
|
|
154
|
+
},
|
|
155
|
+
],
|
|
156
|
+
},
|
|
157
|
+
});
|
|
158
|
+
range = `${newSheetTitle}!${range}`;
|
|
159
|
+
}
|
|
129
160
|
// Extract headers from the first row
|
|
130
161
|
const headers = Object.keys(rows[0]);
|
|
131
162
|
// Convert rows to a 2D array
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"googleSheets.js","sourceRoot":"","sources":["../../src/googleSheets.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
1
|
+
{"version":3,"file":"googleSheets.js","sourceRoot":"","sources":["../../src/googleSheets.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAGA,wDAYC;AAED,wFAaC;AAED,oFA0CC;AAED,0DAOC;AAED,sDA6DC;AAlJD,sDAA8B;AAGvB,KAAK,UAAU,sBAAsB,CAAC,GAAW;IACtD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,QAAQ,CAAC,EAAE,EAAE,CAAC;YAChB,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;QACnD,CAAC;aAAM,CAAC;YACN,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;QACpD,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,gBAAM,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC,CAAC;QACpD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IAC3B,CAAC;AACH,CAAC;AAEM,KAAK,UAAU,sCAAsC,CAAC,GAAW;IACtE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,wDAAa,gBAAgB,GAAC,CAAC;IAC3D,MAAM,EAAE,cAAc,EAAE,GAAG,wDAAa,SAAS,GAAC,CAAC;IAEnD,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IACjD,MAAM,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,SAAS,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IAE9F,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,MAAM,CAAC,CAAC;IAC9C,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;QAC5B,MAAM,IAAI,KAAK,CAAC,+CAA+C,GAAG,EAAE,CAAC,CAAC;IACxE,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IACtC,OAAO,QAAQ,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;AAC9C,CAAC;AAEM,KAAK,UAAU,oCAAoC,CAAC,GAAW;IACpE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,wDAAa,oBAAoB,GAAC,CAAC;IACtF,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC;QACrC,MAAM,EAAE,CAAC,uDAAuD,CAAC;KAClE,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,MAAM,aAAa,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,IAAI,KAAK,GAAG,QAAQ,CAAC;IACrB,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;IACzD,IAAI,GAAG,EAAE,CAAC;QACR,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,GAAG,
CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3E,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,OAAO,KAAK,GAAG,CAAC;YAC3F,EAAE,UAAU,EAAE,KAAK,CAAC;QACtB,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,KAAK,GAAG,GAAG,SAAS,IAAI,KAAK,EAAE,CAAC;IAClC,CAAC;IACD,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAEtF,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,uCAAuC,GAAG,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,0CAA0C;IAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACxB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC1B,MAAM,MAAM,GAAW,EAAE,CAAC;QAC1B,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;YAChC,MAAM,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;QACH,OAAO,MAAM,CAAC;IAChB,CAAC,CAAC,CAAC;AACL,CAAC;AAEM,KAAK,UAAU,uBAAuB,CAAC,GAAW;IACvD,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,sBAAsB,CAAC,GAAG,CAAC,CAAC;IAC/D,gBAAM,CAAC,KAAK,CAAC,sBAAsB,GAAG,eAAe,QAAQ,EAAE,CAAC,CAAC;IACjE,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,sCAAsC,CAAC,GAAG,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,oCAAoC,CAAC,GAAG,CAAC,CAAC;AACnD,CAAC;AAEM,KAAK,UAAU,qBAAqB,CAAC,IAAc,EAAE,GAAW;IACrE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,wDAAa,oBAAoB,GAAC,CAAC;IACtF,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC;QACrC,MAAM,EAAE,CAAC,8CAA8C,CAAC;KACzD,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,MAAM,aAAa,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,IAAI,KAAK,GAAG,QAAQ,CAAC;IACrB,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;IACzD,IAAI,GAAG,EAAE,CAAC;QACR,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3E,M
AAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,OAAO,KAAK,GAAG,CAAC;YAC3F,EAAE,UAAU,EAAE,KAAK,CAAC;QACtB,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,KAAK,GAAG,GAAG,SAAS,IAAI,KAAK,EAAE,CAAC;IAClC,CAAC;SAAM,CAAC;QACN,2CAA2C;QAC3C,MAAM,aAAa,GAAG,QAAQ,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;QAC3C,MAAM,MAAM,CAAC,YAAY,CAAC,WAAW,CAAC;YACpC,aAAa;YACb,IAAI;YACJ,WAAW,EAAE;gBACX,QAAQ,EAAE;oBACR;wBACE,QAAQ,EAAE;4BACR,UAAU,EAAE;gCACV,KAAK,EAAE,aAAa;6BACrB;yBACF;qBACF;iBACF;aACF;SACF,CAAC,CAAC;QACH,KAAK,GAAG,GAAG,aAAa,IAAI,KAAK,EAAE,CAAC;IACtC,CAAC;IAED,qCAAqC;IACrC,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAErC,6BAA6B;IAC7B,MAAM,MAAM,GAAG,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAErF,0BAA0B;IAC1B,gBAAM,CAAC,KAAK,CAAC,qCAAqC,GAAG,SAAS,MAAM,CAAC,MAAM,OAAO,CAAC,CAAC;IACpF,MAAM,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC;QACtC,aAAa;QACb,KAAK;QACL,gBAAgB,EAAE,cAAc;QAChC,IAAI;QACJ,WAAW,EAAE;YACX,MAAM;SACP;KACF,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"huggingfaceDatasets.d.ts","sourceRoot":"","sources":["../../../src/integrations/huggingfaceDatasets.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AA+CzC,wBAAsB,uBAAuB,CAC3C,WAAW,EAAE,MAAM,EACnB,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,QAAQ,EAAE,CAAC,CA6ErB"}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.fetchHuggingFaceDataset = fetchHuggingFaceDataset;
|
|
7
|
+
const fetch_1 = require("../fetch");
|
|
8
|
+
const logger_1 = __importDefault(require("../logger"));
|
|
9
|
+
/**
 * Splits a `huggingface://datasets/<owner>/<repo>[?query]` path into its
 * owner, repository and query parameters.
 *
 * The returned query parameters always contain `split` (default `test`) and
 * `config` (default `default`); any parameter supplied in the path overrides
 * the default of the same name, and additional parameters are appended.
 *
 * @param {string} path - Dataset path, optionally followed by a query string.
 * @returns {{ owner: string, repo: string, queryParams: URLSearchParams }}
 *   `owner`/`repo` are the first two `/`-separated segments of the path
 *   (either may be `undefined` or empty if the path does not contain them).
 */
function parseDatasetPath(path) {
    // Remove the huggingface://datasets/ prefix.
    const stripped = path.replace('huggingface://datasets/', '');
    // Split at the FIRST '?' only, so a '?' inside a query value is kept
    // instead of silently truncating the rest of the query string.
    const queryStart = stripped.indexOf('?');
    const pathPart = queryStart === -1 ? stripped : stripped.slice(0, queryStart);
    const queryPart = queryStart === -1 ? '' : stripped.slice(queryStart + 1);
    const [owner, repo] = pathPart.split('/');
    // Start from the defaults, then let user-supplied parameters override them.
    const queryParams = new URLSearchParams({
        split: 'test',
        config: 'default',
    });
    for (const [key, value] of new URLSearchParams(queryPart)) {
        queryParams.set(key, value);
    }
    return { owner, repo, queryParams };
}
|
|
30
|
+
/**
 * Downloads rows of a Hugging Face dataset from the datasets-server `/rows`
 * endpoint and converts every row into a test case of the form
 * `{ vars: { ...row } }`.
 *
 * Rows are requested in pages of at most 100. Paging stops when the requested
 * limit is reached, when the reported `num_rows_total` has been fetched, or
 * when the server returns an empty page.
 *
 * @param {string} datasetPath - Path understood by `parseDatasetPath`
 *   (`huggingface://datasets/<owner>/<repo>[?query]`).
 * @param {number} [limit] - Maximum number of test cases to return. When it
 *   is `null`/`undefined`, the `limit` query parameter of `datasetPath` is
 *   used if present. A falsy limit (e.g. `0` or `NaN`) means "no limit".
 * @returns {Promise<Array<{ vars: object }>>} The collected test cases.
 * @throws {Error} If a page request returns a non-OK HTTP response.
 */
async function fetchHuggingFaceDataset(datasetPath, limit) {
    const baseUrl = 'https://datasets-server.huggingface.co/rows';
    const { owner, repo, queryParams } = parseDatasetPath(datasetPath);
    logger_1.default.info(`[Huggingface Dataset] Fetching dataset: ${owner}/${repo} ...`);
    const tests = [];
    let offset = 0;
    const pageSize = 100; // Number of rows per request
    const queryParamLimit = queryParams.get('limit');
    // An explicit `limit` argument takes precedence over the query parameter.
    const userLimit = limit ?? (queryParamLimit ? Number.parseInt(queryParamLimit, 10) : undefined);
    while (true) {
        // Create a new URLSearchParams for this request so the shared
        // queryParams object is never mutated between pages.
        const requestParams = new URLSearchParams(queryParams);
        requestParams.set('offset', offset.toString());
        // Never ask for more rows than are still needed to satisfy the limit.
        requestParams.set('length', Math.min(pageSize, userLimit ? userLimit - offset : pageSize).toString());
        const url = `${baseUrl}?dataset=${encodeURIComponent(`${owner}/${repo}`)}&${requestParams.toString()}`;
        logger_1.default.debug(`[Huggingface Dataset] Fetching page from ${url}`);
        const response = await (0, fetch_1.fetchWithProxy)(url);
        if (!response.ok) {
            const error = `[Huggingface Dataset] Failed to fetch dataset: ${response.statusText}.\nFetched ${url}`;
            logger_1.default.error(error);
            throw new Error(error);
        }
        const data = (await response.json());
        logger_1.default.debug(`[Huggingface Dataset] Received ${data.rows.length} rows (total: ${data.num_rows_total})`);
        if (offset === 0) {
            // Log schema information on first request
            logger_1.default.debug('[Huggingface Dataset] Dataset features:', data.features);
            logger_1.default.debug('[Huggingface Dataset] Using query parameters:', Object.fromEntries(queryParams));
        }
        // Convert HuggingFace rows to test cases
        for (const { row } of data.rows) {
            tests.push({
                vars: {
                    ...row,
                },
            });
        }
        logger_1.default.debug(`[Huggingface Dataset] Processed ${tests.length} total test cases so far`);
        // Check if we've reached user's limit or end of dataset
        if (userLimit && tests.length >= userLimit) {
            logger_1.default.debug(`[Huggingface Dataset] Reached user-specified limit of ${userLimit}`);
            break;
        }
        // Check if we've fetched all rows
        if (offset + data.rows.length >= data.num_rows_total) {
            logger_1.default.debug('[Huggingface Dataset] Finished fetching all rows');
            break;
        }
        // An empty page would leave `offset` unchanged and repeat the same
        // request forever (e.g. when `num_rows_total` is missing or larger
        // than what the server actually serves), so stop paging here.
        if (data.rows.length === 0) {
            logger_1.default.debug('[Huggingface Dataset] Received an empty page; stopping pagination');
            break;
        }
        offset += data.rows.length;
        logger_1.default.debug(`[Huggingface Dataset] Fetching next page starting at offset ${offset}`);
    }
    // If user specified a limit, ensure we don't return more than that
    const finalTests = userLimit ? tests.slice(0, userLimit) : tests;
    logger_1.default.debug(`[Huggingface Dataset] Successfully loaded ${finalTests.length} test cases`);
    return finalTests;
}
|
|
87
|
+
//# sourceMappingURL=huggingfaceDatasets.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"huggingfaceDatasets.js","sourceRoot":"","sources":["../../../src/integrations/huggingfaceDatasets.ts"],"names":[],"mappings":";;;;;AAiDA,0DAgFC;AAjID,oCAA0C;AAC1C,uDAA+B;AAkB/B,SAAS,gBAAgB,CAAC,IAAY;IAKpC,0EAA0E;IAC1E,MAAM,CAAC,QAAQ,EAAE,SAAS,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,yBAAyB,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACrF,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAE1C,gCAAgC;IAChC,MAAM,aAAa,GAAG,IAAI,eAAe,CAAC;QACxC,KAAK,EAAE,MAAM;QACb,MAAM,EAAE,SAAS;KAClB,CAAC,CAAC;IAEH,8BAA8B;IAC9B,MAAM,UAAU,GAAG,IAAI,eAAe,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC;IAExD,kEAAkE;IAClE,MAAM,WAAW,GAAG,IAAI,eAAe,EAAE,CAAC;IAC1C,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,aAAa,EAAE,CAAC;QACzC,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC9B,CAAC;IACD,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,UAAU,EAAE,CAAC;QACtC,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;AACtC,CAAC;AAEM,KAAK,UAAU,uBAAuB,CAC3C,WAAmB,EACnB,KAAc;IAEd,MAAM,OAAO,GAAG,6CAA6C,CAAC;IAC9D,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,GAAG,gBAAgB,CAAC,WAAW,CAAC,CAAC;IAEnE,gBAAM,CAAC,IAAI,CAAC,2CAA2C,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC;IAE5E,MAAM,KAAK,GAAe,EAAE,CAAC;IAC7B,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,QAAQ,GAAG,GAAG,CAAC,CAAC,6BAA6B;IACnD,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACjD,MAAM,SAAS,GAAG,KAAK,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;IAChG,OAAO,IAAI,EAAE,CAAC;QACZ,gDAAgD;QAChD,MAAM,aAAa,GAAG,IAAI,eAAe,CAAC,WAAW,CAAC,CAAC;QACvD,aAAa,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QAC/C,aAAa,CAAC,GAAG,CACf,QAAQ,EACR,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,QAAQ,EAAE,CACzE,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,OAAO,YAAY,kBAAkB,CAAC,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC,IAAI,aAAa,CAAC,QAAQ,EAAE,EAAE,CAAC;QACvG,gBAAM,CAAC,KAAK,CAAC,4CAA4C,GAAG,EAAE,CAAC,CAAC;QAEhE,MAAM,QAAQ,GAAG,MAAM,IAAA,sBAAc,EAAC,GAAG,CAAC,CAAC;QAC3
C,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,KAAK,GAAG,kDAAkD,QAAQ,CAAC,UAAU,cAAc,GAAG,EAAE,CAAC;YACvG,gBAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC;QACzB,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAwB,CAAC;QAC5D,gBAAM,CAAC,KAAK,CACV,kCAAkC,IAAI,CAAC,IAAI,CAAC,MAAM,iBAAiB,IAAI,CAAC,cAAc,GAAG,CAC1F,CAAC;QAEF,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;YACjB,0CAA0C;YAC1C,gBAAM,CAAC,KAAK,CAAC,yCAAyC,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;YACvE,gBAAM,CAAC,KAAK,CACV,+CAA+C,EAC/C,MAAM,CAAC,WAAW,CAAC,WAAW,CAAC,CAChC,CAAC;QACJ,CAAC;QAED,yCAAyC;QACzC,KAAK,MAAM,EAAE,GAAG,EAAE,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAChC,MAAM,IAAI,GAAa;gBACrB,IAAI,EAAE;oBACJ,GAAG,GAAG;iBACP;aACF,CAAC;YACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;QAED,gBAAM,CAAC,KAAK,CAAC,mCAAmC,KAAK,CAAC,MAAM,0BAA0B,CAAC,CAAC;QAExF,wDAAwD;QACxD,IAAI,SAAS,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;YAC3C,gBAAM,CAAC,KAAK,CAAC,yDAAyD,SAAS,EAAE,CAAC,CAAC;YACnF,MAAM;QACR,CAAC;QAED,kCAAkC;QAClC,IAAI,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACrD,gBAAM,CAAC,KAAK,CAAC,kDAAkD,CAAC,CAAC;YACjE,MAAM;QACR,CAAC;QAED,MAAM,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;QAC3B,gBAAM,CAAC,KAAK,CAAC,+DAA+D,MAAM,EAAE,CAAC,CAAC;IACxF,CAAC;IAED,mEAAmE;IACnE,MAAM,UAAU,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IAEjE,gBAAM,CAAC,KAAK,CAAC,6CAA6C,UAAU,CAAC,MAAM,aAAa,CAAC,CAAC;IAC1F,OAAO,UAAU,CAAC;AACpB,CAAC"}
|
package/dist/src/matchers.d.ts
CHANGED
|
@@ -15,6 +15,7 @@ export declare function renderLlmRubricPrompt(rubric: string, llmOutput: string,
|
|
|
15
15
|
export declare function matchesLlmRubric(rubric: string, llmOutput: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
|
|
16
16
|
export declare function matchesFactuality(input: string, expected: string, output: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
|
|
17
17
|
export declare function matchesClosedQa(input: string, expected: string, output: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
|
|
18
|
+
/**
 * G-Eval style grader: rates `output` (the reply) against `criteria`, using `input` as the source text.
 *
 * The implementation makes two calls to a text grading provider (`grading.provider`, falling back to
 * the default grading provider): the first asks for 3-4 evaluation steps derived from `criteria`, the
 * second asks for a JSON object with a `score` (prompted as 0-10) and a `reason`.
 *
 * @param criteria - Free-text evaluation criteria inserted into both prompts.
 * @param input - Source text the reply is judged against; an empty value makes the call throw.
 * @param output - The reply being rated.
 * @param threshold - Minimum value of `score / 10` required for `pass` to be true.
 * @param grading - Optional grading configuration; only `provider` is read by this matcher.
 * @returns A grading result whose `score` is the model's score divided by 10 and whose `reason` is the
 *   model's explanation; a failing result is returned when either LLM reply cannot be parsed as JSON.
 */
export declare function matchesGEval(criteria: string, input: string, output: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;
|
|
18
19
|
export declare function matchesAnswerRelevance(input: string, output: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;
|
|
19
20
|
export declare function matchesContextRecall(context: string, groundTruth: string, threshold: number, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
|
|
20
21
|
export declare function matchesContextRelevance(question: string, context: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"matchers.d.ts","sourceRoot":"","sources":["../../src/matchers.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"matchers.d.ts","sourceRoot":"","sources":["../../src/matchers.ts"],"names":[],"mappings":"AAsBA,OAAO,KAAK,EAGV,WAAW,EAEX,aAAa,EACb,aAAa,EAIb,YAAY,EAEb,MAAM,SAAS,CAAC;AAiDjB,wBAAsB,kBAAkB,CACtC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,GAClC,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAqC7B;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,EACnC,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,WAAW,CAAC,CAmCtB;AAgBD,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,OAAe,EACxB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA8F3C;AAED;;;;;;;GAOG;AACH,wBAAsB,qBAAqB,CACzC,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoC3C;AAED,wBAAgB,qBAAqB,CACnC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,UAavC;AAED,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyD3C;AAED,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAwF3C;AAED,wBAAsB,eAAe,CACnC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoD3C;AAED,wBAAsB,YAAY,CAChC,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAmF3C;AAED,wBAAsB,sBAAsB,CAC1C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,a
AAa,EAAE,WAAW,CAAC,CAAC,CA6F3C;AAED,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0C3C;AAED,wBAAsB,uBAAuB,CAC3C,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyC3C;AAED,wBAAsB,0BAA0B,CAC9C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0E3C;AAED,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,EAAE,CAAC,CAyD7C;AAED,UAAU,sBAAsB;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AAED,wBAAsB,iBAAiB,CACrC,EAAE,UAAU,EAAE,iBAAiB,EAAE,UAAe,EAAE,EAAE,sBAAsB,EAC1E,OAAO,CAAC,EAAE,aAAa;;;;GAwDxB"}
|
package/dist/src/matchers.js
CHANGED
|
@@ -11,12 +11,14 @@ exports.renderLlmRubricPrompt = renderLlmRubricPrompt;
|
|
|
11
11
|
exports.matchesLlmRubric = matchesLlmRubric;
|
|
12
12
|
exports.matchesFactuality = matchesFactuality;
|
|
13
13
|
exports.matchesClosedQa = matchesClosedQa;
|
|
14
|
+
exports.matchesGEval = matchesGEval;
|
|
14
15
|
exports.matchesAnswerRelevance = matchesAnswerRelevance;
|
|
15
16
|
exports.matchesContextRecall = matchesContextRecall;
|
|
16
17
|
exports.matchesContextRelevance = matchesContextRelevance;
|
|
17
18
|
exports.matchesContextFaithfulness = matchesContextFaithfulness;
|
|
18
19
|
exports.matchesSelectBest = matchesSelectBest;
|
|
19
20
|
exports.matchesModeration = matchesModeration;
|
|
21
|
+
const dedent_1 = __importDefault(require("dedent"));
|
|
20
22
|
const cliState_1 = __importDefault(require("./cliState"));
|
|
21
23
|
const envars_1 = require("./envars");
|
|
22
24
|
const logger_1 = __importDefault(require("./logger"));
|
|
@@ -428,6 +430,78 @@ async function matchesClosedQa(input, expected, output, grading, vars) {
|
|
|
428
430
|
return fail(`Error parsing output: ${err.message}`, resp.tokenUsage);
|
|
429
431
|
}
|
|
430
432
|
}
|
|
433
|
+
/**
 * Extracts and parses the JSON object embedded in raw LLM output.
 *
 * LLMs frequently wrap JSON in markdown fences or pretty-print it across several lines, so the
 * caller supplies a pattern that is expected to span newlines.
 *
 * @param {unknown} output - Raw provider output; anything other than a string is rejected.
 * @param {RegExp} pattern - Non-global pattern locating the JSON object inside `output`.
 * @returns {any} The parsed JSON value.
 * @throws {Error} When `output` is not a string, the pattern does not match, or the match is not valid JSON.
 */
function extractGEvalJson(output, pattern) {
    if (typeof output !== 'string') {
        throw new Error('LLM output is not a string');
    }
    const match = output.match(pattern);
    if (!match) {
        throw new Error('No JSON object found in LLM output');
    }
    return JSON.parse(match[0]);
}

/**
 * G-Eval style grader: rates `output` (the reply) against `criteria`, using `input` as source text.
 *
 * Two LLM calls are made against the same text provider:
 *   1. generate evaluation steps from `criteria`;
 *   2. score the reply from 0 to `maxScore` following those steps.
 *
 * @param {string} criteria - Free-text evaluation criteria inserted into both prompts.
 * @param {string} input - Source text the reply is judged against.
 * @param {string} output - The reply being rated.
 * @param {number} threshold - Minimum normalized score (score / maxScore) required to pass.
 * @param {object} [grading] - Optional grading config; only `grading.provider` is read here.
 * @returns {Promise<object>} `{ pass, score, reason }` on success, or the value of `fail(...)` when the
 *   provider reports an error or either reply cannot be interpreted.
 * @throws {Error} When `input` is empty.
 */
async function matchesGEval(criteria, input, output, threshold, grading) {
    if (!input) {
        throw new Error('No source text to estimate reply');
    }
    const maxScore = 10;
    const textProvider = await getAndCheckProvider('text', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).gradingProvider, 'reply geval check');
    const promptSteps = (0, dedent_1.default) `
        Given an evaluation criteria which outlines how you should judge some text, generate 3-4 concise evaluation steps for any text based on the criteria below.

        Evaluation Criteria:
        ${criteria}

        **
        IMPORTANT: Please make sure to only return in minified JSON format, with the "steps" key as a list of strings. No additional words, explanation or formatting is needed.
        Example JSON:
        {"steps": <list_of_strings>}
        **

        JSON:
    `;
    const respSteps = await textProvider.callApi(promptSteps);
    // Surface provider failures directly instead of misreporting them as malformed JSON.
    if (respSteps.error || !respSteps.output) {
        return fail(respSteps.error || 'No output', respSteps.tokenUsage);
    }
    let steps;
    try {
        // [\s\S] (rather than `.`) lets the match span newlines, so pretty-printed or
        // markdown-fenced JSON (```json ... ```) is accepted as well as minified JSON.
        steps = extractGEvalJson(respSteps.output, /\{\s*"steps"[\s\S]+\}/).steps;
    }
    catch {
        return fail(`LLM-proposed evaluation steps are not in JSON format: ${respSteps.output}`, respSteps.tokenUsage);
    }
    // A non-array value (e.g. a single string) would otherwise crash on `steps.join` below.
    if (!Array.isArray(steps) || steps.length === 0) {
        return fail('LLM does not propose any evaluation step', respSteps.tokenUsage);
    }
    const promptText = (0, dedent_1.default) `
        You will be given one Reply for a Source Text below. Your task is to rate the Reply on one metric.
        Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.

        Evaluation Criteria:
        ${criteria}

        Evaluation Steps:
        - ${steps.join('\n- ')}
        - Given the evaluation steps, return a JSON with two keys: 1) a "score" key ranging from 0 - ${maxScore}, with ${maxScore} being that it follows the Evaluation Criteria outlined in the Evaluation Steps and 0 being that it does not; 2) a "reason" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Source Text and Reply in your reason, but be very concise with it!

        Source Text:
        ${input}

        Reply:
        ${output}

        **
        IMPORTANT: Please make sure to only return in minified JSON format, with the "score" and "reason" key. No additional words, explanation or formatting is needed.

        Example JSON:
        {"score":0,"reason":"The text does not follow the evaluation steps provided."}
        **

        JSON:
    `;
    const resp = await textProvider.callApi(promptText);
    if (resp.error || !resp.output) {
        return fail(resp.error || 'No output', resp.tokenUsage);
    }
    let result;
    try {
        result = extractGEvalJson(resp.output, /\{[\s\S]+\}/);
    }
    catch {
        return fail(`LLM-proposed evaluation result is not in JSON format: ${resp.output}`, resp.tokenUsage);
    }
    // Accept numeric strings ("8") as the original implicit coercion did, but reject anything
    // that would turn the final score into NaN.
    const rawScore = typeof result.score === 'string' && result.score.trim() !== ''
        ? Number(result.score)
        : result.score;
    if (typeof rawScore !== 'number' || !Number.isFinite(rawScore)) {
        return fail(`LLM-proposed evaluation result has no numeric "score": ${resp.output}`, resp.tokenUsage);
    }
    // Keep the normalized score inside [0, 1] even if the model ignores the requested range.
    const score = Math.min(Math.max(rawScore, 0), maxScore) / maxScore;
    return {
        pass: score >= threshold,
        score,
        reason: result.reason,
    };
}
|
|
431
505
|
async function matchesAnswerRelevance(input, output, threshold, grading) {
|
|
432
506
|
const embeddingProvider = await getAndCheckProvider('embedding', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).embeddingProvider, 'answer relevancy check');
|
|
433
507
|
const textProvider = await getAndCheckProvider('text', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).gradingProvider, 'answer relevancy check');
|