promptfoo 0.102.2 → 0.102.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/package.json +13 -13
- package/dist/src/app/assets/{index-Ce_ypVwN.js → index-DVUcCcZX.js} +255 -255
- package/dist/src/app/assets/{index.es-BhCb3aAk.js → index.es-Bxgo-NgH.js} +1 -1
- package/dist/src/app/assets/{sync-BXsc6UV3.js → sync-BiF17zM_.js} +1 -1
- package/dist/src/app/index.html +1 -1
- package/dist/src/assertions/geval.d.ts +3 -0
- package/dist/src/assertions/geval.d.ts.map +1 -0
- package/dist/src/assertions/geval.js +39 -0
- package/dist/src/assertions/geval.js.map +1 -0
- package/dist/src/assertions/index.d.ts +1 -1
- package/dist/src/assertions/index.d.ts.map +1 -1
- package/dist/src/assertions/index.js +4 -0
- package/dist/src/assertions/index.js.map +1 -1
- package/dist/src/assertions/refusal.d.ts +3 -0
- package/dist/src/assertions/refusal.d.ts.map +1 -0
- package/dist/src/assertions/refusal.js +23 -0
- package/dist/src/assertions/refusal.js.map +1 -0
- package/dist/src/assertions/utils.d.ts +139 -1
- package/dist/src/assertions/utils.d.ts.map +1 -1
- package/dist/src/database/tables.d.ts +12 -12
- package/dist/src/matchers.d.ts +1 -0
- package/dist/src/matchers.d.ts.map +1 -1
- package/dist/src/matchers.js +74 -0
- package/dist/src/matchers.js.map +1 -1
- package/dist/src/models/evalResult.d.ts.map +1 -1
- package/dist/src/models/evalResult.js +8 -1
- package/dist/src/models/evalResult.js.map +1 -1
- package/dist/src/providers/bedrock.d.ts.map +1 -1
- package/dist/src/providers/bedrock.js +10 -3
- package/dist/src/providers/bedrock.js.map +1 -1
- package/dist/src/providers/shared.js +2 -2
- package/dist/src/providers/shared.js.map +1 -1
- package/dist/src/redteam/constants.d.ts +5 -1
- package/dist/src/redteam/constants.d.ts.map +1 -1
- package/dist/src/redteam/constants.js +21 -0
- package/dist/src/redteam/constants.js.map +1 -1
- package/dist/src/redteam/index.d.ts.map +1 -1
- package/dist/src/redteam/index.js +4 -0
- package/dist/src/redteam/index.js.map +1 -1
- package/dist/src/redteam/plugins/index.d.ts.map +1 -1
- package/dist/src/redteam/plugins/index.js +2 -0
- package/dist/src/redteam/plugins/index.js.map +1 -1
- package/dist/src/redteam/plugins/pliny.d.ts +9 -0
- package/dist/src/redteam/plugins/pliny.d.ts.map +1 -0
- package/dist/src/redteam/plugins/pliny.js +68 -0
- package/dist/src/redteam/plugins/pliny.js.map +1 -0
- package/dist/src/redteam/providers/crescendo/index.d.ts +4 -7
- package/dist/src/redteam/providers/crescendo/index.d.ts.map +1 -1
- package/dist/src/redteam/providers/crescendo/index.js +1 -1
- package/dist/src/redteam/providers/crescendo/index.js.map +1 -1
- package/dist/src/redteam/providers/goat.d.ts.map +1 -1
- package/dist/src/redteam/providers/goat.js +15 -10
- package/dist/src/redteam/providers/goat.js.map +1 -1
- package/dist/src/redteam/providers/shared.d.ts +5 -0
- package/dist/src/redteam/providers/shared.d.ts.map +1 -1
- package/dist/src/redteam/providers/shared.js +3 -1
- package/dist/src/redteam/providers/shared.js.map +1 -1
- package/dist/src/redteam/shared.d.ts.map +1 -1
- package/dist/src/redteam/shared.js +1 -0
- package/dist/src/redteam/shared.js.map +1 -1
- package/dist/src/redteam/util.d.ts.map +1 -1
- package/dist/src/redteam/util.js +19 -2
- package/dist/src/redteam/util.js.map +1 -1
- package/dist/src/server/routes/redteam.js +2 -1
- package/dist/src/server/routes/redteam.js.map +1 -1
- package/dist/src/types/index.d.ts +343 -343
- package/dist/src/types/index.d.ts.map +1 -1
- package/dist/src/types/index.js +2 -0
- package/dist/src/types/index.js.map +1 -1
- package/dist/src/util/index.d.ts +4 -4
- package/dist/src/validators/redteam.js +2 -2
- package/dist/src/validators/redteam.js.map +1 -1
- package/dist/test/factories/evalFactory.d.ts +8 -8
- package/dist/test/models/evalResult.test.d.ts +2 -0
- package/dist/test/models/evalResult.test.d.ts.map +1 -0
- package/dist/test/models/evalResult.test.js +217 -0
- package/dist/test/models/evalResult.test.js.map +1 -0
- package/dist/test/providers/bedrock.test.js +110 -0
- package/dist/test/providers/bedrock.test.js.map +1 -1
- package/dist/test/redteam/validators.test.js +2 -0
- package/dist/test/redteam/validators.test.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +13 -13
|
@@ -1,5 +1,143 @@
|
|
|
1
1
|
import { type Assertion, type TestCase } from '../types';
|
|
2
|
-
export declare function getFinalTest(test: TestCase, assertion: Assertion): Readonly<
|
|
2
|
+
export declare function getFinalTest(test: TestCase, assertion: Assertion): Readonly<{
|
|
3
|
+
options?: ({
|
|
4
|
+
prefix?: string | undefined;
|
|
5
|
+
suffix?: string | undefined;
|
|
6
|
+
} & {
|
|
7
|
+
transform?: string | undefined;
|
|
8
|
+
postprocess?: string | undefined;
|
|
9
|
+
transformVars?: string | undefined;
|
|
10
|
+
storeOutputAs?: string | undefined;
|
|
11
|
+
} & {
|
|
12
|
+
provider?: any;
|
|
13
|
+
rubricPrompt?: string | string[] | {
|
|
14
|
+
role: string;
|
|
15
|
+
content: string;
|
|
16
|
+
}[] | undefined;
|
|
17
|
+
factuality?: {
|
|
18
|
+
subset?: number | undefined;
|
|
19
|
+
superset?: number | undefined;
|
|
20
|
+
agree?: number | undefined;
|
|
21
|
+
disagree?: number | undefined;
|
|
22
|
+
differButFactual?: number | undefined;
|
|
23
|
+
} | undefined;
|
|
24
|
+
} & {
|
|
25
|
+
disableVarExpansion?: boolean | undefined;
|
|
26
|
+
disableConversationVar?: boolean | undefined;
|
|
27
|
+
runSerially?: boolean | undefined;
|
|
28
|
+
}) | undefined;
|
|
29
|
+
vars?: Record<string, string | any[] | string[] | {}> | undefined;
|
|
30
|
+
provider?: string | {
|
|
31
|
+
id?: string | undefined;
|
|
32
|
+
config?: any;
|
|
33
|
+
label?: string | undefined;
|
|
34
|
+
prompts?: string[] | undefined;
|
|
35
|
+
transform?: string | undefined;
|
|
36
|
+
delay?: number | undefined;
|
|
37
|
+
env?: {
|
|
38
|
+
PROMPTFOO_REMOTE_GENERATION_URL?: string | undefined;
|
|
39
|
+
AI21_API_BASE_URL?: string | undefined;
|
|
40
|
+
AI21_API_KEY?: string | undefined;
|
|
41
|
+
ANTHROPIC_API_KEY?: string | undefined;
|
|
42
|
+
AWS_BEDROCK_REGION?: string | undefined;
|
|
43
|
+
FAL_KEY?: string | undefined;
|
|
44
|
+
GROQ_API_KEY?: string | undefined;
|
|
45
|
+
LOCALAI_BASE_URL?: string | undefined;
|
|
46
|
+
WATSONX_AI_APIKEY?: string | undefined;
|
|
47
|
+
WATSONX_AI_PROJECT_ID?: string | undefined;
|
|
48
|
+
WATSONX_AI_BEARER_TOKEN?: string | undefined;
|
|
49
|
+
AZURE_CLIENT_SECRET?: string | undefined;
|
|
50
|
+
AZURE_CLIENT_ID?: string | undefined;
|
|
51
|
+
AZURE_TENANT_ID?: string | undefined;
|
|
52
|
+
AZURE_AUTHORITY_HOST?: string | undefined;
|
|
53
|
+
AZURE_TOKEN_SCOPE?: string | undefined;
|
|
54
|
+
AZURE_DEPLOYMENT_NAME?: string | undefined;
|
|
55
|
+
AZURE_EMBEDDING_DEPLOYMENT_NAME?: string | undefined;
|
|
56
|
+
AZURE_OPENAI_DEPLOYMENT_NAME?: string | undefined;
|
|
57
|
+
AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME?: string | undefined;
|
|
58
|
+
AZURE_OPENAI_API_BASE_URL?: string | undefined;
|
|
59
|
+
AZURE_OPENAI_API_HOST?: string | undefined;
|
|
60
|
+
AZURE_OPENAI_API_KEY?: string | undefined;
|
|
61
|
+
AZURE_API_BASE_URL?: string | undefined;
|
|
62
|
+
AZURE_API_HOST?: string | undefined;
|
|
63
|
+
AZURE_API_KEY?: string | undefined;
|
|
64
|
+
AZURE_OPENAI_BASE_URL?: string | undefined;
|
|
65
|
+
BAM_API_HOST?: string | undefined;
|
|
66
|
+
BAM_API_KEY?: string | undefined;
|
|
67
|
+
CLOUDFLARE_ACCOUNT_ID?: string | undefined;
|
|
68
|
+
CLOUDFLARE_API_KEY?: string | undefined;
|
|
69
|
+
COHERE_API_KEY?: string | undefined;
|
|
70
|
+
GOOGLE_API_HOST?: string | undefined;
|
|
71
|
+
GOOGLE_API_KEY?: string | undefined;
|
|
72
|
+
MISTRAL_API_BASE_URL?: string | undefined;
|
|
73
|
+
MISTRAL_API_HOST?: string | undefined;
|
|
74
|
+
MISTRAL_API_KEY?: string | undefined;
|
|
75
|
+
OPENAI_API_BASE_URL?: string | undefined;
|
|
76
|
+
OPENAI_API_HOST?: string | undefined;
|
|
77
|
+
OPENAI_API_KEY?: string | undefined;
|
|
78
|
+
OPENAI_BASE_URL?: string | undefined;
|
|
79
|
+
OPENAI_ORGANIZATION?: string | undefined;
|
|
80
|
+
PALM_API_HOST?: string | undefined;
|
|
81
|
+
PALM_API_KEY?: string | undefined;
|
|
82
|
+
REPLICATE_API_KEY?: string | undefined;
|
|
83
|
+
REPLICATE_API_TOKEN?: string | undefined;
|
|
84
|
+
VERTEX_API_HOST?: string | undefined;
|
|
85
|
+
VERTEX_API_KEY?: string | undefined;
|
|
86
|
+
VERTEX_PROJECT_ID?: string | undefined;
|
|
87
|
+
VERTEX_PUBLISHER?: string | undefined;
|
|
88
|
+
VERTEX_REGION?: string | undefined;
|
|
89
|
+
} | undefined;
|
|
90
|
+
} | {
|
|
91
|
+
callApi: import("../types").CallApiFunction;
|
|
92
|
+
id: (...args: unknown[]) => string;
|
|
93
|
+
config?: any;
|
|
94
|
+
label?: string | undefined;
|
|
95
|
+
transform?: string | undefined;
|
|
96
|
+
delay?: number | undefined;
|
|
97
|
+
callEmbeddingApi?: ((args_0: string, ...args: unknown[]) => Promise<import("../types").ProviderEmbeddingResponse>) | undefined;
|
|
98
|
+
callClassificationApi?: ((args_0: string, ...args: unknown[]) => Promise<import("../types").ProviderClassificationResponse>) | undefined;
|
|
99
|
+
} | undefined;
|
|
100
|
+
metadata?: (Record<string, any> & {
|
|
101
|
+
pluginConfig?: import("../types").PluginConfig | undefined;
|
|
102
|
+
strategyConfig?: import("../types").RedteamObjectConfig | undefined;
|
|
103
|
+
}) | undefined;
|
|
104
|
+
description?: string | undefined;
|
|
105
|
+
providerOutput?: string | {} | undefined;
|
|
106
|
+
assert?: ({
|
|
107
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
108
|
+
value?: import("../types").AssertionValue | undefined;
|
|
109
|
+
config?: Record<string, any> | undefined;
|
|
110
|
+
provider?: any;
|
|
111
|
+
transform?: string | undefined;
|
|
112
|
+
rubricPrompt?: string | string[] | {
|
|
113
|
+
role: string;
|
|
114
|
+
content: string;
|
|
115
|
+
}[] | undefined;
|
|
116
|
+
threshold?: number | undefined;
|
|
117
|
+
weight?: number | undefined;
|
|
118
|
+
metric?: string | undefined;
|
|
119
|
+
} | {
|
|
120
|
+
type: "assert-set";
|
|
121
|
+
assert: {
|
|
122
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
123
|
+
value?: import("../types").AssertionValue | undefined;
|
|
124
|
+
config?: Record<string, any> | undefined;
|
|
125
|
+
provider?: any;
|
|
126
|
+
transform?: string | undefined;
|
|
127
|
+
rubricPrompt?: string | string[] | {
|
|
128
|
+
role: string;
|
|
129
|
+
content: string;
|
|
130
|
+
}[] | undefined;
|
|
131
|
+
threshold?: number | undefined;
|
|
132
|
+
weight?: number | undefined;
|
|
133
|
+
metric?: string | undefined;
|
|
134
|
+
}[];
|
|
135
|
+
threshold?: number | undefined;
|
|
136
|
+
weight?: number | undefined;
|
|
137
|
+
metric?: string | undefined;
|
|
138
|
+
})[] | undefined;
|
|
139
|
+
threshold?: number | undefined;
|
|
140
|
+
}>;
|
|
3
141
|
export declare function processFileReference(fileRef: string): object | string;
|
|
4
142
|
export declare function coerceString(value: string | object): string;
|
|
5
143
|
//# sourceMappingURL=utils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/assertions/utils.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,KAAK,SAAS,EAAE,KAAK,QAAQ,EAAE,MAAM,UAAU,CAAC;AAIzD,wBAAgB,YAAY,CAAC,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/assertions/utils.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,KAAK,SAAS,EAAE,KAAK,QAAQ,EAAE,MAAM,UAAU,CAAC;AAIzD,wBAAgB,YAAY,CAAC,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAUhE;AAED,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAYrE;AAED,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAK3D"}
|
|
@@ -539,7 +539,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
539
539
|
description?: string | undefined;
|
|
540
540
|
providerOutput?: string | {} | undefined;
|
|
541
541
|
assert?: ({
|
|
542
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
542
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
543
543
|
value?: import("../types").AssertionValue | undefined;
|
|
544
544
|
config?: Record<string, any> | undefined;
|
|
545
545
|
provider?: any;
|
|
@@ -554,7 +554,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
554
554
|
} | {
|
|
555
555
|
type: "assert-set";
|
|
556
556
|
assert: {
|
|
557
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
557
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
558
558
|
value?: import("../types").AssertionValue | undefined;
|
|
559
559
|
config?: Record<string, any> | undefined;
|
|
560
560
|
provider?: any;
|
|
@@ -681,7 +681,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
681
681
|
description?: string | undefined;
|
|
682
682
|
providerOutput?: string | {} | undefined;
|
|
683
683
|
assert?: ({
|
|
684
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
684
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
685
685
|
value?: import("../types").AssertionValue | undefined;
|
|
686
686
|
config?: Record<string, any> | undefined;
|
|
687
687
|
provider?: any;
|
|
@@ -696,7 +696,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
696
696
|
} | {
|
|
697
697
|
type: "assert-set";
|
|
698
698
|
assert: {
|
|
699
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
699
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
700
700
|
value?: import("../types").AssertionValue | undefined;
|
|
701
701
|
config?: Record<string, any> | undefined;
|
|
702
702
|
provider?: any;
|
|
@@ -820,7 +820,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
820
820
|
description?: string | undefined;
|
|
821
821
|
providerOutput?: string | {} | undefined;
|
|
822
822
|
assert?: ({
|
|
823
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
823
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
824
824
|
value?: import("../types").AssertionValue | undefined;
|
|
825
825
|
config?: Record<string, any> | undefined;
|
|
826
826
|
provider?: any;
|
|
@@ -835,7 +835,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
835
835
|
} | {
|
|
836
836
|
type: "assert-set";
|
|
837
837
|
assert: {
|
|
838
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
838
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
839
839
|
value?: import("../types").AssertionValue | undefined;
|
|
840
840
|
config?: Record<string, any> | undefined;
|
|
841
841
|
provider?: any;
|
|
@@ -960,7 +960,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
960
960
|
}) | undefined;
|
|
961
961
|
providerOutput?: string | {} | undefined;
|
|
962
962
|
assert?: ({
|
|
963
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
963
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
964
964
|
value?: import("../types").AssertionValue | undefined;
|
|
965
965
|
config?: Record<string, any> | undefined;
|
|
966
966
|
provider?: any;
|
|
@@ -975,7 +975,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
|
|
|
975
975
|
} | {
|
|
976
976
|
type: "assert-set";
|
|
977
977
|
assert: {
|
|
978
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
978
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
979
979
|
value?: import("../types").AssertionValue | undefined;
|
|
980
980
|
config?: Record<string, any> | undefined;
|
|
981
981
|
provider?: any;
|
|
@@ -1493,7 +1493,7 @@ export declare const evalResultsTable: import("drizzle-orm/sqlite-core").SQLiteT
|
|
|
1493
1493
|
description?: string | undefined;
|
|
1494
1494
|
providerOutput?: string | {} | undefined;
|
|
1495
1495
|
assert?: ({
|
|
1496
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1496
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1497
1497
|
value?: import("../types").AssertionValue | undefined;
|
|
1498
1498
|
config?: Record<string, any> | undefined;
|
|
1499
1499
|
provider?: any;
|
|
@@ -1508,7 +1508,7 @@ export declare const evalResultsTable: import("drizzle-orm/sqlite-core").SQLiteT
|
|
|
1508
1508
|
} | {
|
|
1509
1509
|
type: "assert-set";
|
|
1510
1510
|
assert: {
|
|
1511
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1511
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1512
1512
|
value?: import("../types").AssertionValue | undefined;
|
|
1513
1513
|
config?: Record<string, any> | undefined;
|
|
1514
1514
|
provider?: any;
|
|
@@ -1966,7 +1966,7 @@ export declare const datasetsTable: import("drizzle-orm/sqlite-core").SQLiteTabl
|
|
|
1966
1966
|
description?: string | undefined;
|
|
1967
1967
|
providerOutput?: string | {} | undefined;
|
|
1968
1968
|
assert?: ({
|
|
1969
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1969
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1970
1970
|
value?: import("../types").AssertionValue | undefined;
|
|
1971
1971
|
config?: Record<string, any> | undefined;
|
|
1972
1972
|
provider?: any;
|
|
@@ -1981,7 +1981,7 @@ export declare const datasetsTable: import("drizzle-orm/sqlite-core").SQLiteTabl
|
|
|
1981
1981
|
} | {
|
|
1982
1982
|
type: "assert-set";
|
|
1983
1983
|
assert: {
|
|
1984
|
-
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1984
|
+
type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
|
|
1985
1985
|
value?: import("../types").AssertionValue | undefined;
|
|
1986
1986
|
config?: Record<string, any> | undefined;
|
|
1987
1987
|
provider?: any;
|
package/dist/src/matchers.d.ts
CHANGED
|
@@ -15,6 +15,7 @@ export declare function renderLlmRubricPrompt(rubric: string, llmOutput: string,
|
|
|
15
15
|
export declare function matchesLlmRubric(rubric: string, llmOutput: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
|
|
16
16
|
export declare function matchesFactuality(input: string, expected: string, output: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
|
|
17
17
|
export declare function matchesClosedQa(input: string, expected: string, output: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
|
|
18
|
+
export declare function matchesGEval(criteria: string, input: string, output: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;
|
|
18
19
|
export declare function matchesAnswerRelevance(input: string, output: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;
|
|
19
20
|
export declare function matchesContextRecall(context: string, groundTruth: string, threshold: number, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
|
|
20
21
|
export declare function matchesContextRelevance(question: string, context: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"matchers.d.ts","sourceRoot":"","sources":["../../src/matchers.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"matchers.d.ts","sourceRoot":"","sources":["../../src/matchers.ts"],"names":[],"mappings":"AAsBA,OAAO,KAAK,EAGV,WAAW,EAEX,aAAa,EACb,aAAa,EAIb,YAAY,EAEb,MAAM,SAAS,CAAC;AAiDjB,wBAAsB,kBAAkB,CACtC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,GAClC,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAqC7B;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,EACnC,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,WAAW,CAAC,CAmCtB;AAgBD,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,OAAe,EACxB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA8F3C;AAED;;;;;;;GAOG;AACH,wBAAsB,qBAAqB,CACzC,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoC3C;AAED,wBAAgB,qBAAqB,CACnC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,UAavC;AAED,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyD3C;AAED,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAwF3C;AAED,wBAAsB,eAAe,CACnC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoD3C;AAED,wBAAsB,YAAY,CAChC,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAmF3C;AAED,wBAAsB,sBAAsB,CAC1C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA6F3C;AAED,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0C3C;AAED,wBAAsB,uBAAuB,CAC3C,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyC3C;AAED,wBAAsB,0BAA0B,CAC9C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0E3C;AAED,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,EAAE,CAAC,CAyD7C;AAED,UAAU,sBAAsB;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AAED,wBAAsB,iBAAiB,CACrC,EAAE,UAAU,EAAE,iBAAiB,EAAE,UAAe,EAAE,EAAE,sBAAsB,EAC1E,OAAO,CAAC,EAAE,aAAa;;;;GAwDxB"}
|
package/dist/src/matchers.js
CHANGED
|
@@ -11,12 +11,14 @@ exports.renderLlmRubricPrompt = renderLlmRubricPrompt;
|
|
|
11
11
|
exports.matchesLlmRubric = matchesLlmRubric;
|
|
12
12
|
exports.matchesFactuality = matchesFactuality;
|
|
13
13
|
exports.matchesClosedQa = matchesClosedQa;
|
|
14
|
+
exports.matchesGEval = matchesGEval;
|
|
14
15
|
exports.matchesAnswerRelevance = matchesAnswerRelevance;
|
|
15
16
|
exports.matchesContextRecall = matchesContextRecall;
|
|
16
17
|
exports.matchesContextRelevance = matchesContextRelevance;
|
|
17
18
|
exports.matchesContextFaithfulness = matchesContextFaithfulness;
|
|
18
19
|
exports.matchesSelectBest = matchesSelectBest;
|
|
19
20
|
exports.matchesModeration = matchesModeration;
|
|
21
|
+
const dedent_1 = __importDefault(require("dedent"));
|
|
20
22
|
const cliState_1 = __importDefault(require("./cliState"));
|
|
21
23
|
const envars_1 = require("./envars");
|
|
22
24
|
const logger_1 = __importDefault(require("./logger"));
|
|
@@ -428,6 +430,78 @@ async function matchesClosedQa(input, expected, output, grading, vars) {
|
|
|
428
430
|
return fail(`Error parsing output: ${err.message}`, resp.tokenUsage);
|
|
429
431
|
}
|
|
430
432
|
}
|
|
433
|
+
async function matchesGEval(criteria, input, output, threshold, grading) {
|
|
434
|
+
if (!input) {
|
|
435
|
+
throw Error('No source text to estimate reply');
|
|
436
|
+
}
|
|
437
|
+
const maxScore = 10;
|
|
438
|
+
const textProvider = await getAndCheckProvider('text', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).gradingProvider, 'reply geval check');
|
|
439
|
+
const promptSteps = (0, dedent_1.default) `
|
|
440
|
+
Given an evaluation criteria which outlines how you should judge some text, generate 3-4 concise evaluation steps for any text based on the criteria below.
|
|
441
|
+
|
|
442
|
+
Evaluation Criteria:
|
|
443
|
+
${criteria}
|
|
444
|
+
|
|
445
|
+
**
|
|
446
|
+
IMPORTANT: Please make sure to only return in minified JSON format, with the "steps" key as a list of strings. No additional words, explanation or formatting is needed.
|
|
447
|
+
Example JSON:
|
|
448
|
+
{"steps": <list_of_strings>}
|
|
449
|
+
**
|
|
450
|
+
|
|
451
|
+
JSON:
|
|
452
|
+
`;
|
|
453
|
+
const respSteps = await textProvider.callApi(promptSteps);
|
|
454
|
+
let steps;
|
|
455
|
+
try {
|
|
456
|
+
// NOTE: use regexp for reliable, because sometimes LLM wraps response to markdown format ```json...```
|
|
457
|
+
steps = JSON.parse(respSteps.output.match(/\{"steps".+\}/g)[0]).steps;
|
|
458
|
+
if (!steps.length) {
|
|
459
|
+
return fail('LLM does not propose any evaluation step');
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
catch {
|
|
463
|
+
return fail(`LLM-proposed evaluation steps are not in JSON format: ${respSteps.output}`);
|
|
464
|
+
}
|
|
465
|
+
const promptText = (0, dedent_1.default) `
|
|
466
|
+
You will be given one Reply for a Source Text below. Your task is to rate the Reply on one metric.
|
|
467
|
+
Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.
|
|
468
|
+
|
|
469
|
+
Evaluation Criteria:
|
|
470
|
+
${criteria}
|
|
471
|
+
|
|
472
|
+
Evaluation Steps:
|
|
473
|
+
- ${steps.join('\n- ')}
|
|
474
|
+
- Given the evaluation steps, return a JSON with two keys: 1) a "score" key ranging from 0 - ${maxScore}, with ${maxScore} being that it follows the Evaluation Criteria outlined in the Evaluation Steps and 0 being that it does not; 2) a "reason" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Source Text and Reply in your reason, but be very concise with it!
|
|
475
|
+
|
|
476
|
+
Source Text:
|
|
477
|
+
${input}
|
|
478
|
+
|
|
479
|
+
Reply:
|
|
480
|
+
${output}
|
|
481
|
+
|
|
482
|
+
**
|
|
483
|
+
IMPORTANT: Please make sure to only return in minified JSON format, with the "score" and "reason" key. No additional words, explanation or formatting is needed.
|
|
484
|
+
|
|
485
|
+
Example JSON:
|
|
486
|
+
{"score":0,"reason":"The text does not follow the evaluation steps provided."}
|
|
487
|
+
**
|
|
488
|
+
|
|
489
|
+
JSON:
|
|
490
|
+
`;
|
|
491
|
+
const resp = await textProvider.callApi(promptText);
|
|
492
|
+
let result;
|
|
493
|
+
try {
|
|
494
|
+
result = JSON.parse(resp.output.match(/\{.+\}/g)[0]);
|
|
495
|
+
}
|
|
496
|
+
catch {
|
|
497
|
+
return fail(`LLM-proposed evaluation result is not in JSON format: ${resp.output}`);
|
|
498
|
+
}
|
|
499
|
+
return {
|
|
500
|
+
pass: result.score / maxScore >= threshold,
|
|
501
|
+
score: result.score / maxScore,
|
|
502
|
+
reason: result.reason,
|
|
503
|
+
};
|
|
504
|
+
}
|
|
431
505
|
async function matchesAnswerRelevance(input, output, threshold, grading) {
|
|
432
506
|
const embeddingProvider = await getAndCheckProvider('embedding', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).embeddingProvider, 'answer relevancy check');
|
|
433
507
|
const textProvider = await getAndCheckProvider('text', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).gradingProvider, 'answer relevancy check');
|