promptfoo 0.119.13 → 0.119.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +28 -26
- package/dist/src/app/assets/index-eJ2lMe94.js +51 -0
- package/dist/src/app/assets/{source-map-support-Bnh0UQ2S.js → source-map-support-1v4oeb7P.js} +1 -1
- package/dist/src/app/assets/sync-CtLQRuC1.js +1 -0
- package/dist/src/app/assets/{vendor-charts-T60Uk0Z3.js → vendor-charts-DnVv66VV.js} +1 -1
- package/dist/src/app/assets/{vendor-markdown-DLig-KJh.js → vendor-markdown-DCpQIyMA.js} +1 -1
- package/dist/src/app/assets/{vendor-mui-core-5BLaiG3c.js → vendor-mui-core-Boqnpf9f.js} +1 -1
- package/dist/src/app/assets/{vendor-mui-icons-fn39Fu2e.js → vendor-mui-icons-B8MqoVbj.js} +1 -1
- package/dist/src/app/assets/vendor-mui-x-CGSS6QHF.js +45 -0
- package/dist/src/app/assets/{vendor-utils-DYBMEuwX.js → vendor-utils-DdfHIEy8.js} +1 -1
- package/dist/src/app/index.html +7 -7
- package/dist/src/assertions/guardrails.d.ts +1 -1
- package/dist/src/assertions/guardrails.js +18 -9
- package/dist/src/assertions/index.d.ts +1 -1
- package/dist/src/assertions/index.js +9 -3
- package/dist/src/assertions/searchRubric.d.ts +3 -0
- package/dist/src/assertions/searchRubric.js +18 -0
- package/dist/src/commands/eval.js +1 -1
- package/dist/src/commands/modelScan.d.ts +7 -1
- package/dist/src/commands/modelScan.js +121 -59
- package/dist/src/database/index.d.ts +6 -0
- package/dist/src/database/index.js +11 -0
- package/dist/src/database/tables.d.ts +46 -24
- package/dist/src/envars.d.ts +17 -0
- package/dist/src/generated/constants.js +1 -1
- package/dist/src/logger.d.ts +5 -0
- package/dist/src/logger.js +28 -0
- package/dist/src/main.js +17 -6
- package/dist/src/matchers.d.ts +1 -0
- package/dist/src/matchers.js +80 -0
- package/dist/src/models/eval.d.ts +2 -1
- package/dist/src/models/eval.js +44 -2
- package/dist/src/prompts/grading.d.ts +1 -0
- package/dist/src/prompts/grading.js +26 -1
- package/dist/src/prompts/index.d.ts +1 -0
- package/dist/src/prompts/index.js +4 -1
- package/dist/src/providers/adaline.gateway.js +2 -2
- package/dist/src/providers/anthropic/defaults.d.ts +1 -1
- package/dist/src/providers/anthropic/defaults.js +15 -0
- package/dist/src/providers/azure/chat.d.ts +3 -1
- package/dist/src/providers/azure/chat.js +16 -3
- package/dist/src/providers/azure/defaults.js +660 -141
- package/dist/src/providers/azure/responses.d.ts +5 -0
- package/dist/src/providers/azure/responses.js +33 -4
- package/dist/src/providers/azure/types.d.ts +4 -0
- package/dist/src/providers/bedrock/agents.d.ts +1 -1
- package/dist/src/providers/bedrock/agents.js +2 -2
- package/dist/src/providers/bedrock/base.d.ts +40 -0
- package/dist/src/providers/bedrock/base.js +171 -0
- package/dist/src/providers/bedrock/converse.d.ts +146 -0
- package/dist/src/providers/bedrock/converse.js +1044 -0
- package/dist/src/providers/bedrock/index.d.ts +1 -34
- package/dist/src/providers/bedrock/index.js +4 -159
- package/dist/src/providers/bedrock/knowledgeBase.d.ts +1 -1
- package/dist/src/providers/bedrock/knowledgeBase.js +2 -2
- package/dist/src/providers/bedrock/nova-sonic.d.ts +2 -1
- package/dist/src/providers/bedrock/nova-sonic.js +2 -2
- package/dist/src/providers/claude-agent-sdk.d.ts +58 -1
- package/dist/src/providers/claude-agent-sdk.js +22 -1
- package/dist/src/providers/defaults.js +4 -0
- package/dist/src/providers/github/defaults.js +6 -6
- package/dist/src/providers/google/types.d.ts +25 -0
- package/dist/src/providers/google/util.d.ts +2 -0
- package/dist/src/providers/google/vertex.js +78 -22
- package/dist/src/providers/{groq.d.ts → groq/chat.d.ts} +26 -20
- package/dist/src/providers/groq/chat.js +79 -0
- package/dist/src/providers/groq/index.d.ts +5 -0
- package/dist/src/providers/groq/index.js +24 -0
- package/dist/src/providers/groq/responses.d.ts +106 -0
- package/dist/src/providers/groq/responses.js +64 -0
- package/dist/src/providers/groq/types.d.ts +44 -0
- package/dist/src/providers/groq/types.js +3 -0
- package/dist/src/providers/groq/util.d.ts +15 -0
- package/dist/src/providers/groq/util.js +28 -0
- package/dist/src/providers/mcp/client.d.ts +8 -0
- package/dist/src/providers/mcp/client.js +60 -10
- package/dist/src/providers/mcp/types.d.ts +21 -0
- package/dist/src/providers/openai/chatkit-pool.d.ts +114 -0
- package/dist/src/providers/openai/chatkit-pool.js +548 -0
- package/dist/src/providers/openai/chatkit-types.d.ts +73 -0
- package/dist/src/providers/openai/chatkit-types.js +3 -0
- package/dist/src/providers/openai/chatkit.d.ts +76 -0
- package/dist/src/providers/openai/chatkit.js +879 -0
- package/dist/src/providers/openai/codex-sdk.d.ts +109 -0
- package/dist/src/providers/openai/codex-sdk.js +346 -0
- package/dist/src/providers/openai/defaults.d.ts +2 -0
- package/dist/src/providers/openai/defaults.js +10 -4
- package/dist/src/providers/registry.js +48 -9
- package/dist/src/providers/responses/types.d.ts +1 -1
- package/dist/src/providers/sagemaker.d.ts +2 -2
- package/dist/src/providers/webSearchUtils.d.ts +17 -0
- package/dist/src/providers/webSearchUtils.js +169 -0
- package/dist/src/providers/xai/chat.d.ts +61 -0
- package/dist/src/providers/xai/chat.js +68 -3
- package/dist/src/providers/xai/responses.d.ts +189 -0
- package/dist/src/providers/xai/responses.js +268 -0
- package/dist/src/redteam/constants/plugins.d.ts +1 -1
- package/dist/src/redteam/constants/plugins.js +1 -1
- package/dist/src/redteam/constants/strategies.d.ts +1 -1
- package/dist/src/redteam/constants/strategies.js +1 -0
- package/dist/src/redteam/plugins/vlguard.d.ts +53 -4
- package/dist/src/redteam/plugins/vlguard.js +362 -46
- package/dist/src/redteam/providers/constants.d.ts +2 -2
- package/dist/src/redteam/providers/constants.js +2 -2
- package/dist/src/redteam/providers/crescendo/index.d.ts +1 -1
- package/dist/src/redteam/providers/crescendo/index.js +5 -3
- package/dist/src/redteam/providers/hydra/index.js +1 -1
- package/dist/src/server/routes/modelAudit.js +4 -4
- package/dist/src/share.js +4 -2
- package/dist/src/telemetry.js +44 -8
- package/dist/src/types/env.d.ts +3 -0
- package/dist/src/types/env.js +1 -0
- package/dist/src/types/index.d.ts +896 -615
- package/dist/src/types/index.js +1 -0
- package/dist/src/types/providers.d.ts +1 -0
- package/dist/src/types/tracing.d.ts +3 -0
- package/dist/src/util/database.d.ts +6 -4
- package/dist/src/util/file.js +6 -4
- package/dist/src/util/modelAuditCliParser.d.ts +4 -4
- package/dist/src/util/xlsx.js +52 -26
- package/dist/src/validators/providers.d.ts +142 -122
- package/dist/src/validators/providers.js +4 -6
- package/dist/src/validators/redteam.d.ts +36 -28
- package/dist/src/validators/redteam.js +9 -3
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +28 -26
- package/dist/drizzle/CLAUDE.md +0 -65
- package/dist/src/app/assets/index-DifT6VGT.js +0 -51
- package/dist/src/app/assets/sync-Oo-W_Rbj.js +0 -1
- package/dist/src/app/assets/vendor-mui-x-C2xF-yiO.js +0 -45
- package/dist/src/providers/groq.js +0 -48
package/dist/src/app/index.html
CHANGED
|
@@ -7,14 +7,14 @@
|
|
|
7
7
|
<title>promptfoo</title>
|
|
8
8
|
<meta name="description" content="LLM testing and evaluation" />
|
|
9
9
|
<meta property="og:image" content="https://www.promptfoo.dev/img/thumbnail.png" />
|
|
10
|
-
<script type="module" crossorigin src="/assets/index-
|
|
10
|
+
<script type="module" crossorigin src="/assets/index-eJ2lMe94.js"></script>
|
|
11
11
|
<link rel="modulepreload" crossorigin href="/assets/vendor-react-BuO7LJGJ.js">
|
|
12
|
-
<link rel="modulepreload" crossorigin href="/assets/vendor-mui-core-
|
|
13
|
-
<link rel="modulepreload" crossorigin href="/assets/vendor-mui-icons-
|
|
14
|
-
<link rel="modulepreload" crossorigin href="/assets/vendor-mui-x-
|
|
15
|
-
<link rel="modulepreload" crossorigin href="/assets/vendor-charts-
|
|
16
|
-
<link rel="modulepreload" crossorigin href="/assets/vendor-utils-
|
|
17
|
-
<link rel="modulepreload" crossorigin href="/assets/vendor-markdown-
|
|
12
|
+
<link rel="modulepreload" crossorigin href="/assets/vendor-mui-core-Boqnpf9f.js">
|
|
13
|
+
<link rel="modulepreload" crossorigin href="/assets/vendor-mui-icons-B8MqoVbj.js">
|
|
14
|
+
<link rel="modulepreload" crossorigin href="/assets/vendor-mui-x-CGSS6QHF.js">
|
|
15
|
+
<link rel="modulepreload" crossorigin href="/assets/vendor-charts-DnVv66VV.js">
|
|
16
|
+
<link rel="modulepreload" crossorigin href="/assets/vendor-utils-DdfHIEy8.js">
|
|
17
|
+
<link rel="modulepreload" crossorigin href="/assets/vendor-markdown-DCpQIyMA.js">
|
|
18
18
|
<link rel="modulepreload" crossorigin href="/assets/vendor-syntax-BKjot6cf.js">
|
|
19
19
|
<link rel="stylesheet" crossorigin href="/assets/vendor-mui-x-BU3nLhQd.css">
|
|
20
20
|
<link rel="stylesheet" crossorigin href="/assets/index-M7TScuK-.css">
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import type { AssertionParams, GradingResult } from '../types/index';
|
|
2
|
-
export declare const handleGuardrails: ({ assertion, providerResponse, }: AssertionParams) => Promise<GradingResult>;
|
|
2
|
+
export declare const handleGuardrails: ({ assertion, inverse, providerResponse, }: AssertionParams) => Promise<GradingResult>;
|
|
3
3
|
//# sourceMappingURL=guardrails.d.ts.map
|
|
@@ -5,7 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.handleGuardrails = void 0;
|
|
7
7
|
const logger_1 = __importDefault(require("../logger"));
|
|
8
|
-
const handleGuardrails = async ({ assertion, providerResponse, }) => {
|
|
8
|
+
const handleGuardrails = async ({ assertion, inverse, providerResponse, }) => {
|
|
9
9
|
const guardrails = providerResponse.guardrails ||
|
|
10
10
|
providerResponse.metadata?.redteamHistory?.[providerResponse.metadata.redteamHistory.length - 1]
|
|
11
11
|
?.guardrails;
|
|
@@ -20,26 +20,35 @@ const handleGuardrails = async ({ assertion, providerResponse, }) => {
|
|
|
20
20
|
reason = 'Output failed safety checks';
|
|
21
21
|
}
|
|
22
22
|
reason = guardrails.reason || reason;
|
|
23
|
+
// For inverse (not-guardrails), passing means content WAS flagged
|
|
24
|
+
const pass = inverse;
|
|
23
25
|
return {
|
|
24
|
-
pass
|
|
25
|
-
score: 0,
|
|
26
|
-
reason,
|
|
26
|
+
pass,
|
|
27
|
+
score: pass ? 1 : 0,
|
|
28
|
+
reason: inverse ? `Guardrail correctly blocked: ${reason}` : reason,
|
|
27
29
|
assertion,
|
|
28
30
|
};
|
|
29
31
|
}
|
|
30
32
|
else {
|
|
33
|
+
// Content was not flagged
|
|
34
|
+
// For inverse (not-guardrails), failing means content was NOT flagged
|
|
35
|
+
const pass = !inverse;
|
|
31
36
|
return {
|
|
32
|
-
pass
|
|
33
|
-
score: 1,
|
|
34
|
-
reason:
|
|
37
|
+
pass,
|
|
38
|
+
score: pass ? 1 : 0,
|
|
39
|
+
reason: inverse
|
|
40
|
+
? 'Content was not blocked by guardrails (expected to be blocked)'
|
|
41
|
+
: 'Content passed safety checks',
|
|
35
42
|
assertion,
|
|
36
43
|
};
|
|
37
44
|
}
|
|
38
45
|
}
|
|
39
46
|
return {
|
|
40
|
-
pass:
|
|
47
|
+
pass: !inverse,
|
|
41
48
|
score: 0,
|
|
42
|
-
reason:
|
|
49
|
+
reason: inverse
|
|
50
|
+
? 'Guardrail was not applied (expected content to be blocked)'
|
|
51
|
+
: 'Guardrail was not applied',
|
|
43
52
|
assertion,
|
|
44
53
|
};
|
|
45
54
|
};
|
|
@@ -2,7 +2,7 @@ import { matchesConversationRelevance } from '../external/matchers/deepeval';
|
|
|
2
2
|
import { matchesAnswerRelevance, matchesClassification, matchesClosedQa, matchesContextFaithfulness, matchesContextRecall, matchesContextRelevance, matchesFactuality, matchesLlmRubric, matchesModeration, matchesSelectBest, matchesSimilarity } from '../matchers';
|
|
3
3
|
import { type ApiProvider, type Assertion, type AssertionType, type AtomicTestCase, type CallApiContextParams, type GradingResult } from '../types/index';
|
|
4
4
|
import type { ProviderResponse, ScoringFunction } from '../types/index';
|
|
5
|
-
export declare const MODEL_GRADED_ASSERTION_TYPES: Set<"moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | "not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-webhook" | "select-best" | "human" | "max-score">;
|
|
5
|
+
export declare const MODEL_GRADED_ASSERTION_TYPES: Set<"moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "search-rubric" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | "not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-search-rubric" | "not-webhook" | "select-best" | "human" | "max-score">;
|
|
6
6
|
/**
|
|
7
7
|
* Tests whether an assertion is inverse e.g. "not-equals" is inverse of "equals"
|
|
8
8
|
* or "not-contains" is inverse of "contains".
|
|
@@ -55,6 +55,7 @@ const logger_1 = __importDefault(require("../logger"));
|
|
|
55
55
|
const matchers_1 = require("../matchers");
|
|
56
56
|
const packageParser_1 = require("../providers/packageParser");
|
|
57
57
|
const pythonUtils_1 = require("../python/pythonUtils");
|
|
58
|
+
const store_1 = require("../tracing/store");
|
|
58
59
|
const fileExtensions_1 = require("../util/fileExtensions");
|
|
59
60
|
const invariant_1 = __importDefault(require("../util/invariant"));
|
|
60
61
|
const templates_1 = require("../util/templates");
|
|
@@ -88,10 +89,10 @@ const perplexity_1 = require("./perplexity");
|
|
|
88
89
|
const pi_1 = require("./pi");
|
|
89
90
|
const python_1 = require("./python");
|
|
90
91
|
const redteam_1 = require("./redteam");
|
|
91
|
-
const ruby_1 = require("./ruby");
|
|
92
92
|
const refusal_1 = require("./refusal");
|
|
93
93
|
const regex_1 = require("./regex");
|
|
94
94
|
const rouge_1 = require("./rouge");
|
|
95
|
+
const ruby_1 = require("./ruby");
|
|
95
96
|
const similar_1 = require("./similar");
|
|
96
97
|
const sql_1 = require("./sql");
|
|
97
98
|
const startsWith_1 = require("./startsWith");
|
|
@@ -100,6 +101,7 @@ const traceSpanCount_1 = require("./traceSpanCount");
|
|
|
100
101
|
const traceSpanDuration_1 = require("./traceSpanDuration");
|
|
101
102
|
const utils_1 = require("./utils");
|
|
102
103
|
const webhook_1 = require("./webhook");
|
|
104
|
+
const searchRubric_1 = require("./searchRubric");
|
|
103
105
|
const xml_1 = require("./xml");
|
|
104
106
|
const ASSERTIONS_MAX_CONCURRENCY = (0, envars_1.getEnvInt)('PROMPTFOO_ASSERTIONS_MAX_CONCURRENCY', 3);
|
|
105
107
|
exports.MODEL_GRADED_ASSERTION_TYPES = new Set([
|
|
@@ -111,6 +113,7 @@ exports.MODEL_GRADED_ASSERTION_TYPES = new Set([
|
|
|
111
113
|
'llm-rubric',
|
|
112
114
|
'model-graded-closedqa',
|
|
113
115
|
'model-graded-factuality',
|
|
116
|
+
'search-rubric',
|
|
114
117
|
]);
|
|
115
118
|
const ASSERTION_HANDLERS = {
|
|
116
119
|
'answer-relevance': answerRelevance_1.handleAnswerRelevance,
|
|
@@ -178,6 +181,7 @@ const ASSERTION_HANDLERS = {
|
|
|
178
181
|
regex: regex_1.handleRegex,
|
|
179
182
|
ruby: ruby_1.handleRuby,
|
|
180
183
|
'rouge-n': rouge_1.handleRougeScore,
|
|
184
|
+
'search-rubric': searchRubric_1.handleSearchRubric,
|
|
181
185
|
similar: similar_1.handleSimilar,
|
|
182
186
|
'similar:cosine': similar_1.handleSimilar,
|
|
183
187
|
'similar:dot': similar_1.handleSimilar,
|
|
@@ -231,12 +235,14 @@ async function runAssertion({ prompt, provider, assertion, test, latencyMs, prov
|
|
|
231
235
|
// Add trace data if traceId is available
|
|
232
236
|
if (traceId) {
|
|
233
237
|
try {
|
|
234
|
-
const
|
|
235
|
-
const traceStore = getTraceStore();
|
|
238
|
+
const traceStore = (0, store_1.getTraceStore)();
|
|
236
239
|
const traceData = await traceStore.getTrace(traceId);
|
|
237
240
|
if (traceData) {
|
|
238
241
|
context.trace = {
|
|
239
242
|
traceId: traceData.traceId,
|
|
243
|
+
evaluationId: traceData.evaluationId,
|
|
244
|
+
testCaseId: traceData.testCaseId,
|
|
245
|
+
metadata: traceData.metadata,
|
|
240
246
|
spans: traceData.spans || [],
|
|
241
247
|
};
|
|
242
248
|
}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { AssertionParams, GradingResult } from '../types/index';
|
|
2
|
+
export declare function handleSearchRubric({ assertion, baseType: _baseType, inverse, provider, renderedValue, test, providerResponse, }: AssertionParams): Promise<GradingResult>;
|
|
3
|
+
//# sourceMappingURL=searchRubric.d.ts.map
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.handleSearchRubric = handleSearchRubric;
|
|
4
|
+
const matchers_1 = require("../matchers");
|
|
5
|
+
async function handleSearchRubric({ assertion, baseType: _baseType, inverse, provider, renderedValue, test, providerResponse, }) {
|
|
6
|
+
if (renderedValue == null) {
|
|
7
|
+
throw new Error('search-rubric assertion type must have a string value');
|
|
8
|
+
}
|
|
9
|
+
const result = await (0, matchers_1.matchesSearchRubric)(String(renderedValue), providerResponse.output, test.options, test.vars, assertion, provider);
|
|
10
|
+
if (inverse) {
|
|
11
|
+
result.pass = !result.pass;
|
|
12
|
+
result.reason = result.pass
|
|
13
|
+
? `Output does not require web search verification: ${result.reason}`
|
|
14
|
+
: `Output requires web search verification: ${result.reason}`;
|
|
15
|
+
}
|
|
16
|
+
return result;
|
|
17
|
+
}
|
|
18
|
+
//# sourceMappingURL=searchRubric.js.map
|
|
@@ -882,7 +882,7 @@ function evalCommand(program, defaultConfig, defaultConfigPath) {
|
|
|
882
882
|
const { data: extension } = index_2.OutputFileExtension.safeParse(maybeFilePath.split('.').pop()?.toLowerCase());
|
|
883
883
|
(0, invariant_1.default)(extension, `Unsupported output file format: ${maybeFilePath}. Please use one of: ${index_2.OutputFileExtension.options.join(', ')}.`);
|
|
884
884
|
}
|
|
885
|
-
doEval(validatedOpts, defaultConfig, defaultConfigPath, evaluateOptions);
|
|
885
|
+
await doEval(validatedOpts, defaultConfig, defaultConfigPath, evaluateOptions);
|
|
886
886
|
});
|
|
887
887
|
return evalCmd;
|
|
888
888
|
}
|
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
import type { Command } from 'commander';
|
|
2
|
-
|
|
2
|
+
/**
|
|
3
|
+
* Check if modelaudit is installed and get its version.
|
|
4
|
+
*/
|
|
5
|
+
export declare function checkModelAuditInstalled(): Promise<{
|
|
6
|
+
installed: boolean;
|
|
7
|
+
version: string | null;
|
|
8
|
+
}>;
|
|
3
9
|
export declare function modelScanCommand(program: Command): void;
|
|
4
10
|
//# sourceMappingURL=modelScan.d.ts.map
|
|
@@ -40,19 +40,38 @@ exports.checkModelAuditInstalled = checkModelAuditInstalled;
|
|
|
40
40
|
exports.modelScanCommand = modelScanCommand;
|
|
41
41
|
const child_process_1 = require("child_process");
|
|
42
42
|
const chalk_1 = __importDefault(require("chalk"));
|
|
43
|
+
const zod_1 = require("zod");
|
|
43
44
|
const accounts_1 = require("../globalConfig/accounts");
|
|
45
|
+
const logger_1 = __importDefault(require("../logger"));
|
|
44
46
|
const modelAudit_1 = __importDefault(require("../models/modelAudit"));
|
|
45
47
|
const updates_1 = require("../updates");
|
|
46
|
-
const modelAuditCliParser_1 = require("../util/modelAuditCliParser");
|
|
47
48
|
const huggingfaceMetadata_1 = require("../util/huggingfaceMetadata");
|
|
48
|
-
const
|
|
49
|
-
|
|
49
|
+
const modelAuditCliParser_1 = require("../util/modelAuditCliParser");
|
|
50
|
+
/**
|
|
51
|
+
* Check if modelaudit is installed and get its version.
|
|
52
|
+
*/
|
|
50
53
|
async function checkModelAuditInstalled() {
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
54
|
+
const version = await (0, updates_1.getModelAuditCurrentVersion)();
|
|
55
|
+
return { installed: version !== null, version };
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Determine if scan results contain errors.
|
|
59
|
+
*/
|
|
60
|
+
function hasErrorsInResults(results) {
|
|
61
|
+
return Boolean(results.has_errors ||
|
|
62
|
+
results.issues?.some((issue) => issue.severity === 'critical' || issue.severity === 'error'));
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* Determine if a model should be re-scanned based on version changes.
|
|
66
|
+
*/
|
|
67
|
+
function shouldRescan(existingVersion, currentVersion) {
|
|
68
|
+
if (!currentVersion) {
|
|
69
|
+
return false;
|
|
70
|
+
}
|
|
71
|
+
if (!existingVersion) {
|
|
72
|
+
return true; // Previous scan missing version
|
|
73
|
+
}
|
|
74
|
+
return existingVersion !== currentVersion; // Version changed
|
|
56
75
|
}
|
|
57
76
|
function modelScanCommand(program) {
|
|
58
77
|
program
|
|
@@ -114,8 +133,8 @@ function modelScanCommand(program) {
|
|
|
114
133
|
}
|
|
115
134
|
});
|
|
116
135
|
}
|
|
117
|
-
// Check if modelaudit is installed
|
|
118
|
-
const isModelAuditInstalled = await checkModelAuditInstalled();
|
|
136
|
+
// Check if modelaudit is installed and get its version
|
|
137
|
+
const { installed: isModelAuditInstalled, version: currentScannerVersion } = await checkModelAuditInstalled();
|
|
119
138
|
if (!isModelAuditInstalled) {
|
|
120
139
|
logger_1.default.error('ModelAudit is not installed.');
|
|
121
140
|
logger_1.default.info(`Please install it using: ${chalk_1.default.green('pip install modelaudit')}`);
|
|
@@ -124,38 +143,52 @@ function modelScanCommand(program) {
|
|
|
124
143
|
}
|
|
125
144
|
// Check for modelaudit updates
|
|
126
145
|
await (0, updates_1.checkModelAuditUpdates)();
|
|
146
|
+
if (currentScannerVersion) {
|
|
147
|
+
logger_1.default.debug(`Using modelaudit version: ${currentScannerVersion}`);
|
|
148
|
+
}
|
|
127
149
|
// When saving to database (default), always use JSON format internally
|
|
128
150
|
// Note: --no-write flag sets options.write to false
|
|
129
151
|
const saveToDatabase = options.write === undefined || options.write === true;
|
|
152
|
+
// Track existing audit to update (when re-scanning or using --force)
|
|
153
|
+
let existingAuditToUpdate = null;
|
|
130
154
|
// Check for duplicate scans (HuggingFace models only, before download)
|
|
131
|
-
//
|
|
132
|
-
if (saveToDatabase &&
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
const
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
155
|
+
// When --force is used, we still need to find existing record to update (avoid unique constraint)
|
|
156
|
+
if (saveToDatabase && paths.length === 1 && (0, huggingfaceMetadata_1.isHuggingFaceModel)(paths[0])) {
|
|
157
|
+
try {
|
|
158
|
+
const metadata = await (0, huggingfaceMetadata_1.getHuggingFaceMetadata)(paths[0]);
|
|
159
|
+
if (metadata) {
|
|
160
|
+
const parsed = (0, huggingfaceMetadata_1.parseHuggingFaceModel)(paths[0]);
|
|
161
|
+
const modelId = parsed ? `${parsed.owner}/${parsed.repo}` : paths[0];
|
|
162
|
+
const existing = await modelAudit_1.default.findByRevision(modelId, metadata.sha);
|
|
163
|
+
if (existing && options.force) {
|
|
164
|
+
logger_1.default.debug(`Re-scanning (--force): ${modelId}`);
|
|
165
|
+
existingAuditToUpdate = existing;
|
|
166
|
+
}
|
|
167
|
+
else if (existing && shouldRescan(existing.scannerVersion, currentScannerVersion)) {
|
|
168
|
+
const reason = existing.scannerVersion
|
|
169
|
+
? `modelaudit upgraded from ${existing.scannerVersion} to ${currentScannerVersion}`
|
|
170
|
+
: `previous scan missing version info (now using ${currentScannerVersion})`;
|
|
171
|
+
logger_1.default.debug(`Re-scanning: ${reason}`);
|
|
172
|
+
existingAuditToUpdate = existing;
|
|
173
|
+
}
|
|
174
|
+
else if (existing) {
|
|
175
|
+
logger_1.default.info(chalk_1.default.yellow('✓ Model already scanned'));
|
|
176
|
+
logger_1.default.info(` Model: ${modelId}`);
|
|
177
|
+
logger_1.default.info(` Revision: ${metadata.sha}`);
|
|
178
|
+
if (existing.scannerVersion) {
|
|
179
|
+
logger_1.default.info(` Scanner version: ${existing.scannerVersion}`);
|
|
152
180
|
}
|
|
181
|
+
logger_1.default.info(` Previous scan: ${new Date(existing.createdAt).toISOString()}`);
|
|
182
|
+
logger_1.default.info(` Scan ID: ${existing.id}`);
|
|
183
|
+
logger_1.default.info(`\n${chalk_1.default.gray('Use --force to scan anyway, or view existing results with:')}`);
|
|
184
|
+
logger_1.default.info(chalk_1.default.green(` promptfoo view ${existing.id}`));
|
|
185
|
+
process.exitCode = 0;
|
|
186
|
+
return;
|
|
153
187
|
}
|
|
154
188
|
}
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
}
|
|
189
|
+
}
|
|
190
|
+
catch (error) {
|
|
191
|
+
logger_1.default.debug(`Failed to check for existing scan: ${error}`);
|
|
159
192
|
}
|
|
160
193
|
}
|
|
161
194
|
const outputFormat = saveToDatabase ? 'json' : options.format || 'text';
|
|
@@ -272,31 +305,54 @@ function modelScanCommand(program) {
|
|
|
272
305
|
revisionInfo.contentHash = results.content_hash;
|
|
273
306
|
}
|
|
274
307
|
}
|
|
275
|
-
//
|
|
276
|
-
const
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
dryRun: options.dryRun,
|
|
291
|
-
cache: options.cache,
|
|
292
|
-
quiet: options.quiet,
|
|
293
|
-
progress: options.progress,
|
|
294
|
-
stream: options.stream,
|
|
295
|
-
},
|
|
308
|
+
// Shared metadata for audit records
|
|
309
|
+
const auditMetadata = {
|
|
310
|
+
paths,
|
|
311
|
+
options: {
|
|
312
|
+
blacklist: options.blacklist,
|
|
313
|
+
timeout: cliOptions.timeout,
|
|
314
|
+
maxSize: options.maxSize,
|
|
315
|
+
verbose: options.verbose,
|
|
316
|
+
sbom: options.sbom,
|
|
317
|
+
strict: options.strict,
|
|
318
|
+
dryRun: options.dryRun,
|
|
319
|
+
cache: options.cache,
|
|
320
|
+
quiet: options.quiet,
|
|
321
|
+
progress: options.progress,
|
|
322
|
+
stream: options.stream,
|
|
296
323
|
},
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
324
|
+
};
|
|
325
|
+
// Create or update audit record in database
|
|
326
|
+
let audit;
|
|
327
|
+
if (existingAuditToUpdate) {
|
|
328
|
+
// Update existing record with new scan results
|
|
329
|
+
existingAuditToUpdate.results = results;
|
|
330
|
+
existingAuditToUpdate.checks = results.checks ?? null;
|
|
331
|
+
existingAuditToUpdate.issues = results.issues ?? null;
|
|
332
|
+
existingAuditToUpdate.hasErrors = hasErrorsInResults(results);
|
|
333
|
+
existingAuditToUpdate.totalChecks = results.total_checks ?? null;
|
|
334
|
+
existingAuditToUpdate.passedChecks = results.passed_checks ?? null;
|
|
335
|
+
existingAuditToUpdate.failedChecks = results.failed_checks ?? null;
|
|
336
|
+
existingAuditToUpdate.scannerVersion = currentScannerVersion ?? null;
|
|
337
|
+
existingAuditToUpdate.metadata = auditMetadata;
|
|
338
|
+
existingAuditToUpdate.updatedAt = Date.now();
|
|
339
|
+
if (revisionInfo.contentHash) {
|
|
340
|
+
existingAuditToUpdate.contentHash = revisionInfo.contentHash;
|
|
341
|
+
}
|
|
342
|
+
await existingAuditToUpdate.save();
|
|
343
|
+
audit = existingAuditToUpdate;
|
|
344
|
+
}
|
|
345
|
+
else {
|
|
346
|
+
audit = await modelAudit_1.default.create({
|
|
347
|
+
name: options.name || `Model scan ${new Date().toISOString()}`,
|
|
348
|
+
author: (0, accounts_1.getAuthor)() || undefined,
|
|
349
|
+
modelPath: paths.join(', '),
|
|
350
|
+
results,
|
|
351
|
+
metadata: auditMetadata,
|
|
352
|
+
scannerVersion: currentScannerVersion || undefined,
|
|
353
|
+
...revisionInfo,
|
|
354
|
+
});
|
|
355
|
+
}
|
|
300
356
|
// Display summary to user (unless they requested JSON format)
|
|
301
357
|
if (options.format !== 'json') {
|
|
302
358
|
logger_1.default.info('\n' + chalk_1.default.bold('Model Audit Summary'));
|
|
@@ -340,6 +396,12 @@ function modelScanCommand(program) {
|
|
|
340
396
|
}
|
|
341
397
|
logger_1.default.info(`\nScanned ${results.files_scanned ?? 0} files (${((results.bytes_scanned ?? 0) / 1024 / 1024).toFixed(2)} MB)`);
|
|
342
398
|
logger_1.default.info(`Duration: ${((results.duration ?? 0) / 1000).toFixed(2)} seconds`);
|
|
399
|
+
if (currentScannerVersion) {
|
|
400
|
+
logger_1.default.debug(`Scanner version: ${currentScannerVersion}`);
|
|
401
|
+
}
|
|
402
|
+
if (existingAuditToUpdate) {
|
|
403
|
+
logger_1.default.debug(`Updated existing audit record: ${audit.id}`);
|
|
404
|
+
}
|
|
343
405
|
logger_1.default.info(chalk_1.default.green(`\n✓ Results saved to database with ID: ${audit.id}`));
|
|
344
406
|
}
|
|
345
407
|
// Save to file if requested
|
|
@@ -13,4 +13,10 @@ export declare function closeDb(): void;
|
|
|
13
13
|
* Check if the database is currently open
|
|
14
14
|
*/
|
|
15
15
|
export declare function isDbOpen(): boolean;
|
|
16
|
+
/**
|
|
17
|
+
* Close database connection if it's currently open
|
|
18
|
+
* Safe to call even if database was never opened
|
|
19
|
+
* Should be called during graceful shutdown to prevent event loop hanging
|
|
20
|
+
*/
|
|
21
|
+
export declare function closeDbIfOpen(): void;
|
|
16
22
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -42,6 +42,7 @@ exports.getDbSignalPath = getDbSignalPath;
|
|
|
42
42
|
exports.getDb = getDb;
|
|
43
43
|
exports.closeDb = closeDb;
|
|
44
44
|
exports.isDbOpen = isDbOpen;
|
|
45
|
+
exports.closeDbIfOpen = closeDbIfOpen;
|
|
45
46
|
const better_sqlite3_1 = __importDefault(require("better-sqlite3"));
|
|
46
47
|
const better_sqlite3_2 = require("drizzle-orm/better-sqlite3");
|
|
47
48
|
const logger_1 = require("drizzle-orm/logger");
|
|
@@ -136,4 +137,14 @@ function closeDb() {
|
|
|
136
137
|
function isDbOpen() {
|
|
137
138
|
return sqliteInstance !== null && dbInstance !== null;
|
|
138
139
|
}
|
|
140
|
+
/**
|
|
141
|
+
* Close database connection if it's currently open
|
|
142
|
+
* Safe to call even if database was never opened
|
|
143
|
+
* Should be called during graceful shutdown to prevent event loop hanging
|
|
144
|
+
*/
|
|
145
|
+
function closeDbIfOpen() {
|
|
146
|
+
if (sqliteInstance) {
|
|
147
|
+
closeDb();
|
|
148
|
+
}
|
|
149
|
+
}
|
|
139
150
|
//# sourceMappingURL=index.js.map
|