promptfoo 0.119.13 → 0.119.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/dist/package.json +28 -26
  2. package/dist/src/app/assets/index-eJ2lMe94.js +51 -0
  3. package/dist/src/app/assets/{source-map-support-Bnh0UQ2S.js → source-map-support-1v4oeb7P.js} +1 -1
  4. package/dist/src/app/assets/sync-CtLQRuC1.js +1 -0
  5. package/dist/src/app/assets/{vendor-charts-T60Uk0Z3.js → vendor-charts-DnVv66VV.js} +1 -1
  6. package/dist/src/app/assets/{vendor-markdown-DLig-KJh.js → vendor-markdown-DCpQIyMA.js} +1 -1
  7. package/dist/src/app/assets/{vendor-mui-core-5BLaiG3c.js → vendor-mui-core-Boqnpf9f.js} +1 -1
  8. package/dist/src/app/assets/{vendor-mui-icons-fn39Fu2e.js → vendor-mui-icons-B8MqoVbj.js} +1 -1
  9. package/dist/src/app/assets/vendor-mui-x-CGSS6QHF.js +45 -0
  10. package/dist/src/app/assets/{vendor-utils-DYBMEuwX.js → vendor-utils-DdfHIEy8.js} +1 -1
  11. package/dist/src/app/index.html +7 -7
  12. package/dist/src/assertions/guardrails.d.ts +1 -1
  13. package/dist/src/assertions/guardrails.js +18 -9
  14. package/dist/src/assertions/index.d.ts +1 -1
  15. package/dist/src/assertions/index.js +9 -3
  16. package/dist/src/assertions/searchRubric.d.ts +3 -0
  17. package/dist/src/assertions/searchRubric.js +18 -0
  18. package/dist/src/commands/eval.js +1 -1
  19. package/dist/src/commands/modelScan.d.ts +7 -1
  20. package/dist/src/commands/modelScan.js +121 -59
  21. package/dist/src/database/index.d.ts +6 -0
  22. package/dist/src/database/index.js +11 -0
  23. package/dist/src/database/tables.d.ts +46 -24
  24. package/dist/src/envars.d.ts +17 -0
  25. package/dist/src/generated/constants.js +1 -1
  26. package/dist/src/logger.d.ts +5 -0
  27. package/dist/src/logger.js +28 -0
  28. package/dist/src/main.js +17 -6
  29. package/dist/src/matchers.d.ts +1 -0
  30. package/dist/src/matchers.js +80 -0
  31. package/dist/src/models/eval.d.ts +2 -1
  32. package/dist/src/models/eval.js +44 -2
  33. package/dist/src/prompts/grading.d.ts +1 -0
  34. package/dist/src/prompts/grading.js +26 -1
  35. package/dist/src/prompts/index.d.ts +1 -0
  36. package/dist/src/prompts/index.js +4 -1
  37. package/dist/src/providers/adaline.gateway.js +2 -2
  38. package/dist/src/providers/anthropic/defaults.d.ts +1 -1
  39. package/dist/src/providers/anthropic/defaults.js +15 -0
  40. package/dist/src/providers/azure/chat.d.ts +3 -1
  41. package/dist/src/providers/azure/chat.js +16 -3
  42. package/dist/src/providers/azure/defaults.js +660 -141
  43. package/dist/src/providers/azure/responses.d.ts +5 -0
  44. package/dist/src/providers/azure/responses.js +33 -4
  45. package/dist/src/providers/azure/types.d.ts +4 -0
  46. package/dist/src/providers/bedrock/agents.d.ts +1 -1
  47. package/dist/src/providers/bedrock/agents.js +2 -2
  48. package/dist/src/providers/bedrock/base.d.ts +40 -0
  49. package/dist/src/providers/bedrock/base.js +171 -0
  50. package/dist/src/providers/bedrock/converse.d.ts +146 -0
  51. package/dist/src/providers/bedrock/converse.js +1044 -0
  52. package/dist/src/providers/bedrock/index.d.ts +1 -34
  53. package/dist/src/providers/bedrock/index.js +4 -159
  54. package/dist/src/providers/bedrock/knowledgeBase.d.ts +1 -1
  55. package/dist/src/providers/bedrock/knowledgeBase.js +2 -2
  56. package/dist/src/providers/bedrock/nova-sonic.d.ts +2 -1
  57. package/dist/src/providers/bedrock/nova-sonic.js +2 -2
  58. package/dist/src/providers/claude-agent-sdk.d.ts +58 -1
  59. package/dist/src/providers/claude-agent-sdk.js +22 -1
  60. package/dist/src/providers/defaults.js +4 -0
  61. package/dist/src/providers/github/defaults.js +6 -6
  62. package/dist/src/providers/google/types.d.ts +25 -0
  63. package/dist/src/providers/google/util.d.ts +2 -0
  64. package/dist/src/providers/google/vertex.js +78 -22
  65. package/dist/src/providers/{groq.d.ts → groq/chat.d.ts} +26 -20
  66. package/dist/src/providers/groq/chat.js +79 -0
  67. package/dist/src/providers/groq/index.d.ts +5 -0
  68. package/dist/src/providers/groq/index.js +24 -0
  69. package/dist/src/providers/groq/responses.d.ts +106 -0
  70. package/dist/src/providers/groq/responses.js +64 -0
  71. package/dist/src/providers/groq/types.d.ts +44 -0
  72. package/dist/src/providers/groq/types.js +3 -0
  73. package/dist/src/providers/groq/util.d.ts +15 -0
  74. package/dist/src/providers/groq/util.js +28 -0
  75. package/dist/src/providers/mcp/client.d.ts +8 -0
  76. package/dist/src/providers/mcp/client.js +60 -10
  77. package/dist/src/providers/mcp/types.d.ts +21 -0
  78. package/dist/src/providers/openai/chatkit-pool.d.ts +114 -0
  79. package/dist/src/providers/openai/chatkit-pool.js +548 -0
  80. package/dist/src/providers/openai/chatkit-types.d.ts +73 -0
  81. package/dist/src/providers/openai/chatkit-types.js +3 -0
  82. package/dist/src/providers/openai/chatkit.d.ts +76 -0
  83. package/dist/src/providers/openai/chatkit.js +879 -0
  84. package/dist/src/providers/openai/codex-sdk.d.ts +109 -0
  85. package/dist/src/providers/openai/codex-sdk.js +346 -0
  86. package/dist/src/providers/openai/defaults.d.ts +2 -0
  87. package/dist/src/providers/openai/defaults.js +10 -4
  88. package/dist/src/providers/registry.js +48 -9
  89. package/dist/src/providers/responses/types.d.ts +1 -1
  90. package/dist/src/providers/sagemaker.d.ts +2 -2
  91. package/dist/src/providers/webSearchUtils.d.ts +17 -0
  92. package/dist/src/providers/webSearchUtils.js +169 -0
  93. package/dist/src/providers/xai/chat.d.ts +61 -0
  94. package/dist/src/providers/xai/chat.js +68 -3
  95. package/dist/src/providers/xai/responses.d.ts +189 -0
  96. package/dist/src/providers/xai/responses.js +268 -0
  97. package/dist/src/redteam/constants/plugins.d.ts +1 -1
  98. package/dist/src/redteam/constants/plugins.js +1 -1
  99. package/dist/src/redteam/constants/strategies.d.ts +1 -1
  100. package/dist/src/redteam/constants/strategies.js +1 -0
  101. package/dist/src/redteam/plugins/vlguard.d.ts +53 -4
  102. package/dist/src/redteam/plugins/vlguard.js +362 -46
  103. package/dist/src/redteam/providers/constants.d.ts +2 -2
  104. package/dist/src/redteam/providers/constants.js +2 -2
  105. package/dist/src/redteam/providers/crescendo/index.d.ts +1 -1
  106. package/dist/src/redteam/providers/crescendo/index.js +5 -3
  107. package/dist/src/redteam/providers/hydra/index.js +1 -1
  108. package/dist/src/server/routes/modelAudit.js +4 -4
  109. package/dist/src/share.js +4 -2
  110. package/dist/src/telemetry.js +44 -8
  111. package/dist/src/types/env.d.ts +3 -0
  112. package/dist/src/types/env.js +1 -0
  113. package/dist/src/types/index.d.ts +896 -615
  114. package/dist/src/types/index.js +1 -0
  115. package/dist/src/types/providers.d.ts +1 -0
  116. package/dist/src/types/tracing.d.ts +3 -0
  117. package/dist/src/util/database.d.ts +6 -4
  118. package/dist/src/util/file.js +6 -4
  119. package/dist/src/util/modelAuditCliParser.d.ts +4 -4
  120. package/dist/src/util/xlsx.js +52 -26
  121. package/dist/src/validators/providers.d.ts +142 -122
  122. package/dist/src/validators/providers.js +4 -6
  123. package/dist/src/validators/redteam.d.ts +36 -28
  124. package/dist/src/validators/redteam.js +9 -3
  125. package/dist/tsconfig.tsbuildinfo +1 -1
  126. package/package.json +28 -26
  127. package/dist/drizzle/CLAUDE.md +0 -65
  128. package/dist/src/app/assets/index-DifT6VGT.js +0 -51
  129. package/dist/src/app/assets/sync-Oo-W_Rbj.js +0 -1
  130. package/dist/src/app/assets/vendor-mui-x-C2xF-yiO.js +0 -45
  131. package/dist/src/providers/groq.js +0 -48
@@ -7,14 +7,14 @@
7
7
  <title>promptfoo</title>
8
8
  <meta name="description" content="LLM testing and evaluation" />
9
9
  <meta property="og:image" content="https://www.promptfoo.dev/img/thumbnail.png" />
10
- <script type="module" crossorigin src="/assets/index-DifT6VGT.js"></script>
10
+ <script type="module" crossorigin src="/assets/index-eJ2lMe94.js"></script>
11
11
  <link rel="modulepreload" crossorigin href="/assets/vendor-react-BuO7LJGJ.js">
12
- <link rel="modulepreload" crossorigin href="/assets/vendor-mui-core-5BLaiG3c.js">
13
- <link rel="modulepreload" crossorigin href="/assets/vendor-mui-icons-fn39Fu2e.js">
14
- <link rel="modulepreload" crossorigin href="/assets/vendor-mui-x-C2xF-yiO.js">
15
- <link rel="modulepreload" crossorigin href="/assets/vendor-charts-T60Uk0Z3.js">
16
- <link rel="modulepreload" crossorigin href="/assets/vendor-utils-DYBMEuwX.js">
17
- <link rel="modulepreload" crossorigin href="/assets/vendor-markdown-DLig-KJh.js">
12
+ <link rel="modulepreload" crossorigin href="/assets/vendor-mui-core-Boqnpf9f.js">
13
+ <link rel="modulepreload" crossorigin href="/assets/vendor-mui-icons-B8MqoVbj.js">
14
+ <link rel="modulepreload" crossorigin href="/assets/vendor-mui-x-CGSS6QHF.js">
15
+ <link rel="modulepreload" crossorigin href="/assets/vendor-charts-DnVv66VV.js">
16
+ <link rel="modulepreload" crossorigin href="/assets/vendor-utils-DdfHIEy8.js">
17
+ <link rel="modulepreload" crossorigin href="/assets/vendor-markdown-DCpQIyMA.js">
18
18
  <link rel="modulepreload" crossorigin href="/assets/vendor-syntax-BKjot6cf.js">
19
19
  <link rel="stylesheet" crossorigin href="/assets/vendor-mui-x-BU3nLhQd.css">
20
20
  <link rel="stylesheet" crossorigin href="/assets/index-M7TScuK-.css">
@@ -1,3 +1,3 @@
1
1
  import type { AssertionParams, GradingResult } from '../types/index';
2
- export declare const handleGuardrails: ({ assertion, providerResponse, }: AssertionParams) => Promise<GradingResult>;
2
+ export declare const handleGuardrails: ({ assertion, inverse, providerResponse, }: AssertionParams) => Promise<GradingResult>;
3
3
  //# sourceMappingURL=guardrails.d.ts.map
@@ -5,7 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.handleGuardrails = void 0;
7
7
  const logger_1 = __importDefault(require("../logger"));
8
- const handleGuardrails = async ({ assertion, providerResponse, }) => {
8
+ const handleGuardrails = async ({ assertion, inverse, providerResponse, }) => {
9
9
  const guardrails = providerResponse.guardrails ||
10
10
  providerResponse.metadata?.redteamHistory?.[providerResponse.metadata.redteamHistory.length - 1]
11
11
  ?.guardrails;
@@ -20,26 +20,35 @@ const handleGuardrails = async ({ assertion, providerResponse, }) => {
20
20
  reason = 'Output failed safety checks';
21
21
  }
22
22
  reason = guardrails.reason || reason;
23
+ // For inverse (not-guardrails), passing means content WAS flagged
24
+ const pass = inverse;
23
25
  return {
24
- pass: false,
25
- score: 0,
26
- reason,
26
+ pass,
27
+ score: pass ? 1 : 0,
28
+ reason: inverse ? `Guardrail correctly blocked: ${reason}` : reason,
27
29
  assertion,
28
30
  };
29
31
  }
30
32
  else {
33
+ // Content was not flagged
34
+ // For inverse (not-guardrails), failing means content was NOT flagged
35
+ const pass = !inverse;
31
36
  return {
32
- pass: true,
33
- score: 1,
34
- reason: 'Content passed safety checks',
37
+ pass,
38
+ score: pass ? 1 : 0,
39
+ reason: inverse
40
+ ? 'Content was not blocked by guardrails (expected to be blocked)'
41
+ : 'Content passed safety checks',
35
42
  assertion,
36
43
  };
37
44
  }
38
45
  }
39
46
  return {
40
- pass: true,
47
+ pass: !inverse,
41
48
  score: 0,
42
- reason: 'Guardrail was not applied',
49
+ reason: inverse
50
+ ? 'Guardrail was not applied (expected content to be blocked)'
51
+ : 'Guardrail was not applied',
43
52
  assertion,
44
53
  };
45
54
  };
@@ -2,7 +2,7 @@ import { matchesConversationRelevance } from '../external/matchers/deepeval';
2
2
  import { matchesAnswerRelevance, matchesClassification, matchesClosedQa, matchesContextFaithfulness, matchesContextRecall, matchesContextRelevance, matchesFactuality, matchesLlmRubric, matchesModeration, matchesSelectBest, matchesSimilarity } from '../matchers';
3
3
  import { type ApiProvider, type Assertion, type AssertionType, type AtomicTestCase, type CallApiContextParams, type GradingResult } from '../types/index';
4
4
  import type { ProviderResponse, ScoringFunction } from '../types/index';
5
- export declare const MODEL_GRADED_ASSERTION_TYPES: Set<"moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | "not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-webhook" | "select-best" | "human" | "max-score">;
5
+ export declare const MODEL_GRADED_ASSERTION_TYPES: Set<"moderation" | "cost" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains" | "contains-all" | "contains-any" | "contains-html" | "contains-json" | "contains-sql" | "contains-xml" | "context-faithfulness" | "context-recall" | "context-relevance" | "conversation-relevance" | "equals" | "finish-reason" | "g-eval" | "gleu" | "guardrails" | "icontains" | "icontains-all" | "icontains-any" | "is-html" | "is-json" | "is-refusal" | "is-sql" | "is-valid-function-call" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "pi" | "meteor" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity" | "perplexity-score" | "python" | "regex" | "rouge-n" | "ruby" | "similar" | "similar:cosine" | "similar:dot" | "similar:euclidean" | "starts-with" | "trace-error-spans" | "trace-span-count" | "trace-span-duration" | "search-rubric" | "webhook" | "not-moderation" | "not-cost" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains" | "not-contains-all" | "not-contains-any" | "not-contains-html" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-conversation-relevance" | "not-equals" | "not-finish-reason" | "not-g-eval" | "not-gleu" | "not-guardrails" | "not-icontains" | "not-icontains-all" | "not-icontains-any" | "not-is-html" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-function-call" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-pi" | "not-meteor" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity" | "not-perplexity-score" | "not-python" | "not-regex" | "not-rouge-n" | "not-ruby" | "not-similar" | "not-similar:cosine" | "not-similar:dot" | "not-similar:euclidean" | "not-starts-with" | "not-trace-error-spans" | "not-trace-span-count" | "not-trace-span-duration" | "not-search-rubric" | "not-webhook" | "select-best" | "human" | "max-score">;
6
6
  /**
7
7
  * Tests whether an assertion is inverse e.g. "not-equals" is inverse of "equals"
8
8
  * or "not-contains" is inverse of "contains".
@@ -55,6 +55,7 @@ const logger_1 = __importDefault(require("../logger"));
55
55
  const matchers_1 = require("../matchers");
56
56
  const packageParser_1 = require("../providers/packageParser");
57
57
  const pythonUtils_1 = require("../python/pythonUtils");
58
+ const store_1 = require("../tracing/store");
58
59
  const fileExtensions_1 = require("../util/fileExtensions");
59
60
  const invariant_1 = __importDefault(require("../util/invariant"));
60
61
  const templates_1 = require("../util/templates");
@@ -88,10 +89,10 @@ const perplexity_1 = require("./perplexity");
88
89
  const pi_1 = require("./pi");
89
90
  const python_1 = require("./python");
90
91
  const redteam_1 = require("./redteam");
91
- const ruby_1 = require("./ruby");
92
92
  const refusal_1 = require("./refusal");
93
93
  const regex_1 = require("./regex");
94
94
  const rouge_1 = require("./rouge");
95
+ const ruby_1 = require("./ruby");
95
96
  const similar_1 = require("./similar");
96
97
  const sql_1 = require("./sql");
97
98
  const startsWith_1 = require("./startsWith");
@@ -100,6 +101,7 @@ const traceSpanCount_1 = require("./traceSpanCount");
100
101
  const traceSpanDuration_1 = require("./traceSpanDuration");
101
102
  const utils_1 = require("./utils");
102
103
  const webhook_1 = require("./webhook");
104
+ const searchRubric_1 = require("./searchRubric");
103
105
  const xml_1 = require("./xml");
104
106
  const ASSERTIONS_MAX_CONCURRENCY = (0, envars_1.getEnvInt)('PROMPTFOO_ASSERTIONS_MAX_CONCURRENCY', 3);
105
107
  exports.MODEL_GRADED_ASSERTION_TYPES = new Set([
@@ -111,6 +113,7 @@ exports.MODEL_GRADED_ASSERTION_TYPES = new Set([
111
113
  'llm-rubric',
112
114
  'model-graded-closedqa',
113
115
  'model-graded-factuality',
116
+ 'search-rubric',
114
117
  ]);
115
118
  const ASSERTION_HANDLERS = {
116
119
  'answer-relevance': answerRelevance_1.handleAnswerRelevance,
@@ -178,6 +181,7 @@ const ASSERTION_HANDLERS = {
178
181
  regex: regex_1.handleRegex,
179
182
  ruby: ruby_1.handleRuby,
180
183
  'rouge-n': rouge_1.handleRougeScore,
184
+ 'search-rubric': searchRubric_1.handleSearchRubric,
181
185
  similar: similar_1.handleSimilar,
182
186
  'similar:cosine': similar_1.handleSimilar,
183
187
  'similar:dot': similar_1.handleSimilar,
@@ -231,12 +235,14 @@ async function runAssertion({ prompt, provider, assertion, test, latencyMs, prov
231
235
  // Add trace data if traceId is available
232
236
  if (traceId) {
233
237
  try {
234
- const { getTraceStore } = await Promise.resolve().then(() => __importStar(require('../tracing/store')));
235
- const traceStore = getTraceStore();
238
+ const traceStore = (0, store_1.getTraceStore)();
236
239
  const traceData = await traceStore.getTrace(traceId);
237
240
  if (traceData) {
238
241
  context.trace = {
239
242
  traceId: traceData.traceId,
243
+ evaluationId: traceData.evaluationId,
244
+ testCaseId: traceData.testCaseId,
245
+ metadata: traceData.metadata,
240
246
  spans: traceData.spans || [],
241
247
  };
242
248
  }
@@ -0,0 +1,3 @@
1
+ import type { AssertionParams, GradingResult } from '../types/index';
2
+ export declare function handleSearchRubric({ assertion, baseType: _baseType, inverse, provider, renderedValue, test, providerResponse, }: AssertionParams): Promise<GradingResult>;
3
+ //# sourceMappingURL=searchRubric.d.ts.map
@@ -0,0 +1,18 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.handleSearchRubric = handleSearchRubric;
4
+ const matchers_1 = require("../matchers");
5
+ async function handleSearchRubric({ assertion, baseType: _baseType, inverse, provider, renderedValue, test, providerResponse, }) {
6
+ if (renderedValue == null) {
7
+ throw new Error('search-rubric assertion type must have a string value');
8
+ }
9
+ const result = await (0, matchers_1.matchesSearchRubric)(String(renderedValue), providerResponse.output, test.options, test.vars, assertion, provider);
10
+ if (inverse) {
11
+ result.pass = !result.pass;
12
+ result.reason = result.pass
13
+ ? `Output does not require web search verification: ${result.reason}`
14
+ : `Output requires web search verification: ${result.reason}`;
15
+ }
16
+ return result;
17
+ }
18
+ //# sourceMappingURL=searchRubric.js.map
@@ -882,7 +882,7 @@ function evalCommand(program, defaultConfig, defaultConfigPath) {
882
882
  const { data: extension } = index_2.OutputFileExtension.safeParse(maybeFilePath.split('.').pop()?.toLowerCase());
883
883
  (0, invariant_1.default)(extension, `Unsupported output file format: ${maybeFilePath}. Please use one of: ${index_2.OutputFileExtension.options.join(', ')}.`);
884
884
  }
885
- doEval(validatedOpts, defaultConfig, defaultConfigPath, evaluateOptions);
885
+ await doEval(validatedOpts, defaultConfig, defaultConfigPath, evaluateOptions);
886
886
  });
887
887
  return evalCmd;
888
888
  }
@@ -1,4 +1,10 @@
1
1
  import type { Command } from 'commander';
2
- export declare function checkModelAuditInstalled(): Promise<boolean>;
2
+ /**
3
+ * Check if modelaudit is installed and get its version.
4
+ */
5
+ export declare function checkModelAuditInstalled(): Promise<{
6
+ installed: boolean;
7
+ version: string | null;
8
+ }>;
3
9
  export declare function modelScanCommand(program: Command): void;
4
10
  //# sourceMappingURL=modelScan.d.ts.map
@@ -40,19 +40,38 @@ exports.checkModelAuditInstalled = checkModelAuditInstalled;
40
40
  exports.modelScanCommand = modelScanCommand;
41
41
  const child_process_1 = require("child_process");
42
42
  const chalk_1 = __importDefault(require("chalk"));
43
+ const zod_1 = require("zod");
43
44
  const accounts_1 = require("../globalConfig/accounts");
45
+ const logger_1 = __importDefault(require("../logger"));
44
46
  const modelAudit_1 = __importDefault(require("../models/modelAudit"));
45
47
  const updates_1 = require("../updates");
46
- const modelAuditCliParser_1 = require("../util/modelAuditCliParser");
47
48
  const huggingfaceMetadata_1 = require("../util/huggingfaceMetadata");
48
- const logger_1 = __importDefault(require("../logger"));
49
- const zod_1 = require("zod");
49
+ const modelAuditCliParser_1 = require("../util/modelAuditCliParser");
50
+ /**
51
+ * Check if modelaudit is installed and get its version.
52
+ */
50
53
  async function checkModelAuditInstalled() {
51
- return new Promise((resolve) => {
52
- const proc = (0, child_process_1.spawn)('modelaudit', ['--version']);
53
- proc.on('error', () => resolve(false));
54
- proc.on('close', (code) => resolve(code === 0 || code === 1));
55
- });
54
+ const version = await (0, updates_1.getModelAuditCurrentVersion)();
55
+ return { installed: version !== null, version };
56
+ }
57
+ /**
58
+ * Determine if scan results contain errors.
59
+ */
60
+ function hasErrorsInResults(results) {
61
+ return Boolean(results.has_errors ||
62
+ results.issues?.some((issue) => issue.severity === 'critical' || issue.severity === 'error'));
63
+ }
64
+ /**
65
+ * Determine if a model should be re-scanned based on version changes.
66
+ */
67
+ function shouldRescan(existingVersion, currentVersion) {
68
+ if (!currentVersion) {
69
+ return false;
70
+ }
71
+ if (!existingVersion) {
72
+ return true; // Previous scan missing version
73
+ }
74
+ return existingVersion !== currentVersion; // Version changed
56
75
  }
57
76
  function modelScanCommand(program) {
58
77
  program
@@ -114,8 +133,8 @@ function modelScanCommand(program) {
114
133
  }
115
134
  });
116
135
  }
117
- // Check if modelaudit is installed
118
- const isModelAuditInstalled = await checkModelAuditInstalled();
136
+ // Check if modelaudit is installed and get its version
137
+ const { installed: isModelAuditInstalled, version: currentScannerVersion } = await checkModelAuditInstalled();
119
138
  if (!isModelAuditInstalled) {
120
139
  logger_1.default.error('ModelAudit is not installed.');
121
140
  logger_1.default.info(`Please install it using: ${chalk_1.default.green('pip install modelaudit')}`);
@@ -124,38 +143,52 @@ function modelScanCommand(program) {
124
143
  }
125
144
  // Check for modelaudit updates
126
145
  await (0, updates_1.checkModelAuditUpdates)();
146
+ if (currentScannerVersion) {
147
+ logger_1.default.debug(`Using modelaudit version: ${currentScannerVersion}`);
148
+ }
127
149
  // When saving to database (default), always use JSON format internally
128
150
  // Note: --no-write flag sets options.write to false
129
151
  const saveToDatabase = options.write === undefined || options.write === true;
152
+ // Track existing audit to update (when re-scanning or using --force)
153
+ let existingAuditToUpdate = null;
130
154
  // Check for duplicate scans (HuggingFace models only, before download)
131
- // Only check if saving to database and not forcing
132
- if (saveToDatabase && !options.force && paths.length === 1) {
133
- const modelPath = paths[0];
134
- if ((0, huggingfaceMetadata_1.isHuggingFaceModel)(modelPath)) {
135
- try {
136
- const metadata = await (0, huggingfaceMetadata_1.getHuggingFaceMetadata)(modelPath);
137
- if (metadata) {
138
- const parsed = (0, huggingfaceMetadata_1.parseHuggingFaceModel)(modelPath);
139
- const modelId = parsed ? `${parsed.owner}/${parsed.repo}` : modelPath;
140
- // Check if already scanned with this revision
141
- const existing = await modelAudit_1.default.findByRevision(modelId, metadata.sha);
142
- if (existing) {
143
- logger_1.default.info(chalk_1.default.yellow('✓ Model already scanned'));
144
- logger_1.default.info(` Model: ${modelId}`);
145
- logger_1.default.info(` Revision: ${metadata.sha}`);
146
- logger_1.default.info(` Previous scan: ${new Date(existing.createdAt).toISOString()}`);
147
- logger_1.default.info(` Scan ID: ${existing.id}`);
148
- logger_1.default.info(`\n${chalk_1.default.gray('Use --force to scan anyway, or view existing results with:')}`);
149
- logger_1.default.info(chalk_1.default.green(` promptfoo view ${existing.id}`));
150
- process.exitCode = 0;
151
- return;
155
+ // When --force is used, we still need to find existing record to update (avoid unique constraint)
156
+ if (saveToDatabase && paths.length === 1 && (0, huggingfaceMetadata_1.isHuggingFaceModel)(paths[0])) {
157
+ try {
158
+ const metadata = await (0, huggingfaceMetadata_1.getHuggingFaceMetadata)(paths[0]);
159
+ if (metadata) {
160
+ const parsed = (0, huggingfaceMetadata_1.parseHuggingFaceModel)(paths[0]);
161
+ const modelId = parsed ? `${parsed.owner}/${parsed.repo}` : paths[0];
162
+ const existing = await modelAudit_1.default.findByRevision(modelId, metadata.sha);
163
+ if (existing && options.force) {
164
+ logger_1.default.debug(`Re-scanning (--force): ${modelId}`);
165
+ existingAuditToUpdate = existing;
166
+ }
167
+ else if (existing && shouldRescan(existing.scannerVersion, currentScannerVersion)) {
168
+ const reason = existing.scannerVersion
169
+ ? `modelaudit upgraded from ${existing.scannerVersion} to ${currentScannerVersion}`
170
+ : `previous scan missing version info (now using ${currentScannerVersion})`;
171
+ logger_1.default.debug(`Re-scanning: ${reason}`);
172
+ existingAuditToUpdate = existing;
173
+ }
174
+ else if (existing) {
175
+ logger_1.default.info(chalk_1.default.yellow('✓ Model already scanned'));
176
+ logger_1.default.info(` Model: ${modelId}`);
177
+ logger_1.default.info(` Revision: ${metadata.sha}`);
178
+ if (existing.scannerVersion) {
179
+ logger_1.default.info(` Scanner version: ${existing.scannerVersion}`);
152
180
  }
181
+ logger_1.default.info(` Previous scan: ${new Date(existing.createdAt).toISOString()}`);
182
+ logger_1.default.info(` Scan ID: ${existing.id}`);
183
+ logger_1.default.info(`\n${chalk_1.default.gray('Use --force to scan anyway, or view existing results with:')}`);
184
+ logger_1.default.info(chalk_1.default.green(` promptfoo view ${existing.id}`));
185
+ process.exitCode = 0;
186
+ return;
153
187
  }
154
188
  }
155
- catch (error) {
156
- logger_1.default.debug(`Failed to check for existing scan: ${error}`);
157
- // Continue with scan if metadata fetch fails
158
- }
189
+ }
190
+ catch (error) {
191
+ logger_1.default.debug(`Failed to check for existing scan: ${error}`);
159
192
  }
160
193
  }
161
194
  const outputFormat = saveToDatabase ? 'json' : options.format || 'text';
@@ -272,31 +305,54 @@ function modelScanCommand(program) {
272
305
  revisionInfo.contentHash = results.content_hash;
273
306
  }
274
307
  }
275
- // Create audit record in database
276
- const audit = await modelAudit_1.default.create({
277
- name: options.name || `Model scan ${new Date().toISOString()}`,
278
- author: (0, accounts_1.getAuthor)() || undefined,
279
- modelPath: paths.join(', '),
280
- results,
281
- metadata: {
282
- paths,
283
- options: {
284
- blacklist: options.blacklist,
285
- timeout: cliOptions.timeout,
286
- maxSize: options.maxSize,
287
- verbose: options.verbose,
288
- sbom: options.sbom,
289
- strict: options.strict,
290
- dryRun: options.dryRun,
291
- cache: options.cache,
292
- quiet: options.quiet,
293
- progress: options.progress,
294
- stream: options.stream,
295
- },
308
+ // Shared metadata for audit records
309
+ const auditMetadata = {
310
+ paths,
311
+ options: {
312
+ blacklist: options.blacklist,
313
+ timeout: cliOptions.timeout,
314
+ maxSize: options.maxSize,
315
+ verbose: options.verbose,
316
+ sbom: options.sbom,
317
+ strict: options.strict,
318
+ dryRun: options.dryRun,
319
+ cache: options.cache,
320
+ quiet: options.quiet,
321
+ progress: options.progress,
322
+ stream: options.stream,
296
323
  },
297
- // Revision tracking
298
- ...revisionInfo,
299
- });
324
+ };
325
+ // Create or update audit record in database
326
+ let audit;
327
+ if (existingAuditToUpdate) {
328
+ // Update existing record with new scan results
329
+ existingAuditToUpdate.results = results;
330
+ existingAuditToUpdate.checks = results.checks ?? null;
331
+ existingAuditToUpdate.issues = results.issues ?? null;
332
+ existingAuditToUpdate.hasErrors = hasErrorsInResults(results);
333
+ existingAuditToUpdate.totalChecks = results.total_checks ?? null;
334
+ existingAuditToUpdate.passedChecks = results.passed_checks ?? null;
335
+ existingAuditToUpdate.failedChecks = results.failed_checks ?? null;
336
+ existingAuditToUpdate.scannerVersion = currentScannerVersion ?? null;
337
+ existingAuditToUpdate.metadata = auditMetadata;
338
+ existingAuditToUpdate.updatedAt = Date.now();
339
+ if (revisionInfo.contentHash) {
340
+ existingAuditToUpdate.contentHash = revisionInfo.contentHash;
341
+ }
342
+ await existingAuditToUpdate.save();
343
+ audit = existingAuditToUpdate;
344
+ }
345
+ else {
346
+ audit = await modelAudit_1.default.create({
347
+ name: options.name || `Model scan ${new Date().toISOString()}`,
348
+ author: (0, accounts_1.getAuthor)() || undefined,
349
+ modelPath: paths.join(', '),
350
+ results,
351
+ metadata: auditMetadata,
352
+ scannerVersion: currentScannerVersion || undefined,
353
+ ...revisionInfo,
354
+ });
355
+ }
300
356
  // Display summary to user (unless they requested JSON format)
301
357
  if (options.format !== 'json') {
302
358
  logger_1.default.info('\n' + chalk_1.default.bold('Model Audit Summary'));
@@ -340,6 +396,12 @@ function modelScanCommand(program) {
340
396
  }
341
397
  logger_1.default.info(`\nScanned ${results.files_scanned ?? 0} files (${((results.bytes_scanned ?? 0) / 1024 / 1024).toFixed(2)} MB)`);
342
398
  logger_1.default.info(`Duration: ${((results.duration ?? 0) / 1000).toFixed(2)} seconds`);
399
+ if (currentScannerVersion) {
400
+ logger_1.default.debug(`Scanner version: ${currentScannerVersion}`);
401
+ }
402
+ if (existingAuditToUpdate) {
403
+ logger_1.default.debug(`Updated existing audit record: ${audit.id}`);
404
+ }
343
405
  logger_1.default.info(chalk_1.default.green(`\n✓ Results saved to database with ID: ${audit.id}`));
344
406
  }
345
407
  // Save to file if requested
@@ -13,4 +13,10 @@ export declare function closeDb(): void;
13
13
  * Check if the database is currently open
14
14
  */
15
15
  export declare function isDbOpen(): boolean;
16
+ /**
17
+ * Close database connection if it's currently open
18
+ * Safe to call even if database was never opened
19
+ * Should be called during graceful shutdown to prevent event loop hanging
20
+ */
21
+ export declare function closeDbIfOpen(): void;
16
22
  //# sourceMappingURL=index.d.ts.map
@@ -42,6 +42,7 @@ exports.getDbSignalPath = getDbSignalPath;
42
42
  exports.getDb = getDb;
43
43
  exports.closeDb = closeDb;
44
44
  exports.isDbOpen = isDbOpen;
45
+ exports.closeDbIfOpen = closeDbIfOpen;
45
46
  const better_sqlite3_1 = __importDefault(require("better-sqlite3"));
46
47
  const better_sqlite3_2 = require("drizzle-orm/better-sqlite3");
47
48
  const logger_1 = require("drizzle-orm/logger");
@@ -136,4 +137,14 @@ function closeDb() {
136
137
  function isDbOpen() {
137
138
  return sqliteInstance !== null && dbInstance !== null;
138
139
  }
140
+ /**
141
+ * Close database connection if it's currently open
142
+ * Safe to call even if database was never opened
143
+ * Should be called during graceful shutdown to prevent event loop hanging
144
+ */
145
+ function closeDbIfOpen() {
146
+ if (sqliteInstance) {
147
+ closeDb();
148
+ }
149
+ }
139
150
  //# sourceMappingURL=index.js.map