promptfoo 0.119.13 → 0.119.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/dist/package.json +28 -26
  2. package/dist/src/app/assets/index-eJ2lMe94.js +51 -0
  3. package/dist/src/app/assets/{source-map-support-Bnh0UQ2S.js → source-map-support-1v4oeb7P.js} +1 -1
  4. package/dist/src/app/assets/sync-CtLQRuC1.js +1 -0
  5. package/dist/src/app/assets/{vendor-charts-T60Uk0Z3.js → vendor-charts-DnVv66VV.js} +1 -1
  6. package/dist/src/app/assets/{vendor-markdown-DLig-KJh.js → vendor-markdown-DCpQIyMA.js} +1 -1
  7. package/dist/src/app/assets/{vendor-mui-core-5BLaiG3c.js → vendor-mui-core-Boqnpf9f.js} +1 -1
  8. package/dist/src/app/assets/{vendor-mui-icons-fn39Fu2e.js → vendor-mui-icons-B8MqoVbj.js} +1 -1
  9. package/dist/src/app/assets/vendor-mui-x-CGSS6QHF.js +45 -0
  10. package/dist/src/app/assets/{vendor-utils-DYBMEuwX.js → vendor-utils-DdfHIEy8.js} +1 -1
  11. package/dist/src/app/index.html +7 -7
  12. package/dist/src/assertions/guardrails.d.ts +1 -1
  13. package/dist/src/assertions/guardrails.js +18 -9
  14. package/dist/src/assertions/index.d.ts +1 -1
  15. package/dist/src/assertions/index.js +9 -3
  16. package/dist/src/assertions/searchRubric.d.ts +3 -0
  17. package/dist/src/assertions/searchRubric.js +18 -0
  18. package/dist/src/commands/eval.js +1 -1
  19. package/dist/src/commands/modelScan.d.ts +7 -1
  20. package/dist/src/commands/modelScan.js +121 -59
  21. package/dist/src/database/index.d.ts +6 -0
  22. package/dist/src/database/index.js +11 -0
  23. package/dist/src/database/tables.d.ts +46 -24
  24. package/dist/src/envars.d.ts +17 -0
  25. package/dist/src/generated/constants.js +1 -1
  26. package/dist/src/logger.d.ts +5 -0
  27. package/dist/src/logger.js +28 -0
  28. package/dist/src/main.js +17 -6
  29. package/dist/src/matchers.d.ts +1 -0
  30. package/dist/src/matchers.js +80 -0
  31. package/dist/src/models/eval.d.ts +2 -1
  32. package/dist/src/models/eval.js +44 -2
  33. package/dist/src/prompts/grading.d.ts +1 -0
  34. package/dist/src/prompts/grading.js +26 -1
  35. package/dist/src/prompts/index.d.ts +1 -0
  36. package/dist/src/prompts/index.js +4 -1
  37. package/dist/src/providers/adaline.gateway.js +2 -2
  38. package/dist/src/providers/anthropic/defaults.d.ts +1 -1
  39. package/dist/src/providers/anthropic/defaults.js +15 -0
  40. package/dist/src/providers/azure/chat.d.ts +3 -1
  41. package/dist/src/providers/azure/chat.js +16 -3
  42. package/dist/src/providers/azure/defaults.js +660 -141
  43. package/dist/src/providers/azure/responses.d.ts +5 -0
  44. package/dist/src/providers/azure/responses.js +33 -4
  45. package/dist/src/providers/azure/types.d.ts +4 -0
  46. package/dist/src/providers/bedrock/agents.d.ts +1 -1
  47. package/dist/src/providers/bedrock/agents.js +2 -2
  48. package/dist/src/providers/bedrock/base.d.ts +40 -0
  49. package/dist/src/providers/bedrock/base.js +171 -0
  50. package/dist/src/providers/bedrock/converse.d.ts +146 -0
  51. package/dist/src/providers/bedrock/converse.js +1044 -0
  52. package/dist/src/providers/bedrock/index.d.ts +1 -34
  53. package/dist/src/providers/bedrock/index.js +4 -159
  54. package/dist/src/providers/bedrock/knowledgeBase.d.ts +1 -1
  55. package/dist/src/providers/bedrock/knowledgeBase.js +2 -2
  56. package/dist/src/providers/bedrock/nova-sonic.d.ts +2 -1
  57. package/dist/src/providers/bedrock/nova-sonic.js +2 -2
  58. package/dist/src/providers/claude-agent-sdk.d.ts +58 -1
  59. package/dist/src/providers/claude-agent-sdk.js +22 -1
  60. package/dist/src/providers/defaults.js +4 -0
  61. package/dist/src/providers/github/defaults.js +6 -6
  62. package/dist/src/providers/google/types.d.ts +25 -0
  63. package/dist/src/providers/google/util.d.ts +2 -0
  64. package/dist/src/providers/google/vertex.js +78 -22
  65. package/dist/src/providers/{groq.d.ts → groq/chat.d.ts} +26 -20
  66. package/dist/src/providers/groq/chat.js +79 -0
  67. package/dist/src/providers/groq/index.d.ts +5 -0
  68. package/dist/src/providers/groq/index.js +24 -0
  69. package/dist/src/providers/groq/responses.d.ts +106 -0
  70. package/dist/src/providers/groq/responses.js +64 -0
  71. package/dist/src/providers/groq/types.d.ts +44 -0
  72. package/dist/src/providers/groq/types.js +3 -0
  73. package/dist/src/providers/groq/util.d.ts +15 -0
  74. package/dist/src/providers/groq/util.js +28 -0
  75. package/dist/src/providers/mcp/client.d.ts +8 -0
  76. package/dist/src/providers/mcp/client.js +60 -10
  77. package/dist/src/providers/mcp/types.d.ts +21 -0
  78. package/dist/src/providers/openai/chatkit-pool.d.ts +114 -0
  79. package/dist/src/providers/openai/chatkit-pool.js +548 -0
  80. package/dist/src/providers/openai/chatkit-types.d.ts +73 -0
  81. package/dist/src/providers/openai/chatkit-types.js +3 -0
  82. package/dist/src/providers/openai/chatkit.d.ts +76 -0
  83. package/dist/src/providers/openai/chatkit.js +879 -0
  84. package/dist/src/providers/openai/codex-sdk.d.ts +109 -0
  85. package/dist/src/providers/openai/codex-sdk.js +346 -0
  86. package/dist/src/providers/openai/defaults.d.ts +2 -0
  87. package/dist/src/providers/openai/defaults.js +10 -4
  88. package/dist/src/providers/registry.js +48 -9
  89. package/dist/src/providers/responses/types.d.ts +1 -1
  90. package/dist/src/providers/sagemaker.d.ts +2 -2
  91. package/dist/src/providers/webSearchUtils.d.ts +17 -0
  92. package/dist/src/providers/webSearchUtils.js +169 -0
  93. package/dist/src/providers/xai/chat.d.ts +61 -0
  94. package/dist/src/providers/xai/chat.js +68 -3
  95. package/dist/src/providers/xai/responses.d.ts +189 -0
  96. package/dist/src/providers/xai/responses.js +268 -0
  97. package/dist/src/redteam/constants/plugins.d.ts +1 -1
  98. package/dist/src/redteam/constants/plugins.js +1 -1
  99. package/dist/src/redteam/constants/strategies.d.ts +1 -1
  100. package/dist/src/redteam/constants/strategies.js +1 -0
  101. package/dist/src/redteam/plugins/vlguard.d.ts +53 -4
  102. package/dist/src/redteam/plugins/vlguard.js +362 -46
  103. package/dist/src/redteam/providers/constants.d.ts +2 -2
  104. package/dist/src/redteam/providers/constants.js +2 -2
  105. package/dist/src/redteam/providers/crescendo/index.d.ts +1 -1
  106. package/dist/src/redteam/providers/crescendo/index.js +5 -3
  107. package/dist/src/redteam/providers/hydra/index.js +1 -1
  108. package/dist/src/server/routes/modelAudit.js +4 -4
  109. package/dist/src/share.js +4 -2
  110. package/dist/src/telemetry.js +44 -8
  111. package/dist/src/types/env.d.ts +3 -0
  112. package/dist/src/types/env.js +1 -0
  113. package/dist/src/types/index.d.ts +896 -615
  114. package/dist/src/types/index.js +1 -0
  115. package/dist/src/types/providers.d.ts +1 -0
  116. package/dist/src/types/tracing.d.ts +3 -0
  117. package/dist/src/util/database.d.ts +6 -4
  118. package/dist/src/util/file.js +6 -4
  119. package/dist/src/util/modelAuditCliParser.d.ts +4 -4
  120. package/dist/src/util/xlsx.js +52 -26
  121. package/dist/src/validators/providers.d.ts +142 -122
  122. package/dist/src/validators/providers.js +4 -6
  123. package/dist/src/validators/redteam.d.ts +36 -28
  124. package/dist/src/validators/redteam.js +9 -3
  125. package/dist/tsconfig.tsbuildinfo +1 -1
  126. package/package.json +28 -26
  127. package/dist/drizzle/CLAUDE.md +0 -65
  128. package/dist/src/app/assets/index-DifT6VGT.js +0 -51
  129. package/dist/src/app/assets/sync-Oo-W_Rbj.js +0 -1
  130. package/dist/src/app/assets/vendor-mui-x-C2xF-yiO.js +0 -45
  131. package/dist/src/providers/groq.js +0 -48
@@ -0,0 +1,73 @@
1
+ import type { OpenAiSharedOptions } from './types';
2
+ /**
3
+ * Configuration options for the OpenAI ChatKit Provider
4
+ */
5
+ export interface OpenAiChatKitOptions extends OpenAiSharedOptions {
6
+ /**
7
+ * The ChatKit workflow ID from Agent Builder (e.g., wf_xxxx).
8
+ */
9
+ workflowId?: string;
10
+ /**
11
+ * The version of the workflow to use. Optional.
12
+ */
13
+ version?: string;
14
+ /**
15
+ * User identifier for the ChatKit session.
16
+ * If not set, a unique ID containing a timestamp is generated.
17
+ * @default 'promptfoo-eval-<timestamp>'
18
+ */
19
+ userId?: string;
20
+ /**
21
+ * Timeout, in milliseconds, for waiting for a response.
22
+ * @default 120000 (2 minutes)
23
+ */
24
+ timeout?: number;
25
+ /**
26
+ * Run the browser in headless mode.
27
+ * @default true
28
+ */
29
+ headless?: boolean;
30
+ /**
31
+ * Port for the local HTTP server that serves the ChatKit HTML.
32
+ * @default 0 (random available port)
33
+ */
34
+ serverPort?: number;
35
+ /**
36
+ * Use a shared browser pool for better concurrency support.
37
+ * When enabled, a single browser with multiple contexts is used
38
+ * instead of spawning separate browsers per test.
39
+ * @default true
40
+ */
41
+ usePool?: boolean;
42
+ /**
43
+ * Maximum number of concurrent browser contexts when using pool mode.
44
+ * Only applies when usePool is true.
45
+ * If not specified, defaults to the value of --max-concurrency (or 4).
46
+ */
47
+ poolSize?: number;
48
+ /**
49
+ * How to handle workflow approval steps.
50
+ * - 'auto-approve': Automatically click the approve button when detected
51
+ * - 'auto-reject': Automatically click the reject button when detected
52
+ * - 'skip': Don't interact with approvals; capture the approval prompt as output
53
+ * @default 'auto-approve'
54
+ */
55
+ approvalHandling?: 'auto-approve' | 'auto-reject' | 'skip';
56
+ /**
57
+ * Maximum number of approval steps to process per message.
58
+ * Prevents infinite loops if the workflow has chained approvals.
59
+ * @default 5
60
+ */
61
+ maxApprovals?: number;
62
+ /**
63
+ * Enable stateful/multi-turn conversation mode.
64
+ * When enabled:
65
+ * - The browser page is kept alive between calls
66
+ * - The first message uses newThread: true; subsequent messages use newThread: false
67
+ * - Requires --max-concurrency 1 for reliable behavior
68
+ * - Useful for workflows that ask follow-up questions
69
+ * @default false
70
+ */
71
+ stateful?: boolean;
72
+ }
73
+ //# sourceMappingURL=chatkit-types.d.ts.map
@@ -0,0 +1,3 @@
1
+ "use strict"; // no other statements follow besides the __esModule flag below
2
+ Object.defineProperty(exports, "__esModule", { value: true }); // sets exports.__esModule = true
3
+ //# sourceMappingURL=chatkit-types.js.map
@@ -0,0 +1,76 @@
1
+ /**
2
+ * OpenAI ChatKit Provider
3
+ *
4
+ * Evaluates ChatKit workflows deployed via Agent Builder using Playwright
5
+ * to interact with the ChatKit web component.
6
+ *
7
+ * ChatKit workflows created in OpenAI's Agent Builder don't expose a direct
8
+ * REST API for sending messages. Instead, they require interaction through
9
+ * the ChatKit web component, which this provider automates using Playwright.
10
+ *
11
+ * Prerequisites:
12
+ * - Playwright installed: npm install playwright && npx playwright install chromium
13
+ * - OPENAI_API_KEY environment variable set
14
+ *
15
+ * Usage:
16
+ * providers:
17
+ * - id: openai:chatkit:wf_68ffb83dbfc88190a38103c2bb9f421003f913035dbdb131
18
+ * config:
19
+ * version: '3' # Optional: workflow version
20
+ * timeout: 120000 # Optional: response timeout in ms (default: 120000)
21
+ * headless: true # Optional: run browser headless (default: true)
22
+ *
23
+ * Performance Notes:
24
+ * - Evaluations run through a real browser (a separate instance per test unless pool mode is used), so it's slower than REST APIs
25
+ * - For reliable results, use --max-concurrency 1 to avoid resource contention
26
+ * - First test may be slower due to browser launch and ChatKit initialization
27
+ *
28
+ * Troubleshooting:
29
+ * - "Playwright not found": Run `npm install playwright && npx playwright install chromium`
30
+ * - Timeout errors: Increase timeout config or use --max-concurrency 1
31
+ * - Empty responses: The workflow may not generate text for some inputs
32
+ */
33
+ import { OpenAiGenericProvider } from './index';
34
+ import type { OpenAiChatKitOptions } from './chatkit-types';
35
+ import type { CallApiContextParams, CallApiOptionsParams, ProviderResponse } from '../../types/index';
36
+ import type { EnvOverrides } from '../../types/env';
37
+ export declare class OpenAiChatKitProvider extends OpenAiGenericProvider {
38
+ private chatKitConfig;
39
+ private browser;
40
+ private context;
41
+ private page;
42
+ private server;
43
+ private serverPort;
44
+ private initialized;
45
+ private static defaultUserId;
46
+ private static getDefaultUserId;
47
+ constructor(workflowId: string, options?: {
48
+ config?: OpenAiChatKitOptions;
49
+ id?: string;
50
+ env?: EnvOverrides;
51
+ });
52
+ id(): string;
53
+ toString(): string;
54
+ /**
55
+ * Initialize the browser and the ChatKit page.
56
+ */
57
+ private initialize;
58
+ /**
59
+ * Shutdown method for providerRegistry cleanup; returns a promise that carries no value.
60
+ */
61
+ shutdown(): Promise<void>;
62
+ /**
63
+ * Clean up browser resources; returns a promise that carries no value.
64
+ */
65
+ cleanup(): Promise<void>;
66
+ /**
67
+ * Call the ChatKit workflow with the given prompt and return a promise of the ProviderResponse.
68
+ */
69
+ callApi(prompt: string, _context?: CallApiContextParams, _callApiOptions?: CallApiOptionsParams): Promise<ProviderResponse>;
70
+ /**
71
+ * Pool-based variant of callApi for better concurrency support.
72
+ * Uses a shared browser with multiple contexts instead of separate browsers.
73
+ */
74
+ private callApiWithPool;
75
+ }
76
+ //# sourceMappingURL=chatkit.d.ts.map