promptfoo 0.119.13 → 0.119.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +28 -26
- package/dist/src/app/assets/index-eJ2lMe94.js +51 -0
- package/dist/src/app/assets/{source-map-support-Bnh0UQ2S.js → source-map-support-1v4oeb7P.js} +1 -1
- package/dist/src/app/assets/sync-CtLQRuC1.js +1 -0
- package/dist/src/app/assets/{vendor-charts-T60Uk0Z3.js → vendor-charts-DnVv66VV.js} +1 -1
- package/dist/src/app/assets/{vendor-markdown-DLig-KJh.js → vendor-markdown-DCpQIyMA.js} +1 -1
- package/dist/src/app/assets/{vendor-mui-core-5BLaiG3c.js → vendor-mui-core-Boqnpf9f.js} +1 -1
- package/dist/src/app/assets/{vendor-mui-icons-fn39Fu2e.js → vendor-mui-icons-B8MqoVbj.js} +1 -1
- package/dist/src/app/assets/vendor-mui-x-CGSS6QHF.js +45 -0
- package/dist/src/app/assets/{vendor-utils-DYBMEuwX.js → vendor-utils-DdfHIEy8.js} +1 -1
- package/dist/src/app/index.html +7 -7
- package/dist/src/assertions/guardrails.d.ts +1 -1
- package/dist/src/assertions/guardrails.js +18 -9
- package/dist/src/assertions/index.d.ts +1 -1
- package/dist/src/assertions/index.js +9 -3
- package/dist/src/assertions/searchRubric.d.ts +3 -0
- package/dist/src/assertions/searchRubric.js +18 -0
- package/dist/src/commands/eval.js +1 -1
- package/dist/src/commands/modelScan.d.ts +7 -1
- package/dist/src/commands/modelScan.js +121 -59
- package/dist/src/database/index.d.ts +6 -0
- package/dist/src/database/index.js +11 -0
- package/dist/src/database/tables.d.ts +46 -24
- package/dist/src/envars.d.ts +17 -0
- package/dist/src/generated/constants.js +1 -1
- package/dist/src/logger.d.ts +5 -0
- package/dist/src/logger.js +28 -0
- package/dist/src/main.js +17 -6
- package/dist/src/matchers.d.ts +1 -0
- package/dist/src/matchers.js +80 -0
- package/dist/src/models/eval.d.ts +2 -1
- package/dist/src/models/eval.js +44 -2
- package/dist/src/prompts/grading.d.ts +1 -0
- package/dist/src/prompts/grading.js +26 -1
- package/dist/src/prompts/index.d.ts +1 -0
- package/dist/src/prompts/index.js +4 -1
- package/dist/src/providers/adaline.gateway.js +2 -2
- package/dist/src/providers/anthropic/defaults.d.ts +1 -1
- package/dist/src/providers/anthropic/defaults.js +15 -0
- package/dist/src/providers/azure/chat.d.ts +3 -1
- package/dist/src/providers/azure/chat.js +16 -3
- package/dist/src/providers/azure/defaults.js +660 -141
- package/dist/src/providers/azure/responses.d.ts +5 -0
- package/dist/src/providers/azure/responses.js +33 -4
- package/dist/src/providers/azure/types.d.ts +4 -0
- package/dist/src/providers/bedrock/agents.d.ts +1 -1
- package/dist/src/providers/bedrock/agents.js +2 -2
- package/dist/src/providers/bedrock/base.d.ts +40 -0
- package/dist/src/providers/bedrock/base.js +171 -0
- package/dist/src/providers/bedrock/converse.d.ts +146 -0
- package/dist/src/providers/bedrock/converse.js +1044 -0
- package/dist/src/providers/bedrock/index.d.ts +1 -34
- package/dist/src/providers/bedrock/index.js +4 -159
- package/dist/src/providers/bedrock/knowledgeBase.d.ts +1 -1
- package/dist/src/providers/bedrock/knowledgeBase.js +2 -2
- package/dist/src/providers/bedrock/nova-sonic.d.ts +2 -1
- package/dist/src/providers/bedrock/nova-sonic.js +2 -2
- package/dist/src/providers/claude-agent-sdk.d.ts +58 -1
- package/dist/src/providers/claude-agent-sdk.js +22 -1
- package/dist/src/providers/defaults.js +4 -0
- package/dist/src/providers/github/defaults.js +6 -6
- package/dist/src/providers/google/types.d.ts +25 -0
- package/dist/src/providers/google/util.d.ts +2 -0
- package/dist/src/providers/google/vertex.js +78 -22
- package/dist/src/providers/{groq.d.ts → groq/chat.d.ts} +26 -20
- package/dist/src/providers/groq/chat.js +79 -0
- package/dist/src/providers/groq/index.d.ts +5 -0
- package/dist/src/providers/groq/index.js +24 -0
- package/dist/src/providers/groq/responses.d.ts +106 -0
- package/dist/src/providers/groq/responses.js +64 -0
- package/dist/src/providers/groq/types.d.ts +44 -0
- package/dist/src/providers/groq/types.js +3 -0
- package/dist/src/providers/groq/util.d.ts +15 -0
- package/dist/src/providers/groq/util.js +28 -0
- package/dist/src/providers/mcp/client.d.ts +8 -0
- package/dist/src/providers/mcp/client.js +60 -10
- package/dist/src/providers/mcp/types.d.ts +21 -0
- package/dist/src/providers/openai/chatkit-pool.d.ts +114 -0
- package/dist/src/providers/openai/chatkit-pool.js +548 -0
- package/dist/src/providers/openai/chatkit-types.d.ts +73 -0
- package/dist/src/providers/openai/chatkit-types.js +3 -0
- package/dist/src/providers/openai/chatkit.d.ts +76 -0
- package/dist/src/providers/openai/chatkit.js +879 -0
- package/dist/src/providers/openai/codex-sdk.d.ts +109 -0
- package/dist/src/providers/openai/codex-sdk.js +346 -0
- package/dist/src/providers/openai/defaults.d.ts +2 -0
- package/dist/src/providers/openai/defaults.js +10 -4
- package/dist/src/providers/registry.js +48 -9
- package/dist/src/providers/responses/types.d.ts +1 -1
- package/dist/src/providers/sagemaker.d.ts +2 -2
- package/dist/src/providers/webSearchUtils.d.ts +17 -0
- package/dist/src/providers/webSearchUtils.js +169 -0
- package/dist/src/providers/xai/chat.d.ts +61 -0
- package/dist/src/providers/xai/chat.js +68 -3
- package/dist/src/providers/xai/responses.d.ts +189 -0
- package/dist/src/providers/xai/responses.js +268 -0
- package/dist/src/redteam/constants/plugins.d.ts +1 -1
- package/dist/src/redteam/constants/plugins.js +1 -1
- package/dist/src/redteam/constants/strategies.d.ts +1 -1
- package/dist/src/redteam/constants/strategies.js +1 -0
- package/dist/src/redteam/plugins/vlguard.d.ts +53 -4
- package/dist/src/redteam/plugins/vlguard.js +362 -46
- package/dist/src/redteam/providers/constants.d.ts +2 -2
- package/dist/src/redteam/providers/constants.js +2 -2
- package/dist/src/redteam/providers/crescendo/index.d.ts +1 -1
- package/dist/src/redteam/providers/crescendo/index.js +5 -3
- package/dist/src/redteam/providers/hydra/index.js +1 -1
- package/dist/src/server/routes/modelAudit.js +4 -4
- package/dist/src/share.js +4 -2
- package/dist/src/telemetry.js +44 -8
- package/dist/src/types/env.d.ts +3 -0
- package/dist/src/types/env.js +1 -0
- package/dist/src/types/index.d.ts +896 -615
- package/dist/src/types/index.js +1 -0
- package/dist/src/types/providers.d.ts +1 -0
- package/dist/src/types/tracing.d.ts +3 -0
- package/dist/src/util/database.d.ts +6 -4
- package/dist/src/util/file.js +6 -4
- package/dist/src/util/modelAuditCliParser.d.ts +4 -4
- package/dist/src/util/xlsx.js +52 -26
- package/dist/src/validators/providers.d.ts +142 -122
- package/dist/src/validators/providers.js +4 -6
- package/dist/src/validators/redteam.d.ts +36 -28
- package/dist/src/validators/redteam.js +9 -3
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +28 -26
- package/dist/drizzle/CLAUDE.md +0 -65
- package/dist/src/app/assets/index-DifT6VGT.js +0 -51
- package/dist/src/app/assets/sync-Oo-W_Rbj.js +0 -1
- package/dist/src/app/assets/vendor-mui-x-C2xF-yiO.js +0 -45
- package/dist/src/providers/groq.js +0 -48
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import type { OpenAiSharedOptions } from './types';
|
|
2
|
+
/**
|
|
3
|
+
* Configuration options for the OpenAI ChatKit Provider
|
|
4
|
+
*/
|
|
5
|
+
export interface OpenAiChatKitOptions extends OpenAiSharedOptions {
|
|
6
|
+
/**
|
|
7
|
+
* The ChatKit workflow ID from Agent Builder (e.g., wf_xxxx)
|
|
8
|
+
*/
|
|
9
|
+
workflowId?: string;
|
|
10
|
+
/**
|
|
11
|
+
* The version of the workflow to use
|
|
12
|
+
*/
|
|
13
|
+
version?: string;
|
|
14
|
+
/**
|
|
15
|
+
* User identifier for the ChatKit session.
|
|
16
|
+
* If not set, a unique ID containing a timestamp is generated.
|
|
17
|
+
* @default 'promptfoo-eval-<timestamp>'
|
|
18
|
+
*/
|
|
19
|
+
userId?: string;
|
|
20
|
+
/**
|
|
21
|
+
* Timeout in milliseconds for waiting for a response
|
|
22
|
+
* @default 120000 (2 minutes)
|
|
23
|
+
*/
|
|
24
|
+
timeout?: number;
|
|
25
|
+
/**
|
|
26
|
+
* Run the browser in headless mode
|
|
27
|
+
* @default true
|
|
28
|
+
*/
|
|
29
|
+
headless?: boolean;
|
|
30
|
+
/**
|
|
31
|
+
* Port for the local HTTP server that serves the ChatKit HTML
|
|
32
|
+
* @default 0 (random available port)
|
|
33
|
+
*/
|
|
34
|
+
serverPort?: number;
|
|
35
|
+
/**
|
|
36
|
+
* Use a shared browser pool for better concurrency support.
|
|
37
|
+
* When enabled, a single browser with multiple contexts is used
|
|
38
|
+
* instead of spawning separate browsers per test.
|
|
39
|
+
* @default true
|
|
40
|
+
*/
|
|
41
|
+
usePool?: boolean;
|
|
42
|
+
/**
|
|
43
|
+
* Maximum number of concurrent browser contexts when using pool mode.
|
|
44
|
+
* Only applies when usePool is true.
|
|
45
|
+
* If not specified, defaults to the value of --max-concurrency (or 4).
|
|
46
|
+
*/
|
|
47
|
+
poolSize?: number;
|
|
48
|
+
/**
|
|
49
|
+
* How to handle workflow approval steps.
|
|
50
|
+
* - 'auto-approve': Automatically click approve button when detected
|
|
51
|
+
* - 'auto-reject': Automatically click reject button when detected
|
|
52
|
+
* - 'skip': Don't interact with approvals; capture the approval prompt as output
|
|
53
|
+
* @default 'auto-approve'
|
|
54
|
+
*/
|
|
55
|
+
approvalHandling?: 'auto-approve' | 'auto-reject' | 'skip';
|
|
56
|
+
/**
|
|
57
|
+
* Maximum number of approval steps to process per message.
|
|
58
|
+
* Prevents infinite loops if the workflow has chained approvals.
|
|
59
|
+
* @default 5
|
|
60
|
+
*/
|
|
61
|
+
maxApprovals?: number;
|
|
62
|
+
/**
|
|
63
|
+
* Enable stateful/multi-turn conversation mode.
|
|
64
|
+
* When enabled:
|
|
65
|
+
* - The browser page is kept alive between calls
|
|
66
|
+
* - First message uses newThread: true, subsequent messages use newThread: false
|
|
67
|
+
* - Requires --max-concurrency 1 for reliable behavior
|
|
68
|
+
* - Useful for workflows that ask follow-up questions
|
|
69
|
+
* @default false
|
|
70
|
+
*/
|
|
71
|
+
stateful?: boolean;
|
|
72
|
+
}
|
|
73
|
+
//# sourceMappingURL=chatkit-types.d.ts.map
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI ChatKit Provider
|
|
3
|
+
*
|
|
4
|
+
* Evaluates ChatKit workflows deployed via Agent Builder using Playwright
|
|
5
|
+
* to interact with the ChatKit web component.
|
|
6
|
+
*
|
|
7
|
+
* ChatKit workflows created in OpenAI's Agent Builder don't expose a direct
|
|
8
|
+
* REST API for sending messages. Instead, they require interaction through
|
|
9
|
+
* the ChatKit web component, which this provider automates using Playwright.
|
|
10
|
+
*
|
|
11
|
+
* Prerequisites:
|
|
12
|
+
* - Playwright installed: npm install playwright && npx playwright install chromium
|
|
13
|
+
* - OPENAI_API_KEY environment variable set
|
|
14
|
+
*
|
|
15
|
+
* Usage:
|
|
16
|
+
* providers:
|
|
17
|
+
* - id: openai:chatkit:wf_68ffb83dbfc88190a38103c2bb9f421003f913035dbdb131
|
|
18
|
+
* config:
|
|
19
|
+
* version: '3' # Optional: workflow version
|
|
20
|
+
* timeout: 120000 # Optional: response timeout in ms (default: 120000)
|
|
21
|
+
* headless: true # Optional: run browser headless (default: true)
|
|
22
|
+
*
|
|
23
|
+
* Performance Notes:
|
|
24
|
+
* - Each evaluation drives a real browser (a separate instance per test when usePool is false), so it's slower than REST APIs
|
|
25
|
+
* - For reliable results, use --max-concurrency 1 to avoid resource contention
|
|
26
|
+
* - First test may be slower due to browser launch and ChatKit initialization
|
|
27
|
+
*
|
|
28
|
+
* Troubleshooting:
|
|
29
|
+
* - "Playwright not found": Run `npx playwright install chromium`
|
|
30
|
+
* - Timeout errors: Increase timeout config or use --max-concurrency 1
|
|
31
|
+
* - Empty responses: The workflow may not generate text for some inputs
|
|
32
|
+
*/
|
|
33
|
+
import { OpenAiGenericProvider } from './index';
|
|
34
|
+
import type { OpenAiChatKitOptions } from './chatkit-types';
|
|
35
|
+
import type { CallApiContextParams, CallApiOptionsParams, ProviderResponse } from '../../types/index';
|
|
36
|
+
import type { EnvOverrides } from '../../types/env';
|
|
37
|
+
export declare class OpenAiChatKitProvider extends OpenAiGenericProvider {
|
|
38
|
+
private chatKitConfig;
|
|
39
|
+
private browser;
|
|
40
|
+
private context;
|
|
41
|
+
private page;
|
|
42
|
+
private server;
|
|
43
|
+
private serverPort;
|
|
44
|
+
private initialized;
|
|
45
|
+
private static defaultUserId;
|
|
46
|
+
private static getDefaultUserId;
|
|
47
|
+
constructor(workflowId: string, options?: {
|
|
48
|
+
config?: OpenAiChatKitOptions;
|
|
49
|
+
id?: string;
|
|
50
|
+
env?: EnvOverrides;
|
|
51
|
+
});
|
|
52
|
+
id(): string;
|
|
53
|
+
toString(): string;
|
|
54
|
+
/**
|
|
55
|
+
* Initialize the browser and ChatKit page
|
|
56
|
+
*/
|
|
57
|
+
private initialize;
|
|
58
|
+
/**
|
|
59
|
+
* Shutdown method for providerRegistry cleanup
|
|
60
|
+
*/
|
|
61
|
+
shutdown(): Promise<void>;
|
|
62
|
+
/**
|
|
63
|
+
* Clean up browser resources (NOTE(review): relationship to shutdown() is not visible in this declaration - confirm)
|
|
64
|
+
*/
|
|
65
|
+
cleanup(): Promise<void>;
|
|
66
|
+
/**
|
|
67
|
+
* Call the ChatKit workflow with the given prompt; resolves to a ProviderResponse
|
|
68
|
+
*/
|
|
69
|
+
callApi(prompt: string, _context?: CallApiContextParams, _callApiOptions?: CallApiOptionsParams): Promise<ProviderResponse>;
|
|
70
|
+
/**
|
|
71
|
+
* Pool-based callApi for better concurrency support.
|
|
72
|
+
* Uses a shared browser with multiple contexts instead of separate browsers.
|
|
73
|
+
*/
|
|
74
|
+
private callApiWithPool;
|
|
75
|
+
}
|
|
76
|
+
//# sourceMappingURL=chatkit.d.ts.map
|