promptfoo 0.119.13 → 0.119.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +28 -26
- package/dist/src/app/assets/index-eJ2lMe94.js +51 -0
- package/dist/src/app/assets/{source-map-support-Bnh0UQ2S.js → source-map-support-1v4oeb7P.js} +1 -1
- package/dist/src/app/assets/sync-CtLQRuC1.js +1 -0
- package/dist/src/app/assets/{vendor-charts-T60Uk0Z3.js → vendor-charts-DnVv66VV.js} +1 -1
- package/dist/src/app/assets/{vendor-markdown-DLig-KJh.js → vendor-markdown-DCpQIyMA.js} +1 -1
- package/dist/src/app/assets/{vendor-mui-core-5BLaiG3c.js → vendor-mui-core-Boqnpf9f.js} +1 -1
- package/dist/src/app/assets/{vendor-mui-icons-fn39Fu2e.js → vendor-mui-icons-B8MqoVbj.js} +1 -1
- package/dist/src/app/assets/vendor-mui-x-CGSS6QHF.js +45 -0
- package/dist/src/app/assets/{vendor-utils-DYBMEuwX.js → vendor-utils-DdfHIEy8.js} +1 -1
- package/dist/src/app/index.html +7 -7
- package/dist/src/assertions/guardrails.d.ts +1 -1
- package/dist/src/assertions/guardrails.js +18 -9
- package/dist/src/assertions/index.d.ts +1 -1
- package/dist/src/assertions/index.js +9 -3
- package/dist/src/assertions/searchRubric.d.ts +3 -0
- package/dist/src/assertions/searchRubric.js +18 -0
- package/dist/src/commands/eval.js +1 -1
- package/dist/src/commands/modelScan.d.ts +7 -1
- package/dist/src/commands/modelScan.js +121 -59
- package/dist/src/database/index.d.ts +6 -0
- package/dist/src/database/index.js +11 -0
- package/dist/src/database/tables.d.ts +46 -24
- package/dist/src/envars.d.ts +17 -0
- package/dist/src/generated/constants.js +1 -1
- package/dist/src/logger.d.ts +5 -0
- package/dist/src/logger.js +28 -0
- package/dist/src/main.js +17 -6
- package/dist/src/matchers.d.ts +1 -0
- package/dist/src/matchers.js +80 -0
- package/dist/src/models/eval.d.ts +2 -1
- package/dist/src/models/eval.js +44 -2
- package/dist/src/prompts/grading.d.ts +1 -0
- package/dist/src/prompts/grading.js +26 -1
- package/dist/src/prompts/index.d.ts +1 -0
- package/dist/src/prompts/index.js +4 -1
- package/dist/src/providers/adaline.gateway.js +2 -2
- package/dist/src/providers/anthropic/defaults.d.ts +1 -1
- package/dist/src/providers/anthropic/defaults.js +15 -0
- package/dist/src/providers/azure/chat.d.ts +3 -1
- package/dist/src/providers/azure/chat.js +16 -3
- package/dist/src/providers/azure/defaults.js +660 -141
- package/dist/src/providers/azure/responses.d.ts +5 -0
- package/dist/src/providers/azure/responses.js +33 -4
- package/dist/src/providers/azure/types.d.ts +4 -0
- package/dist/src/providers/bedrock/agents.d.ts +1 -1
- package/dist/src/providers/bedrock/agents.js +2 -2
- package/dist/src/providers/bedrock/base.d.ts +40 -0
- package/dist/src/providers/bedrock/base.js +171 -0
- package/dist/src/providers/bedrock/converse.d.ts +146 -0
- package/dist/src/providers/bedrock/converse.js +1044 -0
- package/dist/src/providers/bedrock/index.d.ts +1 -34
- package/dist/src/providers/bedrock/index.js +4 -159
- package/dist/src/providers/bedrock/knowledgeBase.d.ts +1 -1
- package/dist/src/providers/bedrock/knowledgeBase.js +2 -2
- package/dist/src/providers/bedrock/nova-sonic.d.ts +2 -1
- package/dist/src/providers/bedrock/nova-sonic.js +2 -2
- package/dist/src/providers/claude-agent-sdk.d.ts +58 -1
- package/dist/src/providers/claude-agent-sdk.js +22 -1
- package/dist/src/providers/defaults.js +4 -0
- package/dist/src/providers/github/defaults.js +6 -6
- package/dist/src/providers/google/types.d.ts +25 -0
- package/dist/src/providers/google/util.d.ts +2 -0
- package/dist/src/providers/google/vertex.js +78 -22
- package/dist/src/providers/{groq.d.ts → groq/chat.d.ts} +26 -20
- package/dist/src/providers/groq/chat.js +79 -0
- package/dist/src/providers/groq/index.d.ts +5 -0
- package/dist/src/providers/groq/index.js +24 -0
- package/dist/src/providers/groq/responses.d.ts +106 -0
- package/dist/src/providers/groq/responses.js +64 -0
- package/dist/src/providers/groq/types.d.ts +44 -0
- package/dist/src/providers/groq/types.js +3 -0
- package/dist/src/providers/groq/util.d.ts +15 -0
- package/dist/src/providers/groq/util.js +28 -0
- package/dist/src/providers/mcp/client.d.ts +8 -0
- package/dist/src/providers/mcp/client.js +60 -10
- package/dist/src/providers/mcp/types.d.ts +21 -0
- package/dist/src/providers/openai/chatkit-pool.d.ts +114 -0
- package/dist/src/providers/openai/chatkit-pool.js +548 -0
- package/dist/src/providers/openai/chatkit-types.d.ts +73 -0
- package/dist/src/providers/openai/chatkit-types.js +3 -0
- package/dist/src/providers/openai/chatkit.d.ts +76 -0
- package/dist/src/providers/openai/chatkit.js +879 -0
- package/dist/src/providers/openai/codex-sdk.d.ts +109 -0
- package/dist/src/providers/openai/codex-sdk.js +346 -0
- package/dist/src/providers/openai/defaults.d.ts +2 -0
- package/dist/src/providers/openai/defaults.js +10 -4
- package/dist/src/providers/registry.js +48 -9
- package/dist/src/providers/responses/types.d.ts +1 -1
- package/dist/src/providers/sagemaker.d.ts +2 -2
- package/dist/src/providers/webSearchUtils.d.ts +17 -0
- package/dist/src/providers/webSearchUtils.js +169 -0
- package/dist/src/providers/xai/chat.d.ts +61 -0
- package/dist/src/providers/xai/chat.js +68 -3
- package/dist/src/providers/xai/responses.d.ts +189 -0
- package/dist/src/providers/xai/responses.js +268 -0
- package/dist/src/redteam/constants/plugins.d.ts +1 -1
- package/dist/src/redteam/constants/plugins.js +1 -1
- package/dist/src/redteam/constants/strategies.d.ts +1 -1
- package/dist/src/redteam/constants/strategies.js +1 -0
- package/dist/src/redteam/plugins/vlguard.d.ts +53 -4
- package/dist/src/redteam/plugins/vlguard.js +362 -46
- package/dist/src/redteam/providers/constants.d.ts +2 -2
- package/dist/src/redteam/providers/constants.js +2 -2
- package/dist/src/redteam/providers/crescendo/index.d.ts +1 -1
- package/dist/src/redteam/providers/crescendo/index.js +5 -3
- package/dist/src/redteam/providers/hydra/index.js +1 -1
- package/dist/src/server/routes/modelAudit.js +4 -4
- package/dist/src/share.js +4 -2
- package/dist/src/telemetry.js +44 -8
- package/dist/src/types/env.d.ts +3 -0
- package/dist/src/types/env.js +1 -0
- package/dist/src/types/index.d.ts +896 -615
- package/dist/src/types/index.js +1 -0
- package/dist/src/types/providers.d.ts +1 -0
- package/dist/src/types/tracing.d.ts +3 -0
- package/dist/src/util/database.d.ts +6 -4
- package/dist/src/util/file.js +6 -4
- package/dist/src/util/modelAuditCliParser.d.ts +4 -4
- package/dist/src/util/xlsx.js +52 -26
- package/dist/src/validators/providers.d.ts +142 -122
- package/dist/src/validators/providers.js +4 -6
- package/dist/src/validators/redteam.d.ts +36 -28
- package/dist/src/validators/redteam.js +9 -3
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +28 -26
- package/dist/drizzle/CLAUDE.md +0 -65
- package/dist/src/app/assets/index-DifT6VGT.js +0 -51
- package/dist/src/app/assets/sync-Oo-W_Rbj.js +0 -1
- package/dist/src/app/assets/vendor-mui-x-C2xF-yiO.js +0 -45
- package/dist/src/providers/groq.js +0 -48
|
@@ -0,0 +1,879 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* OpenAI ChatKit Provider
|
|
4
|
+
*
|
|
5
|
+
* Evaluates ChatKit workflows deployed via Agent Builder using Playwright
|
|
6
|
+
* to interact with the ChatKit web component.
|
|
7
|
+
*
|
|
8
|
+
* ChatKit workflows created in OpenAI's Agent Builder don't expose a direct
|
|
9
|
+
* REST API for sending messages. Instead, they require interaction through
|
|
10
|
+
* the ChatKit web component, which this provider automates using Playwright.
|
|
11
|
+
*
|
|
12
|
+
* Prerequisites:
|
|
13
|
+
* - Playwright installed: npm install playwright && npx playwright install chromium
|
|
14
|
+
* - OPENAI_API_KEY environment variable set
|
|
15
|
+
*
|
|
16
|
+
* Usage:
|
|
17
|
+
* providers:
|
|
18
|
+
* - id: openai:chatkit:wf_68ffb83dbfc88190a38103c2bb9f421003f913035dbdb131
|
|
19
|
+
* config:
|
|
20
|
+
* version: '3' # Optional: workflow version
|
|
21
|
+
* timeout: 120000 # Optional: response timeout in ms (default: 120000)
|
|
22
|
+
* headless: true # Optional: run browser headless (default: true)
|
|
23
|
+
*
|
|
24
|
+
* Performance Notes:
|
|
25
|
+
* - Each evaluation spawns a browser instance, so it's slower than REST APIs
|
|
26
|
+
* - For reliable results, use --max-concurrency 1 to avoid resource contention
|
|
27
|
+
* - First test may be slower due to browser launch and ChatKit initialization
|
|
28
|
+
*
|
|
29
|
+
* Troubleshooting:
|
|
30
|
+
* - "Playwright not found": Run `npx playwright install chromium`
|
|
31
|
+
* - Timeout errors: Increase timeout config or use --max-concurrency 1
|
|
32
|
+
* - Empty responses: The workflow may not generate text for some inputs
|
|
33
|
+
*/
|
|
34
|
+
/**
 * TypeScript-emitted interop helper: makes `m[k]` reachable on `o` under the
 * name `k2` (which falls back to `k` when `k2` is undefined).
 * A helper already present on `this` takes precedence over this definition.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    // Variant used when Object.create exists: install the property via a descriptor.
    function defineBinding(o, m, k, k2) {
        var alias = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessor on the source: reuse it only when the source is flagged __esModule.
            needsGetter = !m.__esModule;
        }
        else {
            // Data property that can still change: read it through a getter instead.
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, alias, descriptor);
    }
    // Variant used when Object.create is missing: copy the current value.
    function copyBinding(o, m, k, k2) {
        var alias = k2 === undefined ? k : k2;
        o[alias] = m[k];
    }
    return Object.create ? defineBinding : copyBinding;
})();
|
|
45
|
+
/**
 * TypeScript-emitted interop helper: stores `v` as the `default` member of `o`.
 * A helper already present on `this` takes precedence over this definition.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        // Defined through a descriptor: enumerable, and (by defineProperty defaults) read-only.
        return function (o, v) {
            Object.defineProperty(o, "default", { enumerable: true, value: v });
        };
    }
    // Without Object.create, fall back to a plain assignment.
    return function (o, v) {
        o["default"] = v;
    };
})();
|
|
50
|
+
/**
 * TypeScript-emitted interop helper behind `import * as x`.
 * Objects flagged `__esModule` are returned untouched; anything else is wrapped
 * in a fresh namespace object whose `default` is the original value and whose
 * other members are bound to the original's own properties.
 * A helper already present on `this` takes precedence over this definition.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Self-replacing function: picks the key-listing strategy on first call.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (target) {
            var names = [];
            for (var key in target) {
                if (Object.prototype.hasOwnProperty.call(target, key)) {
                    names[names.length] = key;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            for (var idx = 0; idx < names.length; idx++) {
                // "default" is installed separately below.
                if (names[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, names[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
67
|
+
/**
 * TypeScript-emitted interop helper behind default imports.
 * Objects flagged `__esModule` are returned as-is; any other value is wrapped
 * as `{ default: value }`.
 * A helper already present on `this` takes precedence over this definition.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
70
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
71
|
+
exports.OpenAiChatKitProvider = void 0;
|
|
72
|
+
const playwright_1 = require("playwright");
|
|
73
|
+
const http = __importStar(require("http"));
|
|
74
|
+
const logger_1 = __importDefault(require("../../logger"));
|
|
75
|
+
const providerRegistry_1 = require("../providerRegistry");
|
|
76
|
+
const index_1 = require("./index");
|
|
77
|
+
const chatkit_pool_1 = require("./chatkit-pool");
|
|
78
|
+
// Defaults applied by the provider constructor when the config omits a value
const DEFAULT_TIMEOUT_MS = 2 * 60 * 1000; // response timeout
const DEFAULT_MAX_APPROVALS = 5;
const DEFAULT_POOL_SIZE = 4; // used when PROMPTFOO_MAX_CONCURRENCY is unset or not a number
// Fixed waits, all in milliseconds
const CHATKIT_READY_TIMEOUT_MS = 60 * 1000;
const DOM_SETTLE_DELAY_MS = 2 * 1000; // pause after a post-approval response before continuing
const APPROVAL_PROCESS_DELAY_MS = 500; // pause before each search for an approval button
const APPROVAL_CLICK_DELAY_MS = 1000; // pause after clicking an approval button
const RESPONSE_EXTRACT_RETRY_DELAY_MS = 500; // pause between response extraction attempts
// Note: MIN_RESPONSE_LENGTH (20), MIN_MESSAGE_LENGTH (30), MAX_INIT_ATTEMPTS (100),
// and INIT_POLL_INTERVAL_MS (100) have no named constant: they appear as literals
// in the HTML template string and in DOM evaluation functions, where constants
// cannot be easily passed.
|
|
90
|
+
/**
 * Tell whether `url` points at OpenAI's CDN.
 *
 * The URL is parsed and its hostname compared exactly, so a string that merely
 * contains the CDN hostname elsewhere (path, query, subdomain of another site)
 * is not accepted.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} true only when the hostname is `cdn.platform.openai.com`;
 *   false for any other host and for input that is not a parseable URL.
 */
function isOpenAICdnUrl(url) {
    let hostname;
    try {
        ({ hostname } = new URL(url));
    }
    catch {
        // Not a parseable absolute URL.
        return false;
    }
    return hostname === 'cdn.platform.openai.com';
}
|
|
103
|
+
/**
 * Reject workflow ids that are not `wf_` followed by one or more ASCII
 * letters/digits. The id is later interpolated into an inline script, so this
 * check guards against script injection.
 *
 * @param {string} workflowId - Workflow id to check.
 * @throws {Error} when the id is empty/missing or does not match the format.
 */
function validateWorkflowId(workflowId) {
    const wellFormed = Boolean(workflowId) && /^wf_[a-zA-Z0-9]+$/.test(workflowId);
    if (wellFormed) {
        return;
    }
    throw new Error(`Invalid workflowId format: ${workflowId}. Expected format: wf_<alphanumeric>`);
}
|
|
111
|
+
/**
 * Reject workflow versions containing anything other than ASCII letters,
 * digits, dot, dash or underscore. The version is later interpolated into an
 * inline script, so this check guards against script injection.
 *
 * @param {string} version - Workflow version to check.
 * @throws {Error} when the version is empty or contains a disallowed character.
 */
function validateVersion(version) {
    const allowed = /^[a-zA-Z0-9._-]+$/;
    if (allowed.test(version)) {
        return;
    }
    throw new Error(`Invalid version format: ${version}. Only alphanumeric, dot, dash, and underscore allowed.`);
}
|
|
119
|
+
/**
 * Reject user ids containing anything other than ASCII letters, digits, dot,
 * dash, underscore or `@`. The id is later interpolated into an inline script,
 * so this check guards against script injection.
 *
 * @param {string} userId - User id to check.
 * @throws {Error} when the id is empty or contains a disallowed character.
 */
function validateUserId(userId) {
    const allowed = /^[a-zA-Z0-9._@-]+$/;
    if (allowed.test(userId)) {
        return;
    }
    throw new Error(`Invalid userId format: ${userId}. Only alphanumeric, dot, dash, underscore, and @ allowed.`);
}
|
|
127
|
+
/**
 * Escape a value so it can sit inside a single-quoted JavaScript string literal
 * within an inline <script> element. Quotes, backslashes, angle brackets,
 * ampersands and line terminators become \uXXXX escapes, so the value can
 * neither end the string literal nor close the surrounding script element.
 */
function escapeForInlineScriptString(value) {
    return String(value).replace(/[\\'"<>&\r\n\u2028\u2029]/g, (ch) => `\\u${ch.charCodeAt(0).toString(16).padStart(4, '0')}`);
}
/**
 * Generate the HTML page that hosts the ChatKit component.
 *
 * The page loads the ChatKit script from OpenAI's CDN, configures the
 * <openai-chatkit> element to create a session via
 * https://api.openai.com/v1/chatkit/sessions, and records what happens in
 * `window.__state` ({ ready, responses, threadId, error }) so the caller can
 * poll it. The element itself is exposed as `window.__chatkit`.
 *
 * Every interpolated value ends up inside an inline script: workflowId,
 * version and userId are validated against strict character sets, and apiKey
 * is escaped for the string literal it is embedded in (ordinary keys made of
 * letters, digits, '-' and '_' are emitted unchanged).
 *
 * NOTE: the returned HTML contains the API key in clear text.
 *
 * @param {string} apiKey - OpenAI API key sent as the Bearer token.
 * @param {string} workflowId - Workflow id (`wf_<alphanumeric>`).
 * @param {string} [version] - Optional workflow version; omitted from the
 *   session request when falsy.
 * @param {string} userId - User id sent with the session request (required).
 * @returns {string} The complete HTML document.
 * @throws {Error} when workflowId, version or userId is invalid, or userId is missing.
 */
function generateChatKitHTML(apiKey, workflowId, version, userId) {
    // Validate inputs to prevent script injection
    validateWorkflowId(workflowId);
    if (version) {
        validateVersion(version);
    }
    // userId is required - caller must provide it
    if (!userId) {
        throw new Error('userId is required for ChatKit HTML generation');
    }
    validateUserId(userId);
    // The key has no fixed alphabet to validate against, so escape it instead.
    const safeApiKey = escapeForInlineScriptString(apiKey);
    const versionClause = version ? `, version: '${version}'` : '';
    return `<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>ChatKit Eval</title>
</head>
<body>
  <openai-chatkit id="chatkit"></openai-chatkit>

  <script src="https://cdn.platform.openai.com/deployments/chatkit/chatkit.js"></script>

  <script>
    window.__state = { ready: false, responses: [], threadId: null, error: null };

    async function init() {
      const chatkit = document.getElementById('chatkit');

      // Wait for element to be ready
      let attempts = 0;
      while (typeof chatkit.setOptions !== 'function' && attempts < 100) {
        await new Promise(r => setTimeout(r, 100));
        attempts++;
      }

      if (typeof chatkit.setOptions !== 'function') {
        window.__state.error = 'ChatKit component failed to initialize';
        return;
      }

      let cachedSecret = null;

      chatkit.setOptions({
        api: {
          getClientSecret: async (existing) => {
            if (existing) return existing;
            if (cachedSecret) return cachedSecret;

            const res = await fetch('https://api.openai.com/v1/chatkit/sessions', {
              method: 'POST',
              headers: {
                'Authorization': 'Bearer ${safeApiKey}',
                'Content-Type': 'application/json',
                'OpenAI-Beta': 'chatkit_beta=v1'
              },
              body: JSON.stringify({
                workflow: { id: '${workflowId}'${versionClause} },
                user: '${userId}'
              })
            });

            if (!res.ok) {
              const text = await res.text();
              throw new Error('Session failed: ' + res.status + ' ' + text);
            }

            const data = await res.json();
            cachedSecret = data.client_secret;
            return cachedSecret;
          }
        },
        header: { enabled: false },
        history: { enabled: false },
      });

      chatkit.addEventListener('chatkit.ready', () => {
        window.__state.ready = true;
      });

      chatkit.addEventListener('chatkit.error', (e) => {
        window.__state.error = e.detail.error?.message || 'Unknown error';
      });

      chatkit.addEventListener('chatkit.thread.change', (e) => {
        window.__state.threadId = e.detail.threadId;
      });

      chatkit.addEventListener('chatkit.response.end', () => {
        window.__state.responses.push({ timestamp: Date.now() });
      });

      window.__chatkit = chatkit;
    }

    init().catch(e => {
      window.__state.error = e.message;
    });
  </script>
</body>
</html>`;
}
|
|
232
|
+
/**
 * Pull the assistant's latest reply out of the ChatKit iframe.
 *
 * For each attempt, every frame of `page` whose URL is on OpenAI's CDN is
 * scraped with a series of increasingly generic DOM heuristics; the scraped
 * text is then cleaned (inline-script noise and "You said:" echoes removed, a
 * leading JSON object dropped when substantial text follows it). The first
 * non-empty cleaned text wins. Frames that throw are logged at debug level and
 * skipped. Between attempts the function waits RESPONSE_EXTRACT_RETRY_DELAY_MS,
 * because the DOM may still be updating after the response-end event.
 *
 * @param {object} page - Playwright page hosting the ChatKit component.
 * @param {number} [maxRetries=3] - Number of extraction attempts.
 * @returns {Promise<string>} The cleaned response text, or '' when nothing
 *   usable was found in any attempt.
 */
async function extractResponseFromFrame(page, maxRetries = 3) {
    // Runs inside the browser frame, so it must not reference anything from this module.
    const scrapeAssistantText = () => {
        const textOf = (node) => node.textContent?.trim() || '';
        // True when class name, data-role or data-testid marks the node with `marker`.
        const carriesMarker = (node, marker) => {
            const classes = node.className?.toString().toLowerCase() || '';
            const role = node.getAttribute('data-role') || '';
            const testId = node.getAttribute('data-testid') || '';
            return classes.includes(marker) || role === marker || testId.includes(marker);
        };
        const looksLikeUser = (node) => carriesMarker(node, 'user');
        const looksLikeAssistant = (node) => carriesMarker(node, 'assistant');
        // True when any ancestor below <body> looks like a user message.
        const hasUserAncestor = (node) => {
            for (let up = node.parentElement; up && up !== document.body; up = up.parentElement) {
                if (looksLikeUser(up)) {
                    return true;
                }
            }
            return false;
        };
        // 1. Assistant-specific selectors - these are most reliable.
        const assistantSelectors = [
            '[data-testid="assistant-message"]',
            '[data-role="assistant"]',
            '[class*="assistant"]:not([class*="user"])',
        ];
        for (const selector of assistantSelectors) {
            const matches = document.querySelectorAll(selector);
            if (matches.length === 0) {
                continue;
            }
            // Any non-empty text of the last match is accepted (no minimum length).
            const text = textOf(matches[matches.length - 1]);
            if (text.length > 0) {
                return { text, source: selector, isAssistant: true };
            }
        }
        // 2. Generic message containers: take the last one not marked as a user message.
        const messages = [];
        for (const node of Array.from(document.querySelectorAll('[class*="message"]'))) {
            const text = textOf(node);
            if (text.length === 0) {
                continue;
            }
            messages.push({ text, isUser: looksLikeUser(node), isAssistant: looksLikeAssistant(node) });
        }
        const lastNonUser = messages.slice().reverse().find((entry) => !entry.isUser && entry.text.length > 0);
        if (lastNonUser) {
            return { text: lastNonUser.text, source: 'last-non-user', isAssistant: true };
        }
        // 3. Markdown blocks (often the formatted response) outside any user message.
        const markdownNodes = document.querySelectorAll('.markdown, [class*="markdown"]');
        for (let i = markdownNodes.length - 1; i >= 0; i--) {
            if (hasUserAncestor(markdownNodes[i])) {
                continue;
            }
            const text = textOf(markdownNodes[i]);
            if (text.length > 0) {
                return { text, source: 'markdown', isAssistant: true };
            }
        }
        // 4. Response-specific containers.
        const containers = document.querySelectorAll('[class*="response"], [class*="reply"], [class*="answer"]');
        for (let i = containers.length - 1; i >= 0; i--) {
            if (looksLikeUser(containers[i])) {
                continue;
            }
            const text = textOf(containers[i]);
            if (text.length > 0) {
                return { text, source: 'response-container', isAssistant: true };
            }
        }
        // 5. Any div outside user areas; the 5000-char cap avoids grabbing the whole page.
        const candidates = Array.from(document.querySelectorAll('div'))
            .map((el) => ({ text: textOf(el), el }))
            .filter(({ text, el }) => text.length > 0 && text.length < 5000 && !looksLikeUser(el) && !hasUserAncestor(el));
        if (candidates.length > 0) {
            // Prefer divs that do not wrap other message-like elements; last one is most recent.
            const leaves = candidates.filter(({ el }) => el.querySelectorAll('[class*="message"]').length === 0);
            if (leaves.length > 0) {
                return { text: leaves[leaves.length - 1].text, source: 'leaf-div' };
            }
            return { text: candidates[candidates.length - 1].text, source: 'fallback-div' };
        }
        // 6. Last resort: full body text.
        return { text: document.body?.textContent?.trim() || '', source: 'body' };
    };
    for (let attempt = 0; attempt < maxRetries; attempt++) {
        for (const frame of page.frames()) {
            const url = frame.url();
            if (!isOpenAICdnUrl(url)) {
                continue;
            }
            try {
                const scraped = await frame.evaluate(scrapeAssistantText);
                if (!scraped.text || scraped.text.length === 0) {
                    continue;
                }
                // Strip inline self-invoking scripts (e.g. Cloudflare) and other noise.
                let cleaned = scraped.text.replace(/\(function\(\)\{.*?\}\)\(\);?/gs, '').trim();
                // Text that starts with "You said:" is an echo of the user: wrong element.
                if (cleaned.match(/^You said:([\s\S]*)/i)) {
                    logger_1.default.debug('[ChatKitProvider] Detected user echo, discarding', {
                        preview: cleaned.substring(0, 100),
                    });
                    cleaned = '';
                }
                // A "You said:" further in cuts the text off at that point.
                cleaned = cleaned.replace(/You said:[\s\S]*/gi, '').trim();
                // A leading JSON object is dropped only when substantial text follows it;
                // JSON on its own may be the intended response.
                const jsonPrefixed = cleaned.match(/^(\{[^}]+\})\s+(.+)/s);
                const afterJson = jsonPrefixed ? jsonPrefixed[2].trim() : '';
                if (afterJson.length > 50) {
                    cleaned = afterJson;
                }
                if (cleaned.length > 0) {
                    logger_1.default.debug('[ChatKitProvider] Extracted response', {
                        source: scraped.source,
                        length: cleaned.length,
                        preview: cleaned.substring(0, 100),
                    });
                    return cleaned;
                }
                // Only user content was found - move on so a later frame/attempt can succeed.
                logger_1.default.debug('[ChatKitProvider] No assistant content found after cleaning', {
                    originalLength: scraped.text.length,
                    source: scraped.source,
                });
            }
            catch (e) {
                logger_1.default.debug('[ChatKitProvider] Could not access frame', { url, error: e, attempt });
            }
        }
        // Wait before retry (not after the final attempt)
        if (attempt + 1 < maxRetries) {
            await page.waitForTimeout(RESPONSE_EXTRACT_RETRY_DELAY_MS);
        }
    }
    return '';
}
|
|
419
|
+
/**
 * Click the approve or reject button of a pending workflow approval step.
 *
 * Only frames served from OpenAI's CDN are searched. A visible button located
 * via selectors is clicked first; failing that, the frame's DOM is scanned for
 * any button whose text contains the label (case-insensitive) and that one is
 * clicked. After a click the function waits APPROVAL_CLICK_DELAY_MS so the
 * approval can be processed. Errors raised while inspecting a frame are logged
 * at debug level and the next frame is tried.
 *
 * @param {object} page - Playwright page hosting the ChatKit component.
 * @param {string} action - 'auto-approve' clicks "Approve"; any other value
 *   clicks "Reject".
 * @returns {Promise<boolean>} true if a button was clicked, false if none was found.
 */
async function handleApproval(page, action) {
    const label = action === 'auto-approve' ? 'Approve' : 'Reject';
    const selectors = [
        `button:has-text("${label}")`,
        `[role="button"]:has-text("${label}")`,
        `[data-testid="${label.toLowerCase()}-button"]`,
    ];
    // Runs inside the browser frame: click the first button whose text contains the label.
    const clickByText = (btnText) => {
        const candidates = Array.from(document.querySelectorAll('button, [role="button"]'));
        const match = candidates.find((el) => el.textContent?.toLowerCase().includes(btnText.toLowerCase()));
        if (!(match && match instanceof HTMLElement)) {
            return false;
        }
        match.click();
        return true;
    };
    for (const frame of page.frames()) {
        if (!isOpenAICdnUrl(frame.url())) {
            continue;
        }
        try {
            // Look for approval buttons in the ChatKit iframe
            for (const selector of selectors) {
                const button = await frame.$(selector);
                if (!button || !(await button.isVisible())) {
                    continue;
                }
                logger_1.default.debug('[ChatKitProvider] Found approval button, clicking', {
                    action,
                    selector,
                });
                await button.click();
                // Wait for the approval to be processed
                await page.waitForTimeout(APPROVAL_CLICK_DELAY_MS);
                return true;
            }
            // Alternative: look for approval UI patterns in the DOM
            const clicked = await frame.evaluate(clickByText, label);
            if (clicked) {
                logger_1.default.debug('[ChatKitProvider] Clicked approval button via evaluate', { action });
                await page.waitForTimeout(APPROVAL_CLICK_DELAY_MS);
                return true;
            }
        }
        catch (e) {
            logger_1.default.debug('[ChatKitProvider] Error checking for approval buttons', { error: e });
        }
    }
    return false;
}
|
|
475
|
+
/**
 * Read how many response-end events the page has recorded so far
 * (`window.__state.responses.length`). Returns `fallback` when the page cannot
 * be queried or the state is not there.
 */
async function countRecordedResponses(page, fallback) {
    try {
        const count = await page.evaluate(() => window.__state?.responses?.length);
        return typeof count === 'number' ? count : fallback;
    }
    catch {
        return fallback;
    }
}
/**
 * Process approvals until none remain or max reached.
 *
 * Each round waits APPROVAL_PROCESS_DELAY_MS for the UI to settle, asks
 * handleApproval to click a button, then waits (up to half of `timeout`) for
 * the page to record a new response before looking for the next approval.
 *
 * The "new response" check compares against the number of responses recorded
 * just before the click. Using the approval counter as that baseline is only
 * right when exactly one response had been recorded beforehand; on a page that
 * already holds more responses the wait would resolve immediately. The old
 * baseline is kept as a fallback when the page state cannot be read.
 *
 * @param {object} page - Playwright page hosting the ChatKit component.
 * @param {string} approvalHandling - 'skip' disables approval handling; any
 *   other value is forwarded to handleApproval as the action.
 * @param {number} maxApprovals - Upper bound on approvals handled in one call.
 * @param {number} timeout - Overall response timeout in ms; half of it is
 *   allowed for each post-approval response.
 * @returns {Promise<number>} The number of approvals processed.
 */
async function processApprovals(page, approvalHandling, maxApprovals, timeout) {
    if (approvalHandling === 'skip') {
        return 0;
    }
    let approvalCount = 0;
    while (approvalCount < maxApprovals) {
        // Small delay to let UI settle
        await page.waitForTimeout(APPROVAL_PROCESS_DELAY_MS);
        // Snapshot before clicking so a response triggered by the click is never missed.
        const responsesBefore = await countRecordedResponses(page, approvalCount + 1);
        const handled = await handleApproval(page, approvalHandling);
        if (!handled) {
            break;
        }
        approvalCount++;
        logger_1.default.debug('[ChatKitProvider] Processed approval', {
            count: approvalCount,
            max: maxApprovals,
        });
        // Wait for next response after approval
        try {
            await page.waitForFunction((prevCount) => window.__state?.responses?.length > prevCount, responsesBefore, { timeout: timeout / 2 });
            // Let DOM settle after new response
            await page.waitForTimeout(DOM_SETTLE_DELAY_MS);
        }
        catch {
            // Timeout waiting for response after approval - might be final response
            break;
        }
    }
    return approvalCount;
}
|
|
509
|
+
class OpenAiChatKitProvider extends index_1.OpenAiGenericProvider {
|
|
510
|
+
static getDefaultUserId() {
|
|
511
|
+
if (!OpenAiChatKitProvider.defaultUserId) {
|
|
512
|
+
// Generate once per process to ensure template consistency
|
|
513
|
+
OpenAiChatKitProvider.defaultUserId = `promptfoo-eval-${Date.now()}`;
|
|
514
|
+
}
|
|
515
|
+
return OpenAiChatKitProvider.defaultUserId;
|
|
516
|
+
}
|
|
517
|
+
constructor(workflowId, options = {}) {
|
|
518
|
+
super(workflowId, options);
|
|
519
|
+
this.browser = null;
|
|
520
|
+
this.context = null;
|
|
521
|
+
this.page = null;
|
|
522
|
+
this.server = null;
|
|
523
|
+
this.serverPort = 0;
|
|
524
|
+
this.initialized = false;
|
|
525
|
+
// Default poolSize to PROMPTFOO_MAX_CONCURRENCY env var if set, otherwise DEFAULT_POOL_SIZE
|
|
526
|
+
const envPoolSize = process.env.PROMPTFOO_MAX_CONCURRENCY
|
|
527
|
+
? parseInt(process.env.PROMPTFOO_MAX_CONCURRENCY, 10)
|
|
528
|
+
: NaN;
|
|
529
|
+
const defaultPoolSize = Number.isNaN(envPoolSize) ? DEFAULT_POOL_SIZE : envPoolSize;
|
|
530
|
+
this.chatKitConfig = {
|
|
531
|
+
workflowId: options.config?.workflowId || workflowId,
|
|
532
|
+
version: options.config?.version,
|
|
533
|
+
// Use consistent default userId to ensure template stability during concurrent execution
|
|
534
|
+
userId: options.config?.userId || OpenAiChatKitProvider.getDefaultUserId(),
|
|
535
|
+
timeout: options.config?.timeout || DEFAULT_TIMEOUT_MS,
|
|
536
|
+
headless: options.config?.headless ?? true,
|
|
537
|
+
serverPort: options.config?.serverPort || 0,
|
|
538
|
+
usePool: options.config?.usePool ?? true, // Pool mode by default for better performance
|
|
539
|
+
poolSize: options.config?.poolSize ?? defaultPoolSize,
|
|
540
|
+
approvalHandling: options.config?.approvalHandling ?? 'auto-approve',
|
|
541
|
+
maxApprovals: options.config?.maxApprovals ?? DEFAULT_MAX_APPROVALS,
|
|
542
|
+
stateful: options.config?.stateful ?? false,
|
|
543
|
+
};
|
|
544
|
+
}
|
|
545
|
+
id() {
|
|
546
|
+
const version = this.chatKitConfig.version ? `:${this.chatKitConfig.version}` : '';
|
|
547
|
+
return `openai:chatkit:${this.chatKitConfig.workflowId}${version}`;
|
|
548
|
+
}
|
|
549
|
+
toString() {
|
|
550
|
+
return `[OpenAI ChatKit Provider ${this.chatKitConfig.workflowId}]`;
|
|
551
|
+
}
|
|
552
|
+
/**
|
|
553
|
+
* Initialize the browser and ChatKit page
|
|
554
|
+
*/
|
|
555
|
+
async initialize() {
|
|
556
|
+
if (this.initialized) {
|
|
557
|
+
return;
|
|
558
|
+
}
|
|
559
|
+
const apiKey = this.getApiKey();
|
|
560
|
+
if (!apiKey) {
|
|
561
|
+
throw new Error('OpenAI API key is required for ChatKit provider');
|
|
562
|
+
}
|
|
563
|
+
const workflowId = this.chatKitConfig.workflowId;
|
|
564
|
+
if (!workflowId) {
|
|
565
|
+
throw new Error('ChatKit workflowId is required');
|
|
566
|
+
}
|
|
567
|
+
logger_1.default.debug('[ChatKitProvider] Initializing', {
|
|
568
|
+
workflowId,
|
|
569
|
+
version: this.chatKitConfig.version,
|
|
570
|
+
});
|
|
571
|
+
// Create HTTP server to serve the ChatKit HTML
|
|
572
|
+
const html = generateChatKitHTML(apiKey, workflowId, this.chatKitConfig.version, this.chatKitConfig.userId);
|
|
573
|
+
this.server = http.createServer((_req, res) => {
|
|
574
|
+
res.writeHead(200, { 'Content-Type': 'text/html' });
|
|
575
|
+
res.end(html);
|
|
576
|
+
});
|
|
577
|
+
await new Promise((resolve, reject) => {
|
|
578
|
+
this.server.once('error', (err) => {
|
|
579
|
+
reject(new Error(`Failed to start ChatKit server: ${err.message}`));
|
|
580
|
+
});
|
|
581
|
+
this.server.listen(this.chatKitConfig.serverPort, () => {
|
|
582
|
+
const address = this.server.address();
|
|
583
|
+
this.serverPort = typeof address === 'object' ? address?.port || 0 : 0;
|
|
584
|
+
logger_1.default.debug('[ChatKitProvider] Server started', { port: this.serverPort });
|
|
585
|
+
resolve();
|
|
586
|
+
});
|
|
587
|
+
});
|
|
588
|
+
// Launch browser with helpful error for missing Playwright
|
|
589
|
+
try {
|
|
590
|
+
this.browser = await playwright_1.chromium.launch({
|
|
591
|
+
headless: this.chatKitConfig.headless,
|
|
592
|
+
});
|
|
593
|
+
}
|
|
594
|
+
catch (launchError) {
|
|
595
|
+
const errorMessage = launchError instanceof Error ? launchError.message : String(launchError);
|
|
596
|
+
if (errorMessage.includes("Executable doesn't exist") ||
|
|
597
|
+
errorMessage.includes('browserType.launch')) {
|
|
598
|
+
throw new Error('Playwright browser not installed. Run: npx playwright install chromium\n' +
|
|
599
|
+
`Original error: ${errorMessage}`);
|
|
600
|
+
}
|
|
601
|
+
throw launchError;
|
|
602
|
+
}
|
|
603
|
+
this.context = await this.browser.newContext({
|
|
604
|
+
viewport: { width: 800, height: 600 },
|
|
605
|
+
});
|
|
606
|
+
this.page = await this.context.newPage();
|
|
607
|
+
// Navigate to our HTML page
|
|
608
|
+
await this.page.goto(`http://localhost:${this.serverPort}`, {
|
|
609
|
+
waitUntil: 'domcontentloaded',
|
|
610
|
+
});
|
|
611
|
+
// Wait for ChatKit to be ready
|
|
612
|
+
logger_1.default.debug('[ChatKitProvider] Waiting for ChatKit ready');
|
|
613
|
+
await this.page.waitForFunction(() => window.__state?.ready === true, {
|
|
614
|
+
timeout: CHATKIT_READY_TIMEOUT_MS,
|
|
615
|
+
});
|
|
616
|
+
this.initialized = true;
|
|
617
|
+
// Register for cleanup on process exit (non-pool mode only)
|
|
618
|
+
// Pool mode has its own cleanup mechanism
|
|
619
|
+
if (!this.chatKitConfig.usePool) {
|
|
620
|
+
providerRegistry_1.providerRegistry.register(this);
|
|
621
|
+
}
|
|
622
|
+
logger_1.default.debug('[ChatKitProvider] Initialized successfully');
|
|
623
|
+
}
|
|
624
|
+
/**
|
|
625
|
+
* Shutdown method for providerRegistry cleanup
|
|
626
|
+
*/
|
|
627
|
+
async shutdown() {
|
|
628
|
+
await this.cleanup();
|
|
629
|
+
}
|
|
630
|
+
/**
|
|
631
|
+
* Clean up browser resources
|
|
632
|
+
*/
|
|
633
|
+
async cleanup() {
|
|
634
|
+
if (this.context) {
|
|
635
|
+
await this.context.close();
|
|
636
|
+
this.context = null;
|
|
637
|
+
this.page = null;
|
|
638
|
+
}
|
|
639
|
+
if (this.browser) {
|
|
640
|
+
await this.browser.close();
|
|
641
|
+
this.browser = null;
|
|
642
|
+
}
|
|
643
|
+
if (this.server) {
|
|
644
|
+
this.server.close();
|
|
645
|
+
this.server = null;
|
|
646
|
+
}
|
|
647
|
+
this.initialized = false;
|
|
648
|
+
}
|
|
649
|
+
/**
|
|
650
|
+
* Call the ChatKit workflow with the given prompt
|
|
651
|
+
*/
|
|
652
|
+
async callApi(prompt, _context, _callApiOptions) {
|
|
653
|
+
// Stateful mode requires sequential processing, so disable pool mode
|
|
654
|
+
const usePool = this.chatKitConfig.usePool && !this.chatKitConfig.stateful;
|
|
655
|
+
logger_1.default.debug('[ChatKitProvider] Starting call', {
|
|
656
|
+
prompt: prompt.substring(0, 100),
|
|
657
|
+
workflowId: this.chatKitConfig.workflowId,
|
|
658
|
+
usePool,
|
|
659
|
+
stateful: this.chatKitConfig.stateful,
|
|
660
|
+
});
|
|
661
|
+
// Use pool-based execution for better concurrency (not available in stateful mode)
|
|
662
|
+
if (usePool) {
|
|
663
|
+
return this.callApiWithPool(prompt);
|
|
664
|
+
}
|
|
665
|
+
const startTime = Date.now();
|
|
666
|
+
try {
|
|
667
|
+
await this.initialize();
|
|
668
|
+
if (!this.page) {
|
|
669
|
+
throw new Error('Browser page not initialized');
|
|
670
|
+
}
|
|
671
|
+
// For stateful mode, don't reload the page to maintain conversation state
|
|
672
|
+
// For non-stateful mode, refresh to get clean state for each evaluation
|
|
673
|
+
if (!this.chatKitConfig.stateful) {
|
|
674
|
+
await this.page.reload({ waitUntil: 'domcontentloaded' });
|
|
675
|
+
// Wait for ChatKit to be ready again after reload
|
|
676
|
+
await this.page.waitForFunction(() => window.__state?.ready === true, {
|
|
677
|
+
timeout: CHATKIT_READY_TIMEOUT_MS,
|
|
678
|
+
});
|
|
679
|
+
}
|
|
680
|
+
// For stateful mode, check if this is a follow-up message (responses already exist)
|
|
681
|
+
// Use newThread: false for follow-ups to continue the conversation
|
|
682
|
+
const responseCount = await this.page.evaluate(() => window.__state?.responses?.length || 0);
|
|
683
|
+
const isFollowUp = this.chatKitConfig.stateful && responseCount > 0;
|
|
684
|
+
logger_1.default.debug('[ChatKitProvider] Sending message', {
|
|
685
|
+
stateful: this.chatKitConfig.stateful,
|
|
686
|
+
isFollowUp,
|
|
687
|
+
responseCount,
|
|
688
|
+
});
|
|
689
|
+
// Send the message
|
|
690
|
+
await this.page.evaluate(({ text, newThread }) => {
|
|
691
|
+
return window.__chatkit.sendUserMessage({
|
|
692
|
+
text,
|
|
693
|
+
newThread,
|
|
694
|
+
});
|
|
695
|
+
}, { text: prompt, newThread: !isFollowUp });
|
|
696
|
+
// Wait for response - in stateful mode, wait for a NEW response
|
|
697
|
+
logger_1.default.debug('[ChatKitProvider] Waiting for response');
|
|
698
|
+
const expectedResponseCount = responseCount + 1;
|
|
699
|
+
await this.page.waitForFunction((expected) => window.__state?.responses?.length >= expected, expectedResponseCount, { timeout: this.chatKitConfig.timeout });
|
|
700
|
+
// Allow DOM to settle - ChatKit iframe needs time to render the response
|
|
701
|
+
await this.page.waitForTimeout(DOM_SETTLE_DELAY_MS);
|
|
702
|
+
// Handle any approval steps in the workflow
|
|
703
|
+
const approvalsHandled = await processApprovals(this.page, this.chatKitConfig.approvalHandling ?? 'auto-approve', this.chatKitConfig.maxApprovals ?? DEFAULT_MAX_APPROVALS, this.chatKitConfig.timeout ?? DEFAULT_TIMEOUT_MS);
|
|
704
|
+
if (approvalsHandled > 0) {
|
|
705
|
+
logger_1.default.debug('[ChatKitProvider] Processed approvals', { count: approvalsHandled });
|
|
706
|
+
}
|
|
707
|
+
// Extract response from iframe
|
|
708
|
+
const responseText = await extractResponseFromFrame(this.page);
|
|
709
|
+
// Get thread ID
|
|
710
|
+
const threadId = await this.page.evaluate(() => window.__state.threadId);
|
|
711
|
+
// Get final response count for turn tracking
|
|
712
|
+
const finalResponseCount = await this.page.evaluate(() => window.__state?.responses?.length || 0);
|
|
713
|
+
const latencyMs = Date.now() - startTime;
|
|
714
|
+
logger_1.default.debug('[ChatKitProvider] Response received', {
|
|
715
|
+
threadId,
|
|
716
|
+
textLength: responseText.length,
|
|
717
|
+
turnNumber: finalResponseCount,
|
|
718
|
+
latencyMs,
|
|
719
|
+
});
|
|
720
|
+
return {
|
|
721
|
+
output: responseText,
|
|
722
|
+
cached: false, // ChatKit responses are never cached (browser-based)
|
|
723
|
+
latencyMs,
|
|
724
|
+
// Use sessionId for consistency with HTTP provider's stateful handling
|
|
725
|
+
sessionId: threadId,
|
|
726
|
+
// Token usage not available from ChatKit, but track request count
|
|
727
|
+
tokenUsage: { numRequests: 1 },
|
|
728
|
+
metadata: {
|
|
729
|
+
workflowId: this.chatKitConfig.workflowId,
|
|
730
|
+
version: this.chatKitConfig.version,
|
|
731
|
+
stateful: this.chatKitConfig.stateful,
|
|
732
|
+
turnNumber: finalResponseCount,
|
|
733
|
+
},
|
|
734
|
+
};
|
|
735
|
+
}
|
|
736
|
+
catch (error) {
|
|
737
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
738
|
+
logger_1.default.error('[ChatKitProvider] Call failed', { error: errorMessage });
|
|
739
|
+
// Check for ChatKit-specific errors in page state
|
|
740
|
+
if (this.page) {
|
|
741
|
+
try {
|
|
742
|
+
const stateError = await this.page.evaluate(() => window.__state?.error);
|
|
743
|
+
if (stateError) {
|
|
744
|
+
return {
|
|
745
|
+
error: `ChatKit workflow error: ${stateError}`,
|
|
746
|
+
};
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
catch {
|
|
750
|
+
// Page may be in bad state, continue with general error
|
|
751
|
+
}
|
|
752
|
+
}
|
|
753
|
+
// Provide helpful error messages for common issues
|
|
754
|
+
if (errorMessage.includes('Timeout') || errorMessage.includes('timeout')) {
|
|
755
|
+
return {
|
|
756
|
+
error: `ChatKit response timeout after ${this.chatKitConfig.timeout}ms. ` +
|
|
757
|
+
'Try increasing timeout in config or use --max-concurrency 1 for more reliable results.',
|
|
758
|
+
};
|
|
759
|
+
}
|
|
760
|
+
if (errorMessage.includes('API key')) {
|
|
761
|
+
return {
|
|
762
|
+
error: 'OpenAI API key is required. Set OPENAI_API_KEY environment variable.',
|
|
763
|
+
};
|
|
764
|
+
}
|
|
765
|
+
if (errorMessage.includes('Playwright') || errorMessage.includes('browser')) {
|
|
766
|
+
return {
|
|
767
|
+
error: `Browser error: ${errorMessage}. Ensure Playwright is installed: npx playwright install chromium`,
|
|
768
|
+
};
|
|
769
|
+
}
|
|
770
|
+
return {
|
|
771
|
+
error: `ChatKit provider error: ${errorMessage}`,
|
|
772
|
+
};
|
|
773
|
+
}
|
|
774
|
+
}
|
|
775
|
+
/**
|
|
776
|
+
* Pool-based callApi for better concurrency support.
|
|
777
|
+
* Uses a shared browser with multiple contexts instead of separate browsers.
|
|
778
|
+
*/
|
|
779
|
+
async callApiWithPool(prompt) {
|
|
780
|
+
const apiKey = this.getApiKey();
|
|
781
|
+
if (!apiKey) {
|
|
782
|
+
return {
|
|
783
|
+
error: 'OpenAI API key is required. Set OPENAI_API_KEY environment variable.',
|
|
784
|
+
};
|
|
785
|
+
}
|
|
786
|
+
const workflowId = this.chatKitConfig.workflowId;
|
|
787
|
+
if (!workflowId) {
|
|
788
|
+
return {
|
|
789
|
+
error: 'ChatKit workflowId is required',
|
|
790
|
+
};
|
|
791
|
+
}
|
|
792
|
+
// Get or create the pool
|
|
793
|
+
const pool = chatkit_pool_1.ChatKitBrowserPool.getInstance({
|
|
794
|
+
maxConcurrency: this.chatKitConfig.poolSize,
|
|
795
|
+
headless: this.chatKitConfig.headless,
|
|
796
|
+
});
|
|
797
|
+
// Generate a unique template key for this workflow configuration
|
|
798
|
+
// This ensures different workflows get isolated pages in the pool
|
|
799
|
+
const templateKey = chatkit_pool_1.ChatKitBrowserPool.generateTemplateKey(workflowId, this.chatKitConfig.version, this.chatKitConfig.userId);
|
|
800
|
+
// Register the HTML template for this workflow
|
|
801
|
+
const html = generateChatKitHTML(apiKey, workflowId, this.chatKitConfig.version, this.chatKitConfig.userId);
|
|
802
|
+
pool.setTemplate(templateKey, html);
|
|
803
|
+
let pooledPage = null;
|
|
804
|
+
const startTime = Date.now();
|
|
805
|
+
try {
|
|
806
|
+
// Acquire a page from the pool for this specific template
|
|
807
|
+
pooledPage = await pool.acquirePage(templateKey);
|
|
808
|
+
const page = pooledPage.page;
|
|
809
|
+
logger_1.default.debug('[ChatKitProvider] Acquired page from pool', {
|
|
810
|
+
stats: pool.getStats(),
|
|
811
|
+
});
|
|
812
|
+
// Send the message
|
|
813
|
+
await page.evaluate((text) => {
|
|
814
|
+
return window.__chatkit.sendUserMessage({
|
|
815
|
+
text,
|
|
816
|
+
newThread: true,
|
|
817
|
+
});
|
|
818
|
+
}, prompt);
|
|
819
|
+
// Wait for response
|
|
820
|
+
await page.waitForFunction(() => window.__state?.responses?.length > 0, {
|
|
821
|
+
timeout: this.chatKitConfig.timeout,
|
|
822
|
+
});
|
|
823
|
+
// Allow DOM to settle
|
|
824
|
+
await page.waitForTimeout(DOM_SETTLE_DELAY_MS);
|
|
825
|
+
// Handle any approval steps in the workflow
|
|
826
|
+
const approvalsHandled = await processApprovals(page, this.chatKitConfig.approvalHandling ?? 'auto-approve', this.chatKitConfig.maxApprovals ?? DEFAULT_MAX_APPROVALS, this.chatKitConfig.timeout ?? DEFAULT_TIMEOUT_MS);
|
|
827
|
+
if (approvalsHandled > 0) {
|
|
828
|
+
logger_1.default.debug('[ChatKitProvider] Pool processed approvals', { count: approvalsHandled });
|
|
829
|
+
}
|
|
830
|
+
// Extract response from iframe
|
|
831
|
+
const responseText = await extractResponseFromFrame(page);
|
|
832
|
+
// Get thread ID
|
|
833
|
+
const threadId = await page.evaluate(() => window.__state.threadId);
|
|
834
|
+
const latencyMs = Date.now() - startTime;
|
|
835
|
+
logger_1.default.debug('[ChatKitProvider] Pool response received', {
|
|
836
|
+
threadId,
|
|
837
|
+
textLength: responseText.length,
|
|
838
|
+
latencyMs,
|
|
839
|
+
});
|
|
840
|
+
return {
|
|
841
|
+
output: responseText,
|
|
842
|
+
cached: false, // ChatKit responses are never cached (browser-based)
|
|
843
|
+
latencyMs,
|
|
844
|
+
// Use sessionId for consistency with HTTP provider's stateful handling
|
|
845
|
+
sessionId: threadId,
|
|
846
|
+
// Token usage not available from ChatKit, but track request count
|
|
847
|
+
tokenUsage: { numRequests: 1 },
|
|
848
|
+
metadata: {
|
|
849
|
+
workflowId: this.chatKitConfig.workflowId,
|
|
850
|
+
version: this.chatKitConfig.version,
|
|
851
|
+
poolMode: true,
|
|
852
|
+
},
|
|
853
|
+
};
|
|
854
|
+
}
|
|
855
|
+
catch (error) {
|
|
856
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
857
|
+
logger_1.default.error('[ChatKitProvider] Pool call failed', { error: errorMessage });
|
|
858
|
+
if (errorMessage.includes('Timeout') || errorMessage.includes('timeout')) {
|
|
859
|
+
return {
|
|
860
|
+
error: `ChatKit response timeout after ${this.chatKitConfig.timeout}ms. ` +
|
|
861
|
+
'Try increasing timeout or reducing concurrency.',
|
|
862
|
+
};
|
|
863
|
+
}
|
|
864
|
+
return {
|
|
865
|
+
error: `ChatKit provider error: ${errorMessage}`,
|
|
866
|
+
};
|
|
867
|
+
}
|
|
868
|
+
finally {
|
|
869
|
+
// Release the page back to the pool
|
|
870
|
+
if (pooledPage) {
|
|
871
|
+
await pool.releasePage(pooledPage);
|
|
872
|
+
}
|
|
873
|
+
}
|
|
874
|
+
}
|
|
875
|
+
}
|
|
876
|
+
// Class-level fallback userId, shared by all instances so that concurrent evaluations
// produce consistent template keys. It starts out unset.
OpenAiChatKitProvider.defaultUserId = null;
exports.OpenAiChatKitProvider = OpenAiChatKitProvider;
|
|
879
|
+
//# sourceMappingURL=chatkit.js.map
|