@use-lattice/litmus 0.121.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/LICENSE +19 -0
  2. package/dist/src/accounts-Bt1oJb1Z.cjs +219 -0
  3. package/dist/src/accounts-DjOU8Rm3.js +178 -0
  4. package/dist/src/agentic-utils-D03IiXQc.js +153 -0
  5. package/dist/src/agentic-utils-Dh7xaMQM.cjs +180 -0
  6. package/dist/src/agents-C6BIMlZa.js +231 -0
  7. package/dist/src/agents-DvIpNX1L.cjs +666 -0
  8. package/dist/src/agents-ZP0RP9vV.cjs +231 -0
  9. package/dist/src/agents-maJXdjbR.js +665 -0
  10. package/dist/src/aimlapi-BTbQjG2E.cjs +30 -0
  11. package/dist/src/aimlapi-CwMxqfXP.js +30 -0
  12. package/dist/src/audio-BBUdvsde.cjs +97 -0
  13. package/dist/src/audio-D5DPZ7I-.js +97 -0
  14. package/dist/src/base-BEysXrkq.cjs +222 -0
  15. package/dist/src/base-C451JQfq.js +193 -0
  16. package/dist/src/blobs-BY8MDmpo.js +230 -0
  17. package/dist/src/blobs-BgcNn97m.cjs +256 -0
  18. package/dist/src/cache-BBE_lsTA.cjs +4 -0
  19. package/dist/src/cache-BkrqU5Ba.js +237 -0
  20. package/dist/src/cache-DsCxFlsZ.cjs +297 -0
  21. package/dist/src/chat-CPJWDP6a.cjs +289 -0
  22. package/dist/src/chat-CXX3xzkk.cjs +811 -0
  23. package/dist/src/chat-CcDgZFJ4.js +787 -0
  24. package/dist/src/chat-Dz5ZeGO2.js +289 -0
  25. package/dist/src/chatkit-Dw0mKkML.cjs +1158 -0
  26. package/dist/src/chatkit-swAIVuea.js +1157 -0
  27. package/dist/src/chunk-DEq-mXcV.js +15 -0
  28. package/dist/src/claude-agent-sdk-BXZJtOg6.js +379 -0
  29. package/dist/src/claude-agent-sdk-CkfyjDoG.cjs +383 -0
  30. package/dist/src/cloudflare-ai-BzpJcqUH.js +161 -0
  31. package/dist/src/cloudflare-ai-Cmy_R1y2.cjs +161 -0
  32. package/dist/src/cloudflare-gateway-B9tVQKok.cjs +272 -0
  33. package/dist/src/cloudflare-gateway-DrD3ew3H.js +272 -0
  34. package/dist/src/codex-sdk-Dezj9Nwm.js +1056 -0
  35. package/dist/src/codex-sdk-Dl9D4k5B.cjs +1060 -0
  36. package/dist/src/cometapi-C-9YvCHC.js +54 -0
  37. package/dist/src/cometapi-DHgDKoO2.cjs +54 -0
  38. package/dist/src/completion-B8Ctyxpr.js +120 -0
  39. package/dist/src/completion-Cxrt08sj.cjs +131 -0
  40. package/dist/src/createHash-BwgE13yv.cjs +27 -0
  41. package/dist/src/createHash-DmPQkvBh.js +15 -0
  42. package/dist/src/docker-BiqcTwLv.js +80 -0
  43. package/dist/src/docker-C7tEJnP-.cjs +80 -0
  44. package/dist/src/esm-C62Zofr1.cjs +409 -0
  45. package/dist/src/esm-DMVc93eh.js +379 -0
  46. package/dist/src/evalResult-C3NJPQOo.cjs +301 -0
  47. package/dist/src/evalResult-C7JJAPBb.js +295 -0
  48. package/dist/src/evalResult-DoVTZZWI.cjs +2 -0
  49. package/dist/src/extractor-DnMD3fwt.cjs +391 -0
  50. package/dist/src/extractor-DtlL28vL.js +374 -0
  51. package/dist/src/fetch-BTxakTSg.cjs +1133 -0
  52. package/dist/src/fetch-DQckpUFz.js +928 -0
  53. package/dist/src/fileExtensions-DnqA1y9x.js +85 -0
  54. package/dist/src/fileExtensions-bYh77CN8.cjs +114 -0
  55. package/dist/src/genaiTracer-CyZrmaK0.cjs +268 -0
  56. package/dist/src/genaiTracer-D3fD9dNV.js +256 -0
  57. package/dist/src/graders-BNscxFrU.js +13644 -0
  58. package/dist/src/graders-D2oE9Msq.js +2 -0
  59. package/dist/src/graders-c0Ez_w-9.cjs +2 -0
  60. package/dist/src/graders-d0F2M3e9.cjs +14056 -0
  61. package/dist/src/image-0ZhE0VlR.cjs +280 -0
  62. package/dist/src/image-CWE1pdNv.js +257 -0
  63. package/dist/src/image-D9ZK6hwL.js +163 -0
  64. package/dist/src/image-DKZgZITg.cjs +163 -0
  65. package/dist/src/index.cjs +11366 -0
  66. package/dist/src/index.d.cts +19640 -0
  67. package/dist/src/index.d.ts +19641 -0
  68. package/dist/src/index.js +11306 -0
  69. package/dist/src/invariant-Ddh24eXh.js +25 -0
  70. package/dist/src/invariant-kfQ8Bu82.cjs +30 -0
  71. package/dist/src/knowledgeBase-BgPyGFUd.cjs +122 -0
  72. package/dist/src/knowledgeBase-DyHilYaP.js +122 -0
  73. package/dist/src/litellm-CyMeneHS.js +135 -0
  74. package/dist/src/litellm-DWDF73yF.cjs +135 -0
  75. package/dist/src/logger-C40ZGil9.js +717 -0
  76. package/dist/src/logger-DyfK9PBt.cjs +917 -0
  77. package/dist/src/luma-ray-BAU9X_ep.cjs +315 -0
  78. package/dist/src/luma-ray-nwVseBbv.js +313 -0
  79. package/dist/src/messages-B5ADWTTv.js +245 -0
  80. package/dist/src/messages-BCnZfqrS.cjs +257 -0
  81. package/dist/src/meteor-DLZZ3osF.cjs +134 -0
  82. package/dist/src/meteor-DUiCJRC-.js +134 -0
  83. package/dist/src/modelslab-00cveB8L.cjs +163 -0
  84. package/dist/src/modelslab-D9sCU_L7.js +163 -0
  85. package/dist/src/nova-reel-CTapvqYH.js +276 -0
  86. package/dist/src/nova-reel-DlWuuroF.cjs +278 -0
  87. package/dist/src/nova-sonic-5UPWfeMv.cjs +363 -0
  88. package/dist/src/nova-sonic-BhSwQNym.js +363 -0
  89. package/dist/src/openai-BWrJK9d8.cjs +52 -0
  90. package/dist/src/openai-DumO8WQn.js +47 -0
  91. package/dist/src/openclaw-B8brrjC_.cjs +577 -0
  92. package/dist/src/openclaw-Bkayww9q.js +571 -0
  93. package/dist/src/opencode-sdk-7xjoDNiM.cjs +562 -0
  94. package/dist/src/opencode-sdk-SGwAPxht.js +558 -0
  95. package/dist/src/otlpReceiver-CoAHfAN9.cjs +15 -0
  96. package/dist/src/otlpReceiver-oO3EQwI9.js +14 -0
  97. package/dist/src/providerRegistry-4yjhaEM8.js +45 -0
  98. package/dist/src/providerRegistry-DhV4rJIc.cjs +50 -0
  99. package/dist/src/providers-B5RJVG-7.cjs +33609 -0
  100. package/dist/src/providers-BdmZCLzV.js +33262 -0
  101. package/dist/src/providers-CxtRxn8e.js +2 -0
  102. package/dist/src/providers-DnQLNbx1.cjs +3 -0
  103. package/dist/src/pythonUtils-BD0druiM.cjs +275 -0
  104. package/dist/src/pythonUtils-IBhn5YGR.js +249 -0
  105. package/dist/src/quiverai-BDOwZBsM.cjs +213 -0
  106. package/dist/src/quiverai-D3JTF5lD.js +213 -0
  107. package/dist/src/responses-B2LCDCXZ.js +667 -0
  108. package/dist/src/responses-BvNm4Xv9.cjs +685 -0
  109. package/dist/src/rubyUtils-B0NwnfpY.cjs +245 -0
  110. package/dist/src/rubyUtils-BroxzZ7c.cjs +2 -0
  111. package/dist/src/rubyUtils-hqVw5UvJ.js +222 -0
  112. package/dist/src/sagemaker-Cno2V-Sx.js +689 -0
  113. package/dist/src/sagemaker-fV_KUgs5.cjs +691 -0
  114. package/dist/src/server-BOuAXb06.cjs +238 -0
  115. package/dist/src/server-CtI-EWzm.cjs +2 -0
  116. package/dist/src/server-Cy3DZymt.js +189 -0
  117. package/dist/src/slack-CP8xBePa.js +135 -0
  118. package/dist/src/slack-DSQ1yXVb.cjs +135 -0
  119. package/dist/src/store-BwDDaBjb.cjs +246 -0
  120. package/dist/src/store-DcbLC593.cjs +2 -0
  121. package/dist/src/store-IGpqMIkv.js +240 -0
  122. package/dist/src/tables-3Q2cL7So.cjs +373 -0
  123. package/dist/src/tables-Bi2fjr4W.js +288 -0
  124. package/dist/src/telemetry-Bg2WqF79.js +161 -0
  125. package/dist/src/telemetry-D0x6u5kX.cjs +166 -0
  126. package/dist/src/telemetry-DXNimrI0.cjs +2 -0
  127. package/dist/src/text-B_UCRPp2.js +22 -0
  128. package/dist/src/text-CW1cyrwj.cjs +33 -0
  129. package/dist/src/tokenUsageUtils-NYT-WKS6.js +138 -0
  130. package/dist/src/tokenUsageUtils-bVa1ga6f.cjs +173 -0
  131. package/dist/src/transcription-Cl_W16Pr.js +122 -0
  132. package/dist/src/transcription-yt1EecY8.cjs +124 -0
  133. package/dist/src/transform-BCtGrl_W.cjs +228 -0
  134. package/dist/src/transform-Bv6gG2MJ.cjs +1688 -0
  135. package/dist/src/transform-CY1wbpRy.js +1507 -0
  136. package/dist/src/transform-DU8rUL9P.cjs +2 -0
  137. package/dist/src/transform-yWaShiKr.js +216 -0
  138. package/dist/src/transformersAvailability-BGkzavwb.js +35 -0
  139. package/dist/src/transformersAvailability-DKoRtQLy.cjs +35 -0
  140. package/dist/src/types-5aqHpBwE.cjs +3769 -0
  141. package/dist/src/types-Bn6D9c4U.js +3300 -0
  142. package/dist/src/util-BkKlTkI2.js +293 -0
  143. package/dist/src/util-CTh0bfOm.cjs +1119 -0
  144. package/dist/src/util-D17oBwo7.cjs +328 -0
  145. package/dist/src/util-DsS_-v4p.js +613 -0
  146. package/dist/src/util-DuntT1Ga.js +951 -0
  147. package/dist/src/util-aWjdCYMI.cjs +667 -0
  148. package/dist/src/utils-CisQwpjA.js +94 -0
  149. package/dist/src/utils-yWamDvmz.cjs +123 -0
  150. package/dist/tsconfig.tsbuildinfo +1 -0
  151. package/drizzle/0000_lush_hellion.sql +36 -0
  152. package/drizzle/0001_wide_calypso.sql +3 -0
  153. package/drizzle/0002_tidy_juggernaut.sql +1 -0
  154. package/drizzle/0003_lively_naoko.sql +8 -0
  155. package/drizzle/0004_minor_peter_quill.sql +19 -0
  156. package/drizzle/0005_silky_millenium_guard.sql +2 -0
  157. package/drizzle/0006_harsh_caretaker.sql +42 -0
  158. package/drizzle/0007_cloudy_wong.sql +1 -0
  159. package/drizzle/0008_broad_boomer.sql +2 -0
  160. package/drizzle/0009_strong_marten_broadcloak.sql +19 -0
  161. package/drizzle/0010_needy_bishop.sql +11 -0
  162. package/drizzle/0011_moaning_millenium_guard.sql +1 -0
  163. package/drizzle/0012_late_marten_broadcloak.sql +2 -0
  164. package/drizzle/0013_previous_dormammu.sql +9 -0
  165. package/drizzle/0014_lazy_captain_universe.sql +2 -0
  166. package/drizzle/0015_zippy_wallop.sql +29 -0
  167. package/drizzle/0016_jazzy_zemo.sql +2 -0
  168. package/drizzle/0017_reflective_praxagora.sql +4 -0
  169. package/drizzle/0018_fat_vanisher.sql +22 -0
  170. package/drizzle/0019_new_clint_barton.sql +8 -0
  171. package/drizzle/0020_skinny_maverick.sql +1 -0
  172. package/drizzle/0021_mysterious_madelyne_pryor.sql +13 -0
  173. package/drizzle/0022_sleepy_ultimo.sql +25 -0
  174. package/drizzle/0023_wooden_mandrill.sql +2 -0
  175. package/drizzle/AGENTS.md +68 -0
  176. package/drizzle/CLAUDE.md +1 -0
  177. package/drizzle/meta/0000_snapshot.json +221 -0
  178. package/drizzle/meta/0001_snapshot.json +214 -0
  179. package/drizzle/meta/0002_snapshot.json +221 -0
  180. package/drizzle/meta/0005_snapshot.json +369 -0
  181. package/drizzle/meta/0006_snapshot.json +638 -0
  182. package/drizzle/meta/0007_snapshot.json +640 -0
  183. package/drizzle/meta/0008_snapshot.json +649 -0
  184. package/drizzle/meta/0009_snapshot.json +554 -0
  185. package/drizzle/meta/0010_snapshot.json +619 -0
  186. package/drizzle/meta/0011_snapshot.json +627 -0
  187. package/drizzle/meta/0012_snapshot.json +639 -0
  188. package/drizzle/meta/0013_snapshot.json +717 -0
  189. package/drizzle/meta/0014_snapshot.json +717 -0
  190. package/drizzle/meta/0015_snapshot.json +897 -0
  191. package/drizzle/meta/0016_snapshot.json +1031 -0
  192. package/drizzle/meta/0018_snapshot.json +1210 -0
  193. package/drizzle/meta/0019_snapshot.json +1165 -0
  194. package/drizzle/meta/0020_snapshot.json +1232 -0
  195. package/drizzle/meta/0021_snapshot.json +1311 -0
  196. package/drizzle/meta/0022_snapshot.json +1481 -0
  197. package/drizzle/meta/0023_snapshot.json +1496 -0
  198. package/drizzle/meta/_journal.json +174 -0
  199. package/package.json +240 -0
@@ -0,0 +1,1157 @@
1
+ import { r as logger } from "./logger-C40ZGil9.js";
2
+ import { t as OpenAiGenericProvider } from "./openai-DumO8WQn.js";
3
+ import { t as providerRegistry } from "./providerRegistry-4yjhaEM8.js";
4
+ import * as http$1 from "http";
5
+ import { chromium } from "playwright";
6
+ //#region src/providers/openai/chatkit-pool.ts
7
+ /**
8
+ * ChatKit Browser Pool
9
+ *
10
+ * Manages a pool of browser contexts for concurrent ChatKit evaluations.
11
+ * This significantly reduces resource usage compared to spawning separate
12
+ * browsers for each test.
13
+ *
14
+ * Architecture:
15
+ * - Single browser process (shared across all tests)
16
+ * - Multiple browser contexts (isolated like incognito windows)
17
+ * - Shared HTTP server with per-workflow template routing
18
+ * - Pages are workflow-specific (different workflows get different pages)
19
+ */
/** Longest wait (ms) for a freshly navigated page to report `window.__state.ready`. */
const CHATKIT_READY_TIMEOUT_MS$1 = 60 * 1000;
/** Longest wait (ms) for a reloaded page to report `window.__state.ready` again. */
const PAGE_REFRESH_TIMEOUT_MS = 60 * 1000;
/** Longest time (ms) a caller may sit in the wait queue before acquisition fails. */
const PAGE_ACQUIRE_TIMEOUT_MS = 120 * 1000;
/** Delay (ms) between the pool becoming idle and its automatic shutdown. */
const IDLE_SHUTDOWN_DELAY_MS = 5 * 1000;
/**
 * Singleton browser pool for ChatKit evaluations.
 *
 * One browser process and one local HTTP server are shared by every caller.
 * Each pooled page lives in its own browser context and is bound to a single
 * template key, so different workflows never share a page.
 *
 * Invariants maintained by this class:
 * - `pages.length + pendingCreations` never exceeds `config.maxConcurrency`.
 * - A page handed to a caller has `inUse === true` until `releasePage` runs.
 * - Every queued waiter is eventually resolved or rejected (timeout/shutdown).
 */
var ChatKitBrowserPool = class ChatKitBrowserPool {
  /** Process-wide pool, or null when no pool is alive. */
  static instance = null;
  /** Ensures the process exit hooks are installed only once. */
  static cleanupRegistered = false;
  browser = null;
  server = null;
  serverPort = 0;
  pages = [];
  waitQueue = [];
  config;
  templates = new Map();
  initialized = false;
  initPromise = null;
  idleTimer = null;
  /** Slots reserved by page creations that are still awaiting the browser. */
  pendingCreations = 0;

  /**
   * @param config Pool settings: `maxConcurrency`, `headless`, `serverPort`.
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Register process exit handlers to clean up browser resources.
   * Shutdown is asynchronous, so the `exit` hook is best effort only.
   */
  static registerCleanupHandlers() {
    if (ChatKitBrowserPool.cleanupRegistered) return;
    ChatKitBrowserPool.cleanupRegistered = true;
    const cleanup = () => {
      const pool = ChatKitBrowserPool.instance;
      if (!pool) return;
      pool.shutdown().catch((err) => {
        logger.debug("[ChatKitPool] Error during shutdown:", { error: String(err) });
      });
      ChatKitBrowserPool.instance = null;
    };
    process.on("beforeExit", cleanup);
    process.on("exit", cleanup);
  }

  /**
   * Get the singleton pool instance, creating it on first use.
   *
   * @param config Optional settings; only honoured when the pool is created.
   *   A differing config passed later is ignored with a warning.
   * @returns The shared pool.
   */
  static getInstance(config) {
    if (!ChatKitBrowserPool.instance) {
      ChatKitBrowserPool.instance = new ChatKitBrowserPool({
        maxConcurrency: config?.maxConcurrency ?? 4,
        headless: config?.headless ?? true,
        serverPort: config?.serverPort ?? 0
      });
      ChatKitBrowserPool.registerCleanupHandlers();
      const instance = ChatKitBrowserPool.instance;
      providerRegistry.register({
        async shutdown() {
          await instance.shutdown();
          // A newer pool may have replaced this one; never clear somebody else's singleton.
          if (ChatKitBrowserPool.instance === instance) ChatKitBrowserPool.instance = null;
        }
      });
    } else if (config) {
      const existing = ChatKitBrowserPool.instance.config;
      const concurrencyDiffers = config.maxConcurrency !== void 0 && config.maxConcurrency !== existing.maxConcurrency;
      const headlessDiffers = config.headless !== void 0 && config.headless !== existing.headless;
      if (concurrencyDiffers || headlessDiffers) {
        logger.warn("[ChatKitPool] Pool already exists with different config, ignoring new config", {
          existing: {
            maxConcurrency: existing.maxConcurrency,
            headless: existing.headless
          },
          requested: {
            maxConcurrency: config.maxConcurrency,
            headless: config.headless
          }
        });
      }
    }
    return ChatKitBrowserPool.instance;
  }

  /**
   * Reset the singleton (for testing). Shutdown runs in the background.
   */
  static resetInstance() {
    if (ChatKitBrowserPool.instance) {
      ChatKitBrowserPool.instance.shutdown().catch((err) => {
        logger.debug("[ChatKitPool] Error during shutdown:", { error: String(err) });
      });
      ChatKitBrowserPool.instance = null;
    }
  }

  /**
   * Generate a template key from workflow configuration.
   * Missing version or user id are represented by the literal "default".
   */
  static generateTemplateKey(workflowId, version, userId) {
    return `${workflowId}:${version || "default"}:${userId || "default"}`;
  }

  /**
   * Register (or replace) the HTML served for a template key.
   * Pages already bound to a changed template are marked stale so they are
   * reloaded before their next use.
   */
  setTemplate(templateKey, html) {
    if (this.templates.get(templateKey) === html) return;
    this.templates.set(templateKey, html);
    logger.debug("[ChatKitPool] Registered template", { templateKey });
    for (const page of this.pages) {
      if (page.templateKey === templateKey) page.ready = false;
    }
  }

  /**
   * Initialize the pool: start the template server and launch the browser.
   * Concurrent callers share one attempt; a failed attempt can be retried.
   */
  async initialize() {
    if (this.initialized) return;
    if (this.initPromise != null) return this.initPromise;
    this.initPromise = this.doInitialize();
    try {
      await this.initPromise;
    } finally {
      // Reset even on failure, otherwise every later call would receive the
      // same rejected promise and the pool could never recover.
      this.initPromise = null;
    }
  }

  /**
   * Perform one initialization attempt.
   * @throws Error when the server cannot listen or the browser cannot launch.
   */
  async doInitialize() {
    logger.debug("[ChatKitPool] Initializing browser pool", { maxConcurrency: this.config.maxConcurrency });
    const server = http$1.createServer((req, res) => {
      let template;
      try {
        const pathParts = new URL(req.url || "/", `http://localhost`).pathname.split("/").filter(Boolean);
        if (pathParts[0] === "template" && pathParts[1]) {
          template = this.templates.get(decodeURIComponent(pathParts[1]));
        }
      } catch {
        // Malformed URL or percent-escape: answer 404 instead of throwing
        // out of the request handler.
        template = void 0;
      }
      if (template) {
        res.writeHead(200, { "Content-Type": "text/html" });
        res.end(template);
        return;
      }
      res.writeHead(404, { "Content-Type": "text/plain" });
      res.end("Template not found");
    });
    this.server = server;
    try {
      await new Promise((resolve, reject) => {
        server.once("error", (err) => {
          reject(new Error(`Failed to start ChatKit pool server: ${err.message}`));
        });
        // Loopback only: the served templates embed the API key and must not
        // be reachable from other hosts.
        server.listen(this.config.serverPort, "127.0.0.1", () => {
          const address = server.address();
          this.serverPort = typeof address === "object" ? address?.port || 0 : 0;
          logger.debug("[ChatKitPool] Server started", { port: this.serverPort });
          resolve();
        });
      });
      try {
        this.browser = await chromium.launch({ headless: this.config.headless });
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        if (message.includes("Executable doesn't exist")) {
          throw new Error("Playwright browser not installed. Run: npx playwright install chromium");
        }
        throw error;
      }
    } catch (error) {
      // Do not leave a half-initialized pool holding a listening socket.
      server.close();
      if (this.server === server) this.server = null;
      this.serverPort = 0;
      throw error;
    }
    this.initialized = true;
    logger.debug("[ChatKitPool] Browser pool initialized");
  }

  /**
   * Whether another page may be created without exceeding `maxConcurrency`,
   * counting creations that are still in flight.
   */
  hasCapacity() {
    return this.pages.length + this.pendingCreations < this.config.maxConcurrency;
  }

  /**
   * Remove one idle page that belongs to a different template from the pool
   * so that its slot can be reused. The caller must close its context.
   *
   * @returns The detached page, or null when no such page exists.
   */
  detachIdlePage(templateKey) {
    const index = this.pages.findIndex((p) => !p.inUse && p.templateKey !== templateKey);
    if (index < 0) return null;
    return this.pages.splice(index, 1)[0];
  }

  /**
   * Close a pooled page's browser context, logging instead of throwing.
   */
  async closeContextQuietly(pooledPage) {
    try {
      await pooledPage.context.close();
    } catch (error) {
      logger.debug("[ChatKitPool] Error closing browser context", { error: String(error) });
    }
  }

  /**
   * Create a page while holding a capacity reservation.
   * The reservation is taken synchronously, before the first await, so
   * concurrent callers cannot all pass the capacity check.
   *
   * @param templateKey Template the new page is bound to.
   * @param victim Optional page already detached from the pool whose context
   *   is closed before the new page is created.
   * @returns The new page, already in the pool and marked in use.
   */
  async createReservedPage(templateKey, victim = null) {
    this.pendingCreations++;
    try {
      if (victim) await this.closeContextQuietly(victim);
      const pooledPage = await this.createPooledPage(templateKey);
      pooledPage.inUse = true;
      this.pages.push(pooledPage);
      return pooledPage;
    } finally {
      this.pendingCreations--;
    }
  }

  /**
   * Acquire a page from the pool for a specific template.
   * Only returns pages configured for the requested template.
   * Preference order: ready idle page, stale idle page (reloaded), new page
   * (evicting an idle page of another template if the pool is full),
   * otherwise wait until a page is released.
   *
   * @throws Error when the template is not registered, or when no page
   *   becomes available within PAGE_ACQUIRE_TIMEOUT_MS, or on shutdown.
   */
  async acquirePage(templateKey) {
    this.cancelIdleTimer();
    await this.initialize();
    if (!this.templates.has(templateKey)) throw new Error(`Template not registered: ${templateKey}. Call setTemplate first.`);
    const available = this.pages.find((p) => !p.inUse && p.ready && p.templateKey === templateKey);
    if (available) {
      available.inUse = true;
      logger.debug("[ChatKitPool] Acquired existing page", {
        templateKey,
        poolSize: this.pages.length
      });
      return available;
    }
    const needsRefresh = this.pages.find((p) => !p.inUse && !p.ready && p.templateKey === templateKey);
    if (needsRefresh) {
      // Reserve before awaiting so no other caller picks the same page.
      needsRefresh.inUse = true;
      try {
        await this.refreshPooledPage(needsRefresh);
      } catch (error) {
        needsRefresh.inUse = false;
        throw error;
      }
      logger.debug("[ChatKitPool] Acquired and refreshed page", {
        templateKey,
        poolSize: this.pages.length
      });
      return needsRefresh;
    }
    // A pool full of idle pages for other templates would otherwise block
    // this request until it times out; recycle one of those slots instead.
    const victim = this.hasCapacity() ? null : this.detachIdlePage(templateKey);
    if (victim || this.hasCapacity()) {
      const pooledPage = await this.createReservedPage(templateKey, victim);
      logger.debug("[ChatKitPool] Created new page", {
        templateKey,
        poolSize: this.pages.length
      });
      return pooledPage;
    }
    logger.debug("[ChatKitPool] Waiting for available page", {
      templateKey,
      poolSize: this.pages.length,
      waiting: this.waitQueue.length + 1
    });
    return new Promise((resolve, reject) => {
      const waiter = {
        templateKey,
        settled: false,
        // Both settle functions report whether they actually delivered, so a
        // page built for a waiter that already timed out can be returned.
        resolve: (page) => {
          if (waiter.settled) return false;
          waiter.settled = true;
          clearTimeout(timeoutId);
          resolve(page);
          return true;
        },
        reject: (error) => {
          if (waiter.settled) return false;
          waiter.settled = true;
          clearTimeout(timeoutId);
          reject(error);
          return true;
        }
      };
      const timeoutId = setTimeout(() => {
        const index = this.waitQueue.indexOf(waiter);
        if (index >= 0) this.waitQueue.splice(index, 1);
        waiter.reject(new Error(`Timeout waiting for available page after ${PAGE_ACQUIRE_TIMEOUT_MS}ms. Pool has ${this.pages.length} pages, ${this.pages.filter((p) => p.inUse).length} in use.`));
      }, PAGE_ACQUIRE_TIMEOUT_MS);
      this.waitQueue.push(waiter);
    });
  }

  /**
   * Release a page back to the pool.
   * The page is reloaded to reset its state; if that fails it is replaced.
   * The resulting page goes to the first waiter for the same template, or
   * becomes idle (and may then be recycled for waiters of other templates).
   */
  async releasePage(pooledPage) {
    let page = pooledPage;
    try {
      await this.refreshPooledPage(page);
    } catch (error) {
      logger.warn("[ChatKitPool] Failed to reset page, recreating", { error });
      const index = this.pages.indexOf(pooledPage);
      if (index >= 0) this.pages.splice(index, 1);
      try {
        // Reserves the freed slot synchronously, then closes the broken context.
        page = await this.createReservedPage(pooledPage.templateKey, pooledPage);
      } catch (createError) {
        logger.warn("[ChatKitPool] Failed to create replacement page", { error: createError });
        await this.tryServeWaiters();
        this.scheduleIdleShutdown();
        return;
      }
    }
    const waiterIndex = this.waitQueue.findIndex((w) => w.templateKey === page.templateKey);
    if (waiterIndex >= 0) {
      const waiter = this.waitQueue.splice(waiterIndex, 1)[0];
      page.inUse = true;
      waiter.resolve(page);
      this.cancelIdleTimer();
    } else {
      page.inUse = false;
      await this.tryServeWaiters();
      this.scheduleIdleShutdown();
    }
  }

  /**
   * Try to serve waiting requests, oldest first, by creating new pages while
   * there is free capacity or an idle page of another template to recycle.
   */
  async tryServeWaiters() {
    while (this.waitQueue.length > 0) {
      const waiter = this.waitQueue[0];
      const victim = this.hasCapacity() ? null : this.detachIdlePage(waiter.templateKey);
      if (!victim && !this.hasCapacity()) break;
      this.waitQueue.shift();
      try {
        const newPage = await this.createReservedPage(waiter.templateKey, victim);
        // The waiter may have timed out while the page was being built; keep
        // the page as idle instead of leaking it as permanently in use.
        if (!waiter.resolve(newPage)) newPage.inUse = false;
        logger.debug("[ChatKitPool] Created page for waiting request", {
          templateKey: waiter.templateKey,
          poolSize: this.pages.length,
          remainingWaiters: this.waitQueue.length
        });
      } catch (error) {
        logger.warn("[ChatKitPool] Failed to create page for waiter", {
          templateKey: waiter.templateKey,
          error
        });
        if (!waiter.settled) this.waitQueue.unshift(waiter);
        break;
      }
    }
  }

  /**
   * Whether nothing is in use, queued, or being created.
   */
  isIdle() {
    return !this.pages.some((p) => p.inUse) && this.waitQueue.length === 0 && this.pendingCreations === 0;
  }

  /**
   * Schedule automatic shutdown if pool remains idle
   */
  scheduleIdleShutdown() {
    this.cancelIdleTimer();
    if (!this.isIdle() || this.pages.length === 0) return;
    logger.debug("[ChatKitPool] Pool idle, scheduling shutdown", { delay: IDLE_SHUTDOWN_DELAY_MS });
    this.idleTimer = setTimeout(() => {
      if (!this.isIdle()) return;
      logger.debug("[ChatKitPool] Auto-shutting down idle pool");
      this.shutdown().catch((err) => {
        logger.debug("[ChatKitPool] Error during idle shutdown", { error: String(err) });
      });
      // Only clear the singleton if it still refers to this pool.
      if (ChatKitBrowserPool.instance === this) ChatKitBrowserPool.instance = null;
    }, IDLE_SHUTDOWN_DELAY_MS);
    // Do not keep the process alive just for the idle timer.
    if (this.idleTimer.unref) this.idleTimer.unref();
  }

  /**
   * Cancel scheduled idle shutdown
   */
  cancelIdleTimer() {
    if (this.idleTimer) {
      clearTimeout(this.idleTimer);
      this.idleTimer = null;
    }
  }

  /**
   * Create a new pooled page with ChatKit initialized for a specific template.
   * The returned page is NOT yet added to `pages` and is not marked in use.
   *
   * @throws Error when the browser is not initialized, or when navigation or
   *   the readiness wait fails (the context is closed before rethrowing).
   */
  async createPooledPage(templateKey) {
    if (!this.browser) throw new Error("Browser not initialized");
    const context = await this.browser.newContext({
      viewport: {
        width: 800,
        height: 600
      }
    });
    context.setDefaultTimeout(12e4);
    try {
      const page = await context.newPage();
      const templateUrl = `http://localhost:${this.serverPort}/template/${encodeURIComponent(templateKey)}`;
      await page.goto(templateUrl, { waitUntil: "domcontentloaded" });
      await page.waitForFunction(() => window.__state?.ready === true, { timeout: CHATKIT_READY_TIMEOUT_MS$1 });
      return {
        context,
        page,
        ready: true,
        inUse: false,
        templateKey
      };
    } catch (error) {
      try {
        await context.close();
      } catch {}
      throw error;
    }
  }

  /**
   * Reload a pooled page and wait until it reports ready again.
   */
  async refreshPooledPage(pooledPage) {
    logger.debug("[ChatKitPool] Refreshing page", { timeout: PAGE_REFRESH_TIMEOUT_MS });
    await pooledPage.page.reload({ waitUntil: "domcontentloaded" });
    await pooledPage.page.waitForFunction(() => window.__state?.ready === true, { timeout: PAGE_REFRESH_TIMEOUT_MS });
    pooledPage.ready = true;
  }

  /**
   * Get pool statistics
   * @returns Counts of pooled pages, pages in use, queued waiters and templates.
   */
  getStats() {
    return {
      total: this.pages.length,
      inUse: this.pages.filter((p) => p.inUse).length,
      waiting: this.waitQueue.length,
      templates: this.templates.size
    };
  }

  /**
   * Shutdown the pool and release all resources.
   * Queued waiters are rejected immediately rather than left to time out.
   */
  async shutdown() {
    logger.debug("[ChatKitPool] Shutting down");
    this.cancelIdleTimer();
    if (this.waitQueue.length > 0) {
      logger.debug("[ChatKitPool] Clearing pending waiters", { count: this.waitQueue.length });
      const abandoned = this.waitQueue;
      this.waitQueue = [];
      for (const waiter of abandoned) {
        waiter.reject(new Error("ChatKit browser pool was shut down before a page became available"));
      }
    }
    const pagesToClose = this.pages;
    this.pages = [];
    for (const pooledPage of pagesToClose) {
      try {
        await pooledPage.context.close();
      } catch {}
    }
    if (this.browser) {
      try {
        await this.browser.close();
      } catch {}
      this.browser = null;
    }
    if (this.server) {
      this.server.close();
      this.server = null;
    }
    this.initialized = false;
    this.templates.clear();
    logger.debug("[ChatKitPool] Shutdown complete");
  }
};
398
+ //#endregion
399
+ //#region src/providers/openai/chatkit.ts
400
+ /**
401
+ * OpenAI ChatKit Provider
402
+ *
403
+ * Evaluates ChatKit workflows deployed via Agent Builder using Playwright
404
+ * to interact with the ChatKit web component.
405
+ *
406
+ * ChatKit workflows created in OpenAI's Agent Builder don't expose a direct
407
+ * REST API for sending messages. Instead, they require interaction through
408
+ * the ChatKit web component, which this provider automates using Playwright.
409
+ *
410
+ * Prerequisites:
411
+ * - Playwright installed: npm install playwright && npx playwright install chromium
412
+ * - OPENAI_API_KEY environment variable set
413
+ *
414
+ * Usage:
415
+ * providers:
416
+ * - id: openai:chatkit:wf_68ffb83dbfc88190a38103c2bb9f421003f913035dbdb131
417
+ * config:
418
+ * version: '3' # Optional: workflow version
419
+ * timeout: 120000 # Optional: response timeout in ms (default: 120000)
420
+ * headless: true # Optional: run browser headless (default: true)
421
+ *
422
+ * Performance Notes:
423
+ * - Each evaluation spawns a browser instance, so it's slower than REST APIs
424
+ * - For reliable results, use --max-concurrency 1 to avoid resource contention
425
+ * - First test may be slower due to browser launch and ChatKit initialization
426
+ *
427
+ * Troubleshooting:
428
+ * - "Playwright not found": Run `npx playwright install chromium`
429
+ * - Timeout errors: Increase timeout config or use --max-concurrency 1
430
+ * - Empty responses: The workflow may not generate text for some inputs
431
+ */
// Provider tuning constants. Every value suffixed `_MS` is in milliseconds.
// Their consumers lie outside this part of the file, so only the values are
// documented here.

/** 120 000 ms. */
const DEFAULT_TIMEOUT_MS = 120 * 1000;
const DEFAULT_MAX_APPROVALS = 5;
const DEFAULT_POOL_SIZE = 4;
/** 60 000 ms. */
const CHATKIT_READY_TIMEOUT_MS = 60 * 1000;
/** 2 000 ms. */
const DOM_SETTLE_DELAY_MS = 2 * 1000;
const APPROVAL_PROCESS_DELAY_MS = 500;
/** 1 000 ms. */
const APPROVAL_CLICK_DELAY_MS = 1000;
const RESPONSE_EXTRACT_RETRY_DELAY_MS = 500;
/** 10 000 ms. */
const CONTENT_STABILIZATION_MS = 10 * 1000;
const CONTENT_POLL_MS = 500;
/** 60 000 ms. */
const MIN_WORKFLOW_WAIT_MS = 60 * 1000;
const SHORT_RESPONSE_THRESHOLD = 100;
/**
 * Tell whether `url` points at OpenAI's CDN.
 *
 * The decision is made on the parsed hostname (exact match), not on a
 * substring of the URL, so look-alike hosts or paths do not qualify.
 *
 * @param url Candidate URL string.
 * @returns true only when the URL parses and its hostname is the CDN host.
 */
function isOpenAICdnUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Unparseable input is simply "not the CDN".
    return false;
  }
  return parsed.hostname === "cdn.platform.openai.com";
}
/**
 * Guard against script injection through the workflow id.
 *
 * @param workflowId Must be `wf_` followed by one or more ASCII letters/digits.
 * @throws Error when the id is empty or does not match that format.
 */
function validateWorkflowId(workflowId) {
  const workflowIdPattern = /^wf_[a-zA-Z0-9]+$/;
  const isValid = Boolean(workflowId) && workflowIdPattern.test(workflowId);
  if (isValid) return;
  throw new Error(`Invalid workflowId format: ${workflowId}. Expected format: wf_<alphanumeric>`);
}
/**
 * Guard against script injection through the workflow version.
 *
 * @param version Accepted when it consists solely of ASCII letters, digits,
 *   dots, dashes and underscores (at least one character).
 * @throws Error for anything else.
 */
function validateVersion(version) {
  const versionPattern = /^[a-zA-Z0-9._-]+$/;
  if (versionPattern.test(version)) return;
  throw new Error(`Invalid version format: ${version}. Only alphanumeric, dot, dash, and underscore allowed.`);
}
/**
 * Guard against script injection through the user id.
 *
 * @param userId Accepted when it consists solely of ASCII letters, digits,
 *   dots, dashes, underscores and `@` (at least one character).
 * @throws Error for anything else.
 */
function validateUserId(userId) {
  const userIdPattern = /^[a-zA-Z0-9._@-]+$/;
  if (userIdPattern.test(userId)) return;
  throw new Error(`Invalid userId format: ${userId}. Only alphanumeric, dot, dash, underscore, and @ allowed.`);
}
/**
 * Strip noise from text scraped out of an assistant message.
 *
 * Steps, in order:
 * 1. remove inline `(function(){...})();` script snippets;
 * 2. remove a trailing approval prompt ("Approval required ... Reject");
 * 3. drop a user echo: text that begins with "You said:" yields "",
 *    otherwise everything from the first "You said:" onwards is cut;
 * 4. if the text starts with a small `{...}` object followed by whitespace
 *    and more than 50 characters of content, keep only that content.
 *
 * @param text Raw text; any falsy value yields "".
 * @returns The cleaned text.
 */
function cleanAssistantResponse(text) {
  if (!text) return "";

  const withoutScripts = text.replace(/\(function\(\)\{.*?\}\)\(\);?/gs, "").trim();
  const withoutApprovalPrompt = withoutScripts
    .replace(/\n?Approval required\n?Does this work for you\?\n?Approve\n?Reject$/gi, "")
    .replace(/\n?Approval required[\s\n]+Does this work for you\?[\s\n]+Approve[\s\n]+Reject$/gi, "")
    .trim();

  // The whole message is an echo of the user's input.
  if (/^You said:/i.test(withoutApprovalPrompt)) return "";

  const body = withoutApprovalPrompt.replace(/You said:[\s\S]*/gi, "").trim();
  const prefixed = body.match(/^(\{[^}]+\})\s+(.+)/s);
  if (!prefixed) return body;

  const remainder = prefixed[2].trim();
  return remainder.length > 50 ? remainder : body;
}
/**
 * Generate the HTML page that hosts the ChatKit component.
 *
 * The page loads the ChatKit script from OpenAI's CDN, creates a session via
 * `POST /v1/chatkit/sessions` on first use, and mirrors component events into
 * `window.__state` (`ready`, `responses`, `threadId`, `error`, `responding`).
 * The component itself is exposed as `window.__chatkit`.
 *
 * Every argument ends up inside an inline `<script>`: `workflowId`, `version`
 * and `userId` are validated against strict allow-lists, and `apiKey` (which
 * has no fixed format to validate) is embedded as an escaped string literal
 * so that quotes, backslashes or `</script>` in it cannot alter the page.
 *
 * @param apiKey API key sent as the bearer token of the session request.
 * @param workflowId Workflow id (`wf_<alphanumeric>`).
 * @param version Optional workflow version; omitted from the request if falsy.
 * @param userId Required user id for the session.
 * @returns The complete HTML document as a string.
 * @throws Error when `workflowId`, `version` or `userId` is invalid, or when
 *   `userId` is missing.
 */
function generateChatKitHTML(apiKey, workflowId, version, userId) {
  validateWorkflowId(workflowId);
  if (version) validateVersion(version);
  if (!userId) throw new Error("userId is required for ChatKit HTML generation");
  validateUserId(userId);

  // JSON string syntax is valid JavaScript string syntax. "<" is additionally
  // escaped so the value can never close the script element or open a
  // comment, and U+2028/U+2029 are escaped for older script parsers.
  const toInlineScriptLiteral = (value) => JSON.stringify(String(value))
    .replace(/</g, "\\u003c")
    .replace(/\u2028/g, "\\u2028")
    .replace(/\u2029/g, "\\u2029");
  const authorizationLiteral = toInlineScriptLiteral(`Bearer ${apiKey}`);

  return `<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>ChatKit Eval</title>
</head>
<body>
  <openai-chatkit id="chatkit"></openai-chatkit>

  <script src="https://cdn.platform.openai.com/deployments/chatkit/chatkit.js"><\/script>

  <script>
    window.__state = { ready: false, responses: [], threadId: null, error: null, responding: false };

    async function init() {
      const chatkit = document.getElementById('chatkit');

      // Wait for element to be ready
      let attempts = 0;
      while (typeof chatkit.setOptions !== 'function' && attempts < 100) {
        await new Promise(r => setTimeout(r, 100));
        attempts++;
      }

      if (typeof chatkit.setOptions !== 'function') {
        window.__state.error = 'ChatKit component failed to initialize';
        return;
      }

      let cachedSecret = null;

      chatkit.setOptions({
        api: {
          getClientSecret: async (existing) => {
            if (existing) return existing;
            if (cachedSecret) return cachedSecret;

            const res = await fetch('https://api.openai.com/v1/chatkit/sessions', {
              method: 'POST',
              headers: {
                'Authorization': ${authorizationLiteral},
                'Content-Type': 'application/json',
                'OpenAI-Beta': 'chatkit_beta=v1'
              },
              body: JSON.stringify({
                workflow: { id: '${workflowId}'${version ? `, version: '${version}'` : ""} },
                user: '${userId}'
              })
            });

            if (!res.ok) {
              const text = await res.text();
              throw new Error('Session failed: ' + res.status + ' ' + text);
            }

            const data = await res.json();
            cachedSecret = data.client_secret;
            return cachedSecret;
          }
        },
        header: { enabled: false },
        history: { enabled: false },
      });

      chatkit.addEventListener('chatkit.ready', () => {
        window.__state.ready = true;
      });

      chatkit.addEventListener('chatkit.error', (e) => {
        window.__state.error = e.detail.error?.message || 'Unknown error';
      });

      chatkit.addEventListener('chatkit.thread.change', (e) => {
        window.__state.threadId = e.detail.threadId;
      });

      chatkit.addEventListener('chatkit.response.start', () => {
        window.__state.responding = true;
      });

      chatkit.addEventListener('chatkit.response.end', () => {
        window.__state.responding = false;
        window.__state.responses.push({ timestamp: Date.now() });
      });

      window.__chatkit = chatkit;
    }

    init().catch(e => {
      window.__state.error = e.message;
    });
  <\/script>
</body>
</html>`;
}
590
/**
 * Read the most recent assistant reply out of the ChatKit iframe.
 *
 * Every frame whose URL passes `isOpenAICdnUrl` is inspected in the page
 * context with a cascade of increasingly generic DOM heuristics. The whole
 * scan is repeated up to `maxRetries` times, pausing
 * `RESPONSE_EXTRACT_RETRY_DELAY_MS` between passes, because the DOM may still
 * be updating after the response-end event.
 *
 * @param page - Page object exposing `frames()` and `waitForTimeout()`.
 * @param maxRetries - Maximum number of scan passes (default 3).
 * @returns The cleaned response text, or "" when nothing usable was found.
 */
async function extractResponseFromFrame(page, maxRetries = 3) {
	// Raw texts that are only the approval widget's buttons, not a reply.
	const approvalButtonLabels = [
		"ApproveReject",
		"Approve",
		"Reject"
	];
	for (let attempt = 0; attempt < maxRetries; attempt++) {
		for (const frame of page.frames()) {
			const url = frame.url();
			if (!isOpenAICdnUrl(url)) continue;
			try {
				// Runs inside the frame: must be self-contained (no outer closures).
				const result = await frame.evaluate(() => {
					const textOf = (el) => el.textContent?.trim() || "";
					const matchesRole = (el, role) => {
						const className = el.className?.toString().toLowerCase() || "";
						const dataRole = el.getAttribute("data-role") || "";
						const testId = el.getAttribute("data-testid") || "";
						return className.includes(role) || dataRole === role || testId.includes(role);
					};
					const isUserMessage = (el) => matchesRole(el, "user");
					const isAssistantMessage = (el) => matchesRole(el, "assistant");
					// True when any ancestor below <body> looks like a user message.
					const hasUserAncestor = (el) => {
						for (let node = el.parentElement; node && node !== document.body; node = node.parentElement) {
							if (isUserMessage(node)) return true;
						}
						return false;
					};

					// 1. Explicit assistant-message selectors, most specific first.
					const assistantSelectors = [
						"[data-thread-item=\"assistant-message\"]",
						"[data-testid=\"assistant-message\"]",
						"[data-role=\"assistant\"]",
						"[class*=\"assistant\"]:not([class*=\"user\"])"
					];
					for (const selector of assistantSelectors) {
						const matches = document.querySelectorAll(selector);
						if (matches.length === 0) continue;
						const text = textOf(matches[matches.length - 1]);
						if (text.length > 0) return {
							text,
							source: selector,
							isAssistant: true
						};
					}

					// 2. Last non-empty "message" element that is not a user message.
					const nonUserMessages = Array.from(document.querySelectorAll("[class*=\"message\"]")).map((el) => ({
						text: textOf(el),
						isUser: isUserMessage(el),
						isAssistant: isAssistantMessage(el)
					})).filter((entry) => entry.text.length > 0 && !entry.isUser);
					if (nonUserMessages.length > 0) return {
						text: nonUserMessages[nonUserMessages.length - 1].text,
						source: "last-non-user",
						isAssistant: true
					};

					// 3. Last markdown block that is not nested inside a user message.
					const markdownBlocks = document.querySelectorAll(".markdown, [class*=\"markdown\"]");
					for (let i = markdownBlocks.length - 1; i >= 0; i--) {
						const block = markdownBlocks[i];
						if (hasUserAncestor(block)) continue;
						const text = textOf(block);
						if (text.length > 0) return {
							text,
							source: "markdown",
							isAssistant: true
						};
					}

					// 4. Last response/reply/answer container that is not a user message.
					const responseContainers = document.querySelectorAll("[class*=\"response\"], [class*=\"reply\"], [class*=\"answer\"]");
					for (let i = responseContainers.length - 1; i >= 0; i--) {
						const container = responseContainers[i];
						if (isUserMessage(container)) continue;
						const text = textOf(container);
						if (text.length > 0) return {
							text,
							source: "response-container",
							isAssistant: true
						};
					}

					// 5. Any reasonably sized <div> outside user areas.
					const candidateDivs = Array.from(document.querySelectorAll("div")).map((div) => ({
						text: textOf(div),
						el: div
					})).filter(({ text, el }) => text.length > 0 && text.length < 5e3 && !isUserMessage(el) && !hasUserAncestor(el));
					if (candidateDivs.length > 0) {
						const leafDivs = candidateDivs.filter((d) => d.el.querySelectorAll("[class*=\"message\"]").length === 0);
						if (leafDivs.length > 0) return {
							text: leafDivs[leafDivs.length - 1].text,
							source: "leaf-div"
						};
						return {
							text: candidateDivs[candidateDivs.length - 1].text,
							source: "fallback-div"
						};
					}

					// 6. Last resort: the whole body text.
					return {
						text: document.body?.textContent?.trim() || "",
						source: "body"
					};
				});

				if (!result.text) continue;
				const trimmed = result.text.trim();
				if (approvalButtonLabels.includes(trimmed)) {
					logger.debug("[ChatKitProvider] Skipping approval button text", { text: trimmed });
					continue;
				}
				const cleaned = cleanAssistantResponse(result.text);
				if (cleaned.length > 0) {
					logger.debug("[ChatKitProvider] Extracted response", {
						source: result.source,
						length: cleaned.length,
						preview: cleaned.substring(0, 100)
					});
					return cleaned;
				}
				logger.debug("[ChatKitProvider] No assistant content found after cleaning", {
					originalLength: result.text.length,
					source: result.source
				});
			} catch (e) {
				logger.debug("[ChatKitProvider] Could not access frame", {
					url,
					error: e,
					attempt
				});
			}
		}
		// No pause after the final pass.
		if (attempt < maxRetries - 1) await page.waitForTimeout(RESPONSE_EXTRACT_RETRY_DELAY_MS);
	}
	return "";
}
745
/**
 * Get the current visible text content from the ChatKit iframe.
 *
 * Frames are checked in order; the first frame whose URL passes
 * `isOpenAICdnUrl` and can be evaluated wins. A frame that cannot be read is
 * logged at debug level and skipped, so the lookup stays best-effort.
 *
 * @param page - Page object exposing `frames()`.
 * @returns The frame body's `innerText` ("" when the body is empty or
 *   missing), or null when no matching frame could be read.
 */
async function getIframeContent(page) {
	const frames = page.frames();
	logger.debug("[ChatKitProvider] Checking frames", {
		frameCount: frames.length,
		frameUrls: frames.map((f) => f.url())
	});
	for (const frame of frames) {
		const url = frame.url();
		if (!isOpenAICdnUrl(url)) continue;
		try {
			return await frame.evaluate(() => {
				return document.body?.innerText || "";
			});
		} catch (e) {
			// Previously swallowed silently; keep going but leave a trace.
			logger.debug("[ChatKitProvider] Could not read frame content", {
				url,
				error: e
			});
		}
	}
	return null;
}
762
/**
 * Poll the ChatKit iframe until its text stops changing.
 *
 * Each iteration reads `window.__state` from the page and the iframe text via
 * `getIframeContent`. The content counts as stable once the page is not
 * responding, the text has been unchanged for the stabilization window, and
 * the minimum wait since `startTime` has elapsed. Both thresholds are doubled
 * while the captured response is shorter than `SHORT_RESPONSE_THRESHOLD`.
 *
 * @param page - Page object exposing `evaluate()`, `frames()` and `waitForTimeout()`.
 * @param timeout - Maximum polling duration in milliseconds.
 * @param startTime - Epoch milliseconds when the overall call started.
 * @returns `{ assistantResponse, fullContent }` from the last poll; on timeout
 *   `fullContent` is the last observed iframe text.
 */
async function waitForContentStabilization(page, timeout, startTime) {
	const pollStartTime = Date.now();
	let previousContent = "";
	let contentChangedAt = Date.now();
	let latestAssistantText = "";
	logger.debug("[ChatKitProvider] Starting content stabilization polling");
	for (;;) {
		if (Date.now() - pollStartTime >= timeout) break;

		const state = await page.evaluate(() => window.__state);
		const pollElapsedMs = Date.now() - pollStartTime;
		// Emit a progress line roughly once every five seconds of polling.
		const isProgressTick = pollElapsedMs % 5e3 < CONTENT_POLL_MS;
		if (isProgressTick) logger.debug("[ChatKitProvider] Polling state", {
			pollElapsedMs,
			responding: state.responding,
			responseCount: state.responses?.length,
			error: state.error,
			threadId: state.threadId
		});

		const currentContent = await getIframeContent(page) || "";
		if (currentContent !== previousContent) {
			const tailStart = Math.max(0, currentContent.length - 200);
			logger.debug("[ChatKitProvider] Content changed", {
				previousLength: previousContent.length,
				newLength: currentContent.length,
				preview: currentContent.substring(tailStart)
			});
			previousContent = currentContent;
			contentChangedAt = Date.now();
		}

		const timeSinceStart = Date.now() - startTime;
		const timeSinceLastChange = Date.now() - contentChangedAt;

		// Prefer the text after the "The assistant said:" marker when present.
		const assistantMatch = currentContent.match(/The assistant said:\s*\n*([\s\S]*)/i);
		let assistantResponse;
		if (assistantMatch) {
			assistantResponse = assistantMatch[1].trim();
		} else {
			assistantResponse = currentContent;
			if (currentContent.length > 0) logger.debug("[ChatKitProvider] Assistant pattern not found, using full content for length check", { contentLength: currentContent.length });
		}
		latestAssistantText = assistantResponse;

		// Short responses get twice the patience on both thresholds.
		const isShortResponse = assistantResponse.length < SHORT_RESPONSE_THRESHOLD;
		const patience = isShortResponse ? 2 : 1;
		const effectiveStabilizationMs = CONTENT_STABILIZATION_MS * patience;
		const effectiveMinWaitMs = MIN_WORKFLOW_WAIT_MS * patience;

		const isStable = !state.responding && timeSinceLastChange >= effectiveStabilizationMs && timeSinceStart >= effectiveMinWaitMs;
		if (isStable) {
			logger.debug("[ChatKitProvider] Content stabilized", {
				timeSinceStart,
				timeSinceLastChange,
				contentLength: currentContent.length,
				assistantResponseLength: assistantResponse.length,
				isShortResponse,
				responseCount: state.responses?.length
			});
			return {
				assistantResponse: latestAssistantText,
				fullContent: currentContent
			};
		}

		await page.waitForTimeout(CONTENT_POLL_MS);
	}
	logger.debug("[ChatKitProvider] Content stabilization timeout reached");
	return {
		assistantResponse: latestAssistantText,
		fullContent: previousContent
	};
}
819
/**
 * Click the approve or reject button of a pending workflow approval step.
 *
 * For each frame whose URL passes `isOpenAICdnUrl`, a list of selectors is
 * tried first; if none yields a visible button, an in-page search clicks the
 * first button-like element whose text contains the label.
 *
 * @param page - Page object exposing `frames()` and `waitForTimeout()`.
 * @param action - "auto-approve" clicks "Approve"; any other value clicks "Reject".
 * @returns true if a button was clicked, false if no approval was found.
 */
async function handleApproval(page, action) {
	for (const frame of page.frames()) {
		if (!isOpenAICdnUrl(frame.url())) continue;
		try {
			const buttonText = action === "auto-approve" ? "Approve" : "Reject";
			const buttonSelectors = [
				`button:has-text("${buttonText}")`,
				`[role="button"]:has-text("${buttonText}")`,
				`[data-testid="${buttonText.toLowerCase()}-button"]`
			];

			// First choice: a visible element located through a selector.
			for (const selector of buttonSelectors) {
				const button = await frame.$(selector);
				if (!button) continue;
				if (!await button.isVisible()) continue;
				logger.debug("[ChatKitProvider] Found approval button, clicking", {
					action,
					selector
				});
				await button.click();
				await page.waitForTimeout(APPROVAL_CLICK_DELAY_MS);
				return true;
			}

			// Fallback: case-insensitive text search executed inside the frame.
			const clickedInFrame = await frame.evaluate((btnText) => {
				for (const candidate of document.querySelectorAll("button, [role=\"button\"]")) {
					if (!candidate.textContent?.toLowerCase().includes(btnText.toLowerCase())) continue;
					// Only the first text match is considered.
					if (candidate instanceof HTMLElement) {
						candidate.click();
						return true;
					}
					return false;
				}
				return false;
			}, buttonText);
			if (clickedInFrame) {
				logger.debug("[ChatKitProvider] Clicked approval button via evaluate", { action });
				await page.waitForTimeout(APPROVAL_CLICK_DELAY_MS);
				return true;
			}
		} catch (e) {
			logger.debug("[ChatKitProvider] Error checking for approval buttons", { error: e });
		}
	}
	return false;
}
863
/**
 * Process approvals until none remain or the maximum is reached.
 *
 * After each click the function waits for the page's completed-response count
 * (`window.__state.responses.length`) to grow past the value it had just
 * before the click. The baseline is read from the page rather than derived
 * from the approval counter, because a page that has already completed more
 * responses than approvals (e.g. earlier turns on the same page) would
 * otherwise satisfy the wait immediately.
 *
 * @param page - Page object exposing `evaluate()`, `waitForFunction()`,
 *   `waitForTimeout()` and `frames()`.
 * @param approvalHandling - "skip" disables processing; any other value is
 *   forwarded to `handleApproval`.
 * @param maxApprovals - Upper bound on the number of approvals to click.
 * @param timeout - Overall call timeout in ms; half of it bounds each wait.
 * @returns The number of approvals processed.
 */
async function processApprovals(page, approvalHandling, maxApprovals, timeout) {
	if (approvalHandling === "skip") return 0;
	let approvalCount = 0;
	while (approvalCount < maxApprovals) {
		await page.waitForTimeout(APPROVAL_PROCESS_DELAY_MS);
		// Snapshot before clicking so the wait below tracks a NEW response.
		const responsesBeforeClick = await page.evaluate(() => window.__state?.responses?.length || 0);
		if (!await handleApproval(page, approvalHandling)) break;
		approvalCount++;
		logger.debug("[ChatKitProvider] Processed approval", {
			count: approvalCount,
			max: maxApprovals
		});
		try {
			await page.waitForFunction((prevCount) => window.__state?.responses?.length > prevCount, responsesBeforeClick, { timeout: timeout / 2 });
			await page.waitForTimeout(DOM_SETTLE_DELAY_MS);
		} catch {
			// No follow-up response arrived in time: stop looking for approvals.
			break;
		}
	}
	return approvalCount;
}
887
/**
 * Provider that drives an OpenAI ChatKit workflow through a headless browser.
 *
 * Two execution paths exist:
 *  - pool mode (`usePool` true and `stateful` false): pages are borrowed from
 *    the shared `ChatKitBrowserPool`;
 *  - dedicated mode (otherwise): this instance owns a local HTTP server that
 *    serves the generated ChatKit page, plus its own browser/context/page.
 */
var OpenAiChatKitProvider = class OpenAiChatKitProvider extends OpenAiGenericProvider {
	chatKitConfig;
	browser = null;
	context = null;
	page = null;
	server = null;
	serverPort = 0;
	initialized = false;
	static defaultUserId = null;
	/**
	 * Lazily create one fallback user id shared by every instance.
	 */
	static getDefaultUserId() {
		if (!OpenAiChatKitProvider.defaultUserId) OpenAiChatKitProvider.defaultUserId = `promptfoo-eval-${Date.now()}`;
		return OpenAiChatKitProvider.defaultUserId;
	}
	/**
	 * @param workflowId - Workflow id; `options.config.workflowId` wins when set.
	 * @param options - Provider options; `options.config` carries ChatKit settings.
	 */
	constructor(workflowId, options = {}) {
		super(workflowId, options);
		const envPoolSize = Number.parseInt(process.env.PROMPTFOO_MAX_CONCURRENCY ?? "", 10);
		// Ignore unparsable, zero or negative values instead of sizing the pool with them.
		const defaultPoolSize = Number.isInteger(envPoolSize) && envPoolSize > 0 ? envPoolSize : DEFAULT_POOL_SIZE;
		this.chatKitConfig = {
			workflowId: options.config?.workflowId || workflowId,
			version: options.config?.version,
			userId: options.config?.userId || OpenAiChatKitProvider.getDefaultUserId(),
			timeout: options.config?.timeout || DEFAULT_TIMEOUT_MS,
			headless: options.config?.headless ?? true,
			serverPort: options.config?.serverPort || 0,
			usePool: options.config?.usePool ?? true,
			poolSize: options.config?.poolSize ?? defaultPoolSize,
			approvalHandling: options.config?.approvalHandling ?? "auto-approve",
			maxApprovals: options.config?.maxApprovals ?? DEFAULT_MAX_APPROVALS,
			stateful: options.config?.stateful ?? false
		};
	}
	id() {
		const version = this.chatKitConfig.version ? `:${this.chatKitConfig.version}` : "";
		return `openai:chatkit:${this.chatKitConfig.workflowId}${version}`;
	}
	toString() {
		return `[OpenAI ChatKit Provider ${this.chatKitConfig.workflowId}]`;
	}
	/**
	 * Initialize the dedicated server, browser and ChatKit page.
	 *
	 * No-op when already initialized. Leftovers of an earlier failed attempt
	 * are released first so repeated attempts do not accumulate servers or
	 * browsers.
	 *
	 * @throws Error when the API key or workflow id is missing, the server
	 *   cannot start, the browser cannot launch, or ChatKit does not become
	 *   ready within `CHATKIT_READY_TIMEOUT_MS`.
	 */
	async initialize() {
		if (this.initialized) return;
		if (this.server || this.browser || this.context) await this.cleanup();
		const apiKey = this.getApiKey();
		if (!apiKey) throw new Error("OpenAI API key is required for ChatKit provider");
		const workflowId = this.chatKitConfig.workflowId;
		if (!workflowId) throw new Error("ChatKit workflowId is required");
		logger.debug("[ChatKitProvider] Initializing", {
			workflowId,
			version: this.chatKitConfig.version
		});
		const html = generateChatKitHTML(apiKey, workflowId, this.chatKitConfig.version, this.chatKitConfig.userId);
		this.server = http$1.createServer((_req, res) => {
			res.writeHead(200, { "Content-Type": "text/html" });
			res.end(html);
		});
		await new Promise((resolve, reject) => {
			this.server.once("error", (err) => {
				reject(/* @__PURE__ */ new Error(`Failed to start ChatKit server: ${err.message}`));
			});
			// The served page embeds the API key, so bind to loopback only;
			// without a host Node listens on every network interface.
			this.server.listen(this.chatKitConfig.serverPort, "127.0.0.1", () => {
				const address = this.server.address();
				this.serverPort = typeof address === "object" ? address?.port || 0 : 0;
				logger.debug("[ChatKitProvider] Server started", { port: this.serverPort });
				resolve();
			});
		});
		try {
			this.browser = await chromium.launch({ headless: this.chatKitConfig.headless });
		} catch (launchError) {
			const errorMessage = launchError instanceof Error ? launchError.message : String(launchError);
			if (errorMessage.includes("Executable doesn't exist") || errorMessage.includes("browserType.launch")) throw new Error(`Playwright browser not installed. Run: npx playwright install chromium\nOriginal error: ${errorMessage}`);
			throw launchError;
		}
		this.context = await this.browser.newContext({ viewport: {
			width: 800,
			height: 600
		} });
		this.page = await this.context.newPage();
		this.page.on("console", (msg) => {
			const type = msg.type();
			if (type === "error" || type === "warning") logger.debug("[ChatKitProvider] Browser console", {
				type,
				text: msg.text()
			});
		});
		// Keep the "localhost" origin for the page; it resolves to loopback.
		await this.page.goto(`http://localhost:${this.serverPort}`, { waitUntil: "domcontentloaded" });
		logger.debug("[ChatKitProvider] Waiting for ChatKit ready");
		await this.page.waitForFunction(() => window.__state?.ready === true, { timeout: CHATKIT_READY_TIMEOUT_MS });
		this.initialized = true;
		// This instance now owns a browser and a server, so always register it
		// for shutdown. `stateful: true` takes this path even when `usePool`
		// is left at its default of true, so the flag alone is not a reliable
		// indicator of ownership.
		providerRegistry.register(this);
		logger.debug("[ChatKitProvider] Initialized successfully");
	}
	/**
	 * Shutdown method for providerRegistry cleanup
	 */
	async shutdown() {
		await this.cleanup();
	}
	/**
	 * Clean up browser resources.
	 *
	 * References are detached first and every resource is released even when
	 * closing an earlier one throws; such an error still propagates.
	 */
	async cleanup() {
		const { context, browser, server } = this;
		this.context = null;
		this.page = null;
		this.browser = null;
		this.server = null;
		this.initialized = false;
		try {
			if (context) await context.close();
		} finally {
			try {
				if (browser) await browser.close();
			} finally {
				if (server) server.close();
			}
		}
	}
	/**
	 * Call the ChatKit workflow with the given prompt.
	 *
	 * Delegates to `callApiWithPool` in pool mode. In dedicated mode the page
	 * is reloaded before every call unless `stateful` is set, in which case
	 * follow-up prompts continue the existing thread.
	 *
	 * @returns A result object with `output` on success or `error` on failure;
	 *   failures are reported in the result, not thrown.
	 */
	async callApi(prompt, _context, _callApiOptions) {
		const usePool = this.chatKitConfig.usePool && !this.chatKitConfig.stateful;
		logger.debug("[ChatKitProvider] Starting call", {
			prompt: prompt.substring(0, 100),
			workflowId: this.chatKitConfig.workflowId,
			usePool,
			stateful: this.chatKitConfig.stateful
		});
		if (usePool) return this.callApiWithPool(prompt);
		const startTime = Date.now();
		try {
			await this.initialize();
			if (!this.page) throw new Error("Browser page not initialized");
			if (!this.chatKitConfig.stateful) {
				await this.page.reload({ waitUntil: "domcontentloaded" });
				await this.page.waitForFunction(() => window.__state?.ready === true, { timeout: CHATKIT_READY_TIMEOUT_MS });
			}
			const responseCount = await this.page.evaluate(() => window.__state?.responses?.length || 0);
			const isFollowUp = this.chatKitConfig.stateful && responseCount > 0;
			logger.debug("[ChatKitProvider] Sending message", {
				stateful: this.chatKitConfig.stateful,
				isFollowUp,
				responseCount
			});
			await this.page.evaluate(({ text, newThread }) => {
				return window.__chatkit.sendUserMessage({
					text,
					newThread
				});
			}, {
				text: prompt,
				newThread: !isFollowUp
			});
			logger.debug("[ChatKitProvider] Waiting for response");
			const expectedResponseCount = responseCount + 1;
			await this.page.waitForFunction((expected) => window.__state?.responses?.length >= expected, expectedResponseCount, { timeout: this.chatKitConfig.timeout });
			const stabilizationResult = await waitForContentStabilization(this.page, this.chatKitConfig.timeout ?? DEFAULT_TIMEOUT_MS, startTime);
			const approvalsHandled = await processApprovals(this.page, this.chatKitConfig.approvalHandling ?? "auto-approve", this.chatKitConfig.maxApprovals ?? DEFAULT_MAX_APPROVALS, this.chatKitConfig.timeout ?? DEFAULT_TIMEOUT_MS);
			if (approvalsHandled > 0) logger.debug("[ChatKitProvider] Processed approvals", { count: approvalsHandled });
			let responseText = await extractResponseFromFrame(this.page);
			if (!responseText && stabilizationResult.assistantResponse) {
				logger.debug("[ChatKitProvider] Using fallback content from stabilization", { fallbackLength: stabilizationResult.assistantResponse.length });
				responseText = cleanAssistantResponse(stabilizationResult.assistantResponse);
			}
			const threadId = await this.page.evaluate(() => window.__state.threadId);
			const finalResponseCount = await this.page.evaluate(() => window.__state?.responses?.length || 0);
			const latencyMs = Date.now() - startTime;
			logger.debug("[ChatKitProvider] Response received", {
				threadId,
				textLength: responseText.length,
				turnNumber: finalResponseCount,
				latencyMs
			});
			return {
				output: responseText,
				cached: false,
				latencyMs,
				sessionId: threadId,
				tokenUsage: { numRequests: 1 },
				metadata: {
					workflowId: this.chatKitConfig.workflowId,
					version: this.chatKitConfig.version,
					stateful: this.chatKitConfig.stateful,
					turnNumber: finalResponseCount
				}
			};
		} catch (error) {
			const errorMessage = error instanceof Error ? error.message : String(error);
			logger.error("[ChatKitProvider] Call failed", { error: errorMessage });
			// Read the page-side error BEFORE any cleanup discards the page.
			let stateError;
			if (this.page) try {
				stateError = await this.page.evaluate(() => window.__state?.error);
			} catch (stateReadError) {
				logger.debug("[ChatKitProvider] Could not read page error state", { error: stateReadError });
			}
			// A failed initialize() leaves a server/browser behind that nothing
			// else would close; release them now.
			if (!this.initialized) try {
				await this.cleanup();
			} catch (cleanupError) {
				logger.debug("[ChatKitProvider] Cleanup after failed initialization failed", { error: cleanupError });
			}
			if (stateError) return { error: `ChatKit workflow error: ${stateError}` };
			if (errorMessage.includes("Timeout") || errorMessage.includes("timeout")) return { error: `ChatKit response timeout after ${this.chatKitConfig.timeout}ms. Try increasing timeout in config or use --max-concurrency 1 for more reliable results.` };
			if (errorMessage.includes("API key")) return { error: "OpenAI API key is required. Set OPENAI_API_KEY environment variable." };
			if (errorMessage.includes("Playwright") || errorMessage.includes("browser")) return { error: `Browser error: ${errorMessage}. Ensure Playwright is installed: npx playwright install chromium` };
			return { error: `ChatKit provider error: ${errorMessage}` };
		}
	}
	/**
	 * Pool-based callApi for better concurrency support.
	 * Uses a shared browser with multiple contexts instead of separate browsers.
	 *
	 * Every call starts a new thread; the borrowed page is always returned to
	 * the pool, including on failure.
	 *
	 * @returns A result object with `output` on success or `error` on failure.
	 */
	async callApiWithPool(prompt) {
		const apiKey = this.getApiKey();
		if (!apiKey) return { error: "OpenAI API key is required. Set OPENAI_API_KEY environment variable." };
		const workflowId = this.chatKitConfig.workflowId;
		if (!workflowId) return { error: "ChatKit workflowId is required" };
		const pool = ChatKitBrowserPool.getInstance({
			maxConcurrency: this.chatKitConfig.poolSize,
			headless: this.chatKitConfig.headless
		});
		const templateKey = ChatKitBrowserPool.generateTemplateKey(workflowId, this.chatKitConfig.version, this.chatKitConfig.userId);
		const html = generateChatKitHTML(apiKey, workflowId, this.chatKitConfig.version, this.chatKitConfig.userId);
		pool.setTemplate(templateKey, html);
		let pooledPage = null;
		const startTime = Date.now();
		try {
			pooledPage = await pool.acquirePage(templateKey);
			const page = pooledPage.page;
			logger.debug("[ChatKitProvider] Acquired page from pool", { stats: pool.getStats() });
			await page.evaluate((text) => {
				return window.__chatkit.sendUserMessage({
					text,
					newThread: true
				});
			}, prompt);
			await page.waitForFunction(() => window.__state?.responses?.length > 0, { timeout: this.chatKitConfig.timeout });
			const stabilizationResult = await waitForContentStabilization(page, this.chatKitConfig.timeout ?? DEFAULT_TIMEOUT_MS, startTime);
			const approvalsHandled = await processApprovals(page, this.chatKitConfig.approvalHandling ?? "auto-approve", this.chatKitConfig.maxApprovals ?? DEFAULT_MAX_APPROVALS, this.chatKitConfig.timeout ?? DEFAULT_TIMEOUT_MS);
			if (approvalsHandled > 0) logger.debug("[ChatKitProvider] Pool processed approvals", { count: approvalsHandled });
			let responseText = await extractResponseFromFrame(page);
			if (!responseText && stabilizationResult.assistantResponse) {
				logger.debug("[ChatKitProvider] Pool using fallback content from stabilization", { fallbackLength: stabilizationResult.assistantResponse.length });
				responseText = cleanAssistantResponse(stabilizationResult.assistantResponse);
			}
			const threadId = await page.evaluate(() => window.__state.threadId);
			const latencyMs = Date.now() - startTime;
			logger.debug("[ChatKitProvider] Pool response received", {
				threadId,
				textLength: responseText.length,
				latencyMs
			});
			return {
				output: responseText,
				cached: false,
				latencyMs,
				sessionId: threadId,
				tokenUsage: { numRequests: 1 },
				metadata: {
					workflowId: this.chatKitConfig.workflowId,
					version: this.chatKitConfig.version,
					poolMode: true
				}
			};
		} catch (error) {
			const errorMessage = error instanceof Error ? error.message : String(error);
			logger.error("[ChatKitProvider] Pool call failed", { error: errorMessage });
			if (errorMessage.includes("Timeout") || errorMessage.includes("timeout")) return { error: `ChatKit response timeout after ${this.chatKitConfig.timeout}ms. Try increasing timeout or reducing concurrency.` };
			return { error: `ChatKit provider error: ${errorMessage}` };
		} finally {
			if (pooledPage) await pool.releasePage(pooledPage);
		}
	}
};
1154
+ //#endregion
1155
+ export { OpenAiChatKitProvider };
1156
+
1157
+ //# sourceMappingURL=chatkit-swAIVuea.js.map