@midscene/core 1.5.5 → 1.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -152,7 +152,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
152
152
  return;
153
153
  }
154
154
  }
155
- const getMidsceneVersion = ()=>"1.5.5";
155
+ const getMidsceneVersion = ()=>"1.5.6";
156
156
  const parsePrompt = (prompt)=>{
157
157
  if ('string' == typeof prompt) return {
158
158
  textPrompt: prompt,
@@ -0,0 +1,584 @@
1
+ import { getDebug } from "@midscene/shared/logger";
2
+ import { ifInBrowser } from "@midscene/shared/utils";
3
/**
 * Transpiler-style helper that sets `obj[key]` to `value` and returns `obj`.
 *
 * When `key` is already reachable on `obj` (own or inherited), the value is
 * installed as an own, enumerable, configurable, writable data property via
 * `Object.defineProperty`, so an inherited setter is not invoked. Otherwise a
 * plain assignment is used.
 *
 * @param {object} obj - Target object.
 * @param {PropertyKey} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`.
 */
function _define_property(obj, key, value) {
  if (!(key in obj)) {
    obj[key] = value;
    return obj;
  }
  const descriptor = {
    value: value,
    enumerable: true,
    configurable: true,
    writable: true,
  };
  Object.defineProperty(obj, key, descriptor);
  return obj;
}
13
// URL scheme prefix that marks a base URL as targeting the codex app-server provider.
const CODEX_PROVIDER_SCHEME = 'codex://';
// Fallback turn deadline (10 minutes), used when the model config carries no timeout.
const CODEX_DEFAULT_TIMEOUT_MS = 10 * 60 * 1000;
// Deadline for the initialize handshake after the process is spawned (15 seconds).
const CODEX_DEFAULT_PROCESS_START_TIMEOUT_MS = 15 * 1000;
// Deadline for the best-effort thread unsubscribe issued after a turn (8 seconds).
const CODEX_DEFAULT_CLEANUP_TIMEOUT_MS = 8 * 1000;
// Maximum transcript length (in UTF-16 code units) sent as the text input: 256 Ki.
const CODEX_TEXT_INPUT_MAX_LENGTH = 256 * 1024;
// Debug logger for this module.
const debugCodex = getDebug('ai:call:codex');
// Same namespace, created with the `console: true` option; used for warnings.
const warnCodex = getDebug('ai:call:codex', { console: true });
22
/**
 * Runs async jobs strictly one after another, in the order `run` was called.
 *
 * Each call replaces `tail` with a fresh promise that is resolved once its own
 * job has settled (successfully or not), and waits for the previous `tail`
 * before starting. A failing job therefore never blocks the jobs queued after it.
 */
class SerializedRunner {
  constructor() {
    // Promise that resolves when the most recently queued job has finished.
    this.tail = Promise.resolve();
  }

  /**
   * Queues `work` behind all previously queued jobs.
   *
   * @param {() => any} work - Job to execute; may return a promise.
   * @returns {Promise<any>} Settles with the outcome of `work`.
   */
  async run(work) {
    const predecessor = this.tail;
    let markFinished;
    this.tail = new Promise((resolve) => {
      markFinished = resolve;
    });
    await predecessor;
    try {
      return await work();
    } finally {
      // Always unblock the next job, even when this one threw.
      markFinished();
    }
  }
}
40
/**
 * Tells whether a configured base URL selects the codex app-server provider.
 * The comparison ignores surrounding whitespace and letter case.
 *
 * @param {string | undefined | null} baseURL - Configured base URL.
 * @returns {boolean} `true` when the URL starts with the codex scheme.
 */
const isCodexAppServerProvider = (baseURL) => {
  if (!baseURL) {
    return false;
  }
  const normalized = baseURL.trim().toLowerCase();
  return normalized.startsWith(CODEX_PROVIDER_SCHEME);
};
44
/**
 * Heuristically decides whether a thrown value represents a cancellation.
 *
 * A value counts as an abort when it is an `Error` named `AbortError`, or when
 * its message (or its string form, for non-Error values) contains "abort" in
 * any letter case.
 *
 * @param {unknown} error - The caught value.
 * @returns {boolean} `true` when the value looks like an abort.
 */
const isAbortError = (error) => {
  if (!error) {
    return false;
  }
  const isErrorInstance = error instanceof Error;
  if (isErrorInstance && error.name === 'AbortError') {
    return true;
  }
  const text = isErrorInstance ? error.message : String(error ?? 'unknown error');
  return /aborted|abort/i.test(text);
};
50
/**
 * Normalizes a value to a trimmed, non-empty string.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string | undefined} The trimmed string, or `undefined` when the
 *   value is not a string or is blank after trimming.
 */
const toNonEmptyString = (value) => {
  if (typeof value === 'string') {
    const trimmed = value.trim();
    if (trimmed !== '') {
      return trimmed;
    }
  }
  return undefined;
};
55
/**
 * Converts a `file://` URL into a filesystem path string; any other input is
 * returned unchanged.
 *
 * - On `win32`, forward slashes become backslashes and the leading slash in
 *   front of a drive letter is dropped; a non-local host yields a UNC-style
 *   `\\host\path` string.
 * - On other platforms the decoded pathname is returned; a non-local host
 *   yields `//host/path`.
 *
 * Percent-decoding is best-effort: when the path contains a malformed escape
 * sequence (for example a literal `%` in a file name), the undecoded text is
 * used instead of letting `URIError` escape.
 *
 * @param {string} imageUrl - Image reference, possibly a `file://` URL.
 * @param {string} [platform=process.platform] - Target platform identifier.
 * @returns {string} Local path, or the original string for non-file URLs.
 */
const normalizeCodexLocalImagePath = (imageUrl, platform = process.platform) => {
  const FILE_SCHEME = 'file://';
  if (!imageUrl.startsWith(FILE_SCHEME)) return imageUrl;

  // decodeURIComponent throws URIError on malformed escapes; keep the raw text then.
  const decodeOrKeep = (value) => {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  };

  let parsed;
  try {
    parsed = new URL(imageUrl);
  } catch {
    // Not parseable as a URL: strip the scheme and treat the rest as the path.
    return decodeOrKeep(imageUrl.slice(FILE_SCHEME.length));
  }

  const pathname = decodeOrKeep(parsed.pathname);
  const host = parsed.hostname.toLowerCase();
  const hasRemoteHost = host !== '' && host !== 'localhost';

  if (platform === 'win32') {
    // "/C:/dir/file" -> "C:\dir\file"
    const windowsPath = pathname.replace(/\//g, '\\').replace(/^\\([A-Za-z]:)/, '$1');
    return hasRemoteHost ? `\\\\${parsed.hostname}${windowsPath}` : windowsPath;
  }
  return hasRemoteHost ? `//${parsed.hostname}${pathname}` : pathname;
};
72
/**
 * Collects the textual content of a chat message.
 *
 * String content is returned as-is. For array content, the `text` of every
 * part whose type is `text` or `input_text` is gathered (empty texts are
 * dropped) and joined with newlines. Any other content shape yields ''.
 *
 * @param {{ content?: unknown }} message - Chat message.
 * @returns {string} The extracted text.
 */
const extractTextFromMessage = (message) => {
  const { content } = message;
  if (typeof content === 'string') {
    return content;
  }
  if (!Array.isArray(content)) {
    return '';
  }
  const pieces = [];
  for (const part of content) {
    if (!part || typeof part !== 'object') continue;
    const isTextPart = part.type === 'text' || part.type === 'input_text';
    if (isTextPart && typeof part.text === 'string' && part.text) {
      pieces.push(part.text);
    }
  }
  return pieces.join('\n');
};
83
/**
 * Builds the list of image inputs found in a message's array content.
 *
 * Recognized parts are `image_url` (URL read from `image_url.url`) and
 * `input_image` (URL read from `image_url` or `url`). References that start
 * with `/`, `./`, `../` or `file://` become `localImage` entries (file URLs are
 * converted to paths first); everything else becomes an `image` entry carrying
 * the URL. A `detail` field is attached only when one is available.
 *
 * @param {{ content?: unknown }} message - Chat message.
 * @param {string} [imageDetailOverride] - When set, used as the detail of every image.
 * @returns {Array<object>} Image inputs in content order; empty for non-array content.
 */
const extractImageInputs = (message, imageDetailOverride) => {
  const { content } = message;
  if (!Array.isArray(content)) {
    return [];
  }

  const readImageReference = (part) => {
    switch (String(part.type || '')) {
      case 'image_url':
        return toNonEmptyString(part.image_url?.url);
      case 'input_image':
        return toNonEmptyString(part.image_url || part.url);
      default:
        return undefined;
    }
  };
  const localPrefixes = ['/', './', '../', 'file://'];

  return content.flatMap((part) => {
    if (!part || typeof part !== 'object') return [];
    const reference = readImageReference(part);
    if (!reference) return [];

    const detail = imageDetailOverride || toNonEmptyString(part.image_url?.detail) || toNonEmptyString(part.detail);
    const detailField = detail ? { detail } : {};

    const isLocal = localPrefixes.some((prefix) => reference.startsWith(prefix));
    if (!isLocal) {
      return [{ type: 'image', url: reference, ...detailField }];
    }
    const path = reference.startsWith('file://') ? normalizeCodexLocalImagePath(reference) : reference;
    return [{ type: 'localImage', path, ...detailField }];
  });
};
114
/**
 * Picks the reasoning effort for a turn.
 *
 * Precedence: an explicit boolean `deepThink` (true -> 'high', false -> 'low'),
 * then a recognized `modelConfig.reasoningEffort` ('low', 'medium', 'high' or
 * 'xhigh', compared after trimming and lower-casing), then a boolean
 * `modelConfig.reasoningEnabled` (true -> 'high', false -> 'low').
 *
 * @param {{ deepThink?: boolean, modelConfig: object }} args
 * @returns {string | undefined} The effort, or `undefined` when nothing applies.
 */
const resolveCodexReasoningEffort = ({ deepThink, modelConfig }) => {
  if (typeof deepThink === 'boolean') {
    return deepThink ? 'high' : 'low';
  }

  const configured = modelConfig.reasoningEffort?.trim().toLowerCase();
  switch (configured) {
    case 'low':
    case 'medium':
    case 'high':
    case 'xhigh':
      return configured;
    default:
      break;
  }

  const { reasoningEnabled } = modelConfig;
  if (typeof reasoningEnabled === 'boolean') {
    return reasoningEnabled ? 'high' : 'low';
  }
  return undefined;
};
122
/**
 * Flattens a chat-style message list into the payload for a single codex turn.
 *
 * - `system` messages with non-blank text are joined (blank line between them)
 *   into `developerInstructions`; `undefined` when there are none.
 * - Every other message contributes a `[ROLE]`-tagged entry to a transcript;
 *   messages without text are recorded as "(no text content)".
 * - Images are collected from `user` messages only.
 * - When the transcript exceeds CODEX_TEXT_INPUT_MAX_LENGTH, only its trailing
 *   portion is kept; an empty transcript is replaced by a generic request.
 *
 * @param {Array<object>} messages - Chat messages (a missing role is treated as 'user').
 * @param {{ imageDetailOverride?: string }} [options]
 * @returns {{ developerInstructions: string | undefined, input: Array<object> }}
 */
const buildCodexTurnPayloadFromMessages = (messages, options) => {
  const systemTexts = [];
  const transcriptEntries = [];
  const attachedImages = [];

  for (const message of messages) {
    const role = String(message.role || 'user');
    const body = extractTextFromMessage(message).trim();

    if (role === 'system') {
      if (body) systemTexts.push(body);
      continue;
    }

    const tag = role.toUpperCase();
    transcriptEntries.push(body ? `[${tag}]\n${body}` : `[${tag}]\n(no text content)`);

    if (role === 'user') {
      attachedImages.push(...extractImageInputs(message, options?.imageDetailOverride));
    }
  }

  const joined = transcriptEntries.join('\n\n');
  // Keep the tail so the most recent messages survive truncation.
  const clipped = joined.length > CODEX_TEXT_INPUT_MAX_LENGTH ? joined.slice(-CODEX_TEXT_INPUT_MAX_LENGTH) : joined;
  const textInput = {
    type: 'text',
    text: clipped || 'Please answer the latest user request.',
    text_elements: [],
  };

  return {
    developerInstructions: systemTexts.length ? systemTexts.join('\n\n') : void 0,
    input: [textInput, ...attachedImages],
  };
};
154
/**
 * Client for a spawned `codex app-server` child process.
 *
 * Messages are exchanged as one JSON document per line: requests
 * (`{ id, method, params }`) and notifications are written to the child's
 * stdin, and responses, notifications and server-initiated requests are read
 * line by line from its stdout. Incoming lines are buffered by listeners and
 * consumed by polling in `nextMessage`.
 *
 * The message loop assumes a single caller at a time; concurrent `runTurn`
 * calls on the same instance are not coordinated here.
 */
class CodexAppServerConnection {
  /**
   * Spawns `codex app-server`, attaches listeners and performs the
   * initialize handshake.
   *
   * @returns {Promise<CodexAppServerConnection>} A ready-to-use connection.
   * @throws {Error} In a browser runtime, when stdio pipes are unavailable,
   *   or when the handshake fails or times out (the child is disposed first).
   */
  static async create() {
    if (ifInBrowser) throw new Error('codex app-server provider is not supported in browser runtime');
    // Module names are held in variables rather than written inline in import();
    // presumably to keep bundlers from resolving Node built-ins — TODO confirm.
    const childProcessModuleName = 'node:child_process';
    const readlineModuleName = 'node:readline';
    const { spawn } = await import(childProcessModuleName);
    const readline = await import(readlineModuleName);
    const child = spawn('codex', ['app-server'], {
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    if (!child.stdin || !child.stdout || !child.stderr) throw new Error('failed to start codex app-server: stdio unavailable');
    const lineReader = readline.createInterface({
      input: child.stdout,
      crlfDelay: Infinity,
    });
    const connection = new CodexAppServerConnection(child, lineReader);
    connection.detachFromEventLoop();
    connection.attachProcessListeners();
    try {
      await connection.initializeHandshake();
    } catch (error) {
      // Do not leave an orphaned child process behind when the handshake fails.
      await connection.dispose();
      throw error;
    }
    return connection;
  }

  /** @returns {boolean} Whether the connection was disposed or the child exited/errored. */
  isClosed() {
    return this.closed;
  }

  /**
   * Runs one model turn: starts an ephemeral thread, starts a turn with the
   * converted messages, then consumes notifications until the turn completes.
   *
   * @param {object} args
   * @param {Array<object>} args.messages - Chat messages to convert into turn input.
   * @param {object} args.modelConfig - Reads `timeout`, `modelName`, `modelFamily` and usage metadata fields.
   * @param {boolean} [args.stream] - Streaming is active only when `onChunk` is also given.
   * @param {(chunk: object) => void} [args.onChunk] - Receives streamed chunks.
   * @param {boolean} [args.deepThink] - Explicit reasoning-effort switch.
   * @param {AbortSignal} [args.abortSignal] - Cancels waiting for messages.
   * @returns {Promise<{content: string, reasoning_content: (string|undefined), usage: (object|undefined), isStreamed: boolean}>}
   * @throws {Error} On timeout, abort, closed connection, failed request or a
   *   turn that finishes with a status other than "completed".
   */
  async runTurn({ messages, modelConfig, stream, onChunk, deepThink, abortSignal }) {
    const startTime = Date.now();
    const timeoutMs = modelConfig.timeout || CODEX_DEFAULT_TIMEOUT_MS;
    // One deadline shared by every request and wait performed for this turn.
    const deadlineAt = Date.now() + timeoutMs;
    const isStreaming = !!(stream && onChunk);
    const imageDetailOverride = 'gpt-5' === modelConfig.modelFamily ? 'original' : void 0;
    const { developerInstructions, input } = buildCodexTurnPayloadFromMessages(messages, {
      imageDetailOverride,
    });
    const effort = resolveCodexReasoningEffort({
      deepThink,
      modelConfig,
    });
    let threadId;
    let turnId;
    let latestErrorMessage;
    let accumulatedText = '';
    let accumulatedReasoning = '';
    let latestUsage;
    const emitChunk = ({ content, reasoning, isComplete, usage }) => {
      if (!isStreaming || !onChunk) return;
      const chunk = {
        content,
        reasoning_content: reasoning,
        accumulated: accumulatedText,
        isComplete,
        usage,
      };
      onChunk(chunk);
    };
    try {
      const threadStartResponse = await this.request({
        method: 'thread/start',
        params: {
          model: modelConfig.modelName,
          cwd: process.cwd(),
          approvalPolicy: 'never',
          sandbox: 'read-only',
          ephemeral: true,
          experimentalRawEvents: false,
          persistExtendedHistory: false,
          developerInstructions: developerInstructions || null,
        },
        deadlineAt,
        abortSignal,
      });
      threadId = threadStartResponse?.thread?.id;
      if (!threadId) throw new Error('thread/start did not return a thread id');
      const turnStartResponse = await this.request({
        method: 'turn/start',
        params: {
          threadId,
          input,
          effort,
        },
        deadlineAt,
        abortSignal,
      });
      turnId = turnStartResponse?.turn?.id;
      if (!turnId) throw new Error('turn/start did not return a turn id');
      let turnStatus;
      while (!turnStatus) {
        const message = await this.nextMessage({
          deadlineAt,
          abortSignal,
        });
        // Responses reaching this loop belong to no in-flight request; ignore them.
        if (this.isResponseMessage(message)) continue;
        if (this.isRequestMessage(message)) {
          await this.respondToServerRequest(message);
          continue;
        }
        const notification = message;
        const method = notification.method;
        const params = notification.params || {};
        if ('error' === method) {
          // Remember the error; it is reported only if the turn does not complete.
          const messageText = params.error?.message || params.message || 'codex app-server reported turn error';
          latestErrorMessage = String(messageText);
          continue;
        }
        if ('item/agentMessage/delta' === method && params.threadId === threadId && params.turnId === turnId) {
          const delta = String(params.delta || '');
          if (delta) {
            accumulatedText += delta;
            emitChunk({
              content: delta,
              reasoning: '',
              isComplete: false,
            });
          }
          continue;
        }
        if (('item/reasoning/summaryTextDelta' === method || 'item/reasoning/textDelta' === method) && params.threadId === threadId && params.turnId === turnId) {
          const delta = String(params.delta || '');
          if (delta) {
            accumulatedReasoning += delta;
            emitChunk({
              content: '',
              reasoning: delta,
              isComplete: false,
            });
          }
          continue;
        }
        // Use the completed item's full text only when no deltas were received.
        if ('item/completed' === method && params.threadId === threadId && params.turnId === turnId && params.item?.type === 'agentMessage' && 'string' == typeof params.item?.text && !accumulatedText) {
          accumulatedText = params.item.text;
          continue;
        }
        if ('thread/tokenUsage/updated' === method && params.threadId === threadId && params.turnId === turnId) {
          latestUsage = this.mapUsage({
            usage: params,
            modelConfig,
            turnId,
            startTime,
          });
          continue;
        }
        if ('turn/completed' === method && params.threadId === threadId && params.turn?.id === turnId) {
          turnStatus = String(params.turn.status || '');
          latestErrorMessage = params.turn?.error?.message || latestErrorMessage || void 0;
          break;
        }
      }
      if ('completed' !== turnStatus) throw new Error(latestErrorMessage || `codex turn finished with status "${turnStatus || 'unknown'}"`);
      if (isStreaming) emitChunk({
        content: '',
        reasoning: '',
        isComplete: true,
        usage: latestUsage,
      });
      return {
        content: accumulatedText,
        reasoning_content: accumulatedReasoning || void 0,
        usage: latestUsage,
        isStreamed: isStreaming,
      };
    } catch (error) {
      // Best effort: ask the server to stop a turn the caller has abandoned.
      if (isAbortError(error) && threadId && turnId) await this.request({
        method: 'turn/interrupt',
        params: {
          threadId,
          turnId,
        },
        deadlineAt: Date.now() + 5000,
      }).catch(() => {});
      throw error;
    } finally {
      if (threadId) await this.request({
        method: 'thread/unsubscribe',
        params: {
          threadId,
        },
        deadlineAt: Date.now() + CODEX_DEFAULT_CLEANUP_TIMEOUT_MS,
      }).catch((error) => {
        warnCodex(`failed to unsubscribe codex thread ${threadId}: ${String(error)}`);
      });
    }
  }

  /**
   * Releases the line reader, closes stdin and kills the child process.
   * Safe to call more than once. Cleanup still runs when the connection was
   * already marked closed by an 'exit' or 'error' event.
   */
  async dispose() {
    if (this.disposed) return;
    this.disposed = true;
    this.closed = true;
    try {
      this.lineReader?.close?.();
    } catch {}
    try {
      this.child?.stdin?.end?.();
    } catch {}
    try {
      this.child?.kill?.();
    } catch {}
  }

  /** Wires stdout lines, stderr capture and process lifecycle events into instance state. */
  attachProcessListeners() {
    this.lineReader.on('line', (line) => {
      this.lineBuffer.push(line);
    });
    this.child.stderr.on('data', (chunk) => {
      const text = Buffer.isBuffer(chunk) ? chunk.toString('utf8') : String(chunk);
      this.stderrBuffer += text;
      // Keep only the most recent 8192 characters for error reporting.
      if (this.stderrBuffer.length > 8192) this.stderrBuffer = this.stderrBuffer.slice(-8192);
    });
    // A failed write is also emitted as an 'error' event on the stream; without
    // a listener that event would be thrown as an uncaught exception. The write
    // callback in sendMessage still rejects the pending send.
    this.child.stdin.on('error', (error) => {
      debugCodex(`codex app-server stdin error: ${error.message}`);
    });
    this.child.on('exit', (code) => {
      this.closed = true;
      this.lastExitCode = code;
    });
    this.child.on('error', (error) => {
      this.closed = true;
      this.processErrorMessage = error.message;
    });
  }

  /** Unrefs the child and its stdio streams (where supported) so they do not keep the event loop alive. */
  detachFromEventLoop() {
    this.child.unref?.();
    this.child.stdin?.unref?.();
    this.child.stdout?.unref?.();
    this.child.stderr?.unref?.();
  }

  /** Sends the `initialize` request followed by the `initialized` notification. */
  async initializeHandshake() {
    const deadlineAt = Date.now() + CODEX_DEFAULT_PROCESS_START_TIMEOUT_MS;
    await this.request({
      method: 'initialize',
      params: {
        clientInfo: {
          name: 'midscene_codex_provider',
          title: 'Midscene Codex Provider',
          version: '1.0.0',
        },
        capabilities: {
          experimentalApi: false,
        },
      },
      deadlineAt,
    });
    await this.sendMessage({
      method: 'initialized',
    });
  }

  /**
   * Converts a token-usage notification payload into the usage record
   * returned by `runTurn`. Prefers `tokenUsage.last` over `tokenUsage.total`.
   *
   * @returns {object | undefined} `undefined` when neither figure is present.
   */
  mapUsage({ usage, modelConfig, turnId, startTime }) {
    const tokenUsage = usage.tokenUsage;
    const picked = tokenUsage?.last || tokenUsage?.total;
    if (!picked) return;
    return {
      prompt_tokens: picked.inputTokens ?? 0,
      completion_tokens: picked.outputTokens ?? 0,
      total_tokens: picked.totalTokens ?? 0,
      cached_input: picked.cachedInputTokens ?? 0,
      time_cost: Date.now() - startTime,
      model_name: modelConfig.modelName,
      model_description: modelConfig.modelDescription,
      intent: modelConfig.intent,
      request_id: turnId,
    };
  }

  /** A server-initiated request carries both a string `method` and an `id`. */
  isRequestMessage(message) {
    return 'string' == typeof message?.method && message?.id !== void 0;
  }

  /** A response carries an `id` plus `result` or `error`, and no string `method`. */
  isResponseMessage(message) {
    return message?.id !== void 0 && (message?.result !== void 0 || message?.error !== void 0) && 'string' != typeof message?.method;
  }

  /**
   * Sends a request and waits for the response with the matching id.
   * Server-initiated requests arriving meanwhile are answered immediately;
   * every other message is parked in `pendingMessages` for later consumers.
   *
   * @returns {Promise<object>} The response `result` (or `{}` when falsy).
   * @throws {Error} When the response carries an error, or on timeout, abort
   *   or closed connection.
   */
  async request({ method, params, deadlineAt, abortSignal }) {
    const requestId = this.nextRequestId++;
    await this.sendMessage({
      id: requestId,
      method,
      params,
    });
    while (true) {
      const message = await this.nextMessage({
        deadlineAt,
        abortSignal,
        // Read fresh lines only; parked messages are never the awaited response.
        includePending: false,
      });
      if (this.isResponseMessage(message) && message.id === requestId) {
        if (message.error) throw new Error(`codex app-server ${method} failed: ${message.error.message || 'unknown error'}`);
        return message.result || {};
      }
      if (this.isRequestMessage(message)) {
        await this.respondToServerRequest(message);
        continue;
      }
      this.pendingMessages.push(message);
    }
  }

  /**
   * Answers a server-initiated request: approvals are declined, elicitations
   * cancelled, user-input requests answered with no answers, and any other
   * method is rejected with error code -32601.
   */
  async respondToServerRequest(request) {
    const requestId = request.id;
    const method = request.method;
    let result;
    switch (method) {
      case 'item/commandExecution/requestApproval':
      case 'item/fileChange/requestApproval':
        result = {
          decision: 'decline',
        };
        break;
      case 'mcpServer/elicitation/request':
        result = {
          action: 'cancel',
          content: null,
        };
        break;
      case 'item/tool/requestUserInput':
        result = {
          answers: [],
        };
        break;
      default:
        await this.sendMessage({
          id: requestId,
          error: {
            code: -32601,
            message: `unsupported server request: ${method}`,
          },
        });
        return;
    }
    await this.sendMessage({
      id: requestId,
      result,
    });
  }

  /**
   * Returns the next parsed message, polling the line buffer every 20 ms.
   * Blank and non-JSON lines are skipped. Buffered lines are drained before a
   * closed connection is reported.
   *
   * @throws {Error} On abort, when the deadline has passed, or when the
   *   connection is closed and no buffered line remains.
   */
  async nextMessage({ deadlineAt, abortSignal, includePending = true }) {
    if (includePending && this.pendingMessages.length) return this.pendingMessages.shift();
    while (true) {
      if (abortSignal?.aborted) throw new Error('codex app-server request aborted');
      if (deadlineAt && Date.now() > deadlineAt) throw new Error('codex app-server request timed out');
      if (this.lineBuffer.length) {
        const line = this.lineBuffer.shift();
        const trimmed = line.trim();
        if (!trimmed) continue;
        let parsed;
        try {
          parsed = JSON.parse(trimmed);
        } catch (error) {
          warnCodex(`ignored non-JSON message from codex app-server: ${trimmed}`);
          continue;
        }
        return parsed;
      }
      if (this.closed) throw this.createClosedConnectionError();
      await new Promise((resolve) => setTimeout(resolve, 20));
    }
  }

  /**
   * Serializes `payload` as one JSON line and writes it to the child's stdin.
   *
   * @throws {Error} When the connection is closed or the write fails.
   */
  async sendMessage(payload) {
    if (this.closed) throw this.createClosedConnectionError();
    const line = JSON.stringify(payload);
    await new Promise((resolve, reject) => {
      this.child.stdin.write(`${line}\n`, (error) => {
        if (error) return void reject(new Error(`failed writing to codex app-server stdin: ${error.message}`));
        resolve();
      });
    });
  }

  /** Builds the error describing why the connection is unusable, including captured stderr when present. */
  createClosedConnectionError() {
    const stderr = this.stderrBuffer.trim();
    if (this.processErrorMessage) return new Error(stderr ? `codex app-server process error: ${this.processErrorMessage}. stderr=${stderr}` : `codex app-server process error: ${this.processErrorMessage}`);
    return new Error(stderr ? `codex app-server connection closed (exitCode=${this.lastExitCode}). stderr=${stderr}` : `codex app-server connection closed (exitCode=${this.lastExitCode})`);
  }

  /**
   * @param {object} child - Spawned child process exposing stdin/stdout/stderr.
   * @param {object} lineReader - Line reader attached to the child's stdout.
   */
  constructor(child, lineReader) {
    this.child = child;
    this.lineReader = lineReader;
    // Non-matching messages parked by request() for later nextMessage() calls.
    this.pendingMessages = [];
    // Raw stdout lines not yet parsed.
    this.lineBuffer = [];
    this.nextRequestId = 1;
    // True once the child exited/errored or dispose() ran.
    this.closed = false;
    // True once dispose() has performed its cleanup.
    this.disposed = false;
    this.lastExitCode = null;
    this.processErrorMessage = null;
    this.stderrBuffer = '';
  }
}
526
/**
 * Owns a single, lazily created CodexAppServerConnection and funnels all
 * turns through a SerializedRunner so they execute one at a time.
 */
class CodexAppServerConnectionManager {
  constructor() {
    // Current connection, or null when none has been created or it was reset.
    this.connection = null;
    this.runner = new SerializedRunner();
  }

  /**
   * Runs a turn on the shared connection, creating it on demand.
   * After a failure the connection is discarded unless the failure is an
   * abort and the connection is still open.
   *
   * @param {object} args - Forwarded to the connection's `runTurn`.
   * @returns {Promise<object>} The turn result.
   */
  async runTurn({ messages, modelConfig, stream, onChunk, deepThink, abortSignal }) {
    const executeTurn = async () => {
      const activeConnection = await this.getConnection();
      try {
        return await activeConnection.runTurn({
          messages,
          modelConfig,
          stream,
          onChunk,
          deepThink,
          abortSignal,
        });
      } catch (error) {
        const shouldKeepConnection = !activeConnection.isClosed() && isAbortError(error);
        if (!shouldKeepConnection) {
          await this.resetConnection();
        }
        throw error;
      }
    };
    return this.runner.run(executeTurn);
  }

  /** Disposes the current connection, if any. */
  async shutdownForTests() {
    await this.resetConnection();
  }

  /** Returns the live connection, creating a new one when missing or closed. */
  async getConnection() {
    const needsNewConnection = !this.connection || this.connection.isClosed();
    if (needsNewConnection) {
      this.connection = await CodexAppServerConnection.create();
      debugCodex('started long-lived codex app-server connection');
    }
    return this.connection;
  }

  /** Forgets the current connection and disposes it; no-op when there is none. */
  async resetConnection() {
    const staleConnection = this.connection;
    if (!staleConnection) return;
    this.connection = null;
    await staleConnection.dispose();
    debugCodex('reset codex app-server connection');
  }
}
567
// Module-level manager instance shared by callAIWithCodexAppServer and __shutdownCodexAppServerForTests.
+ const codexConnectionManager = new CodexAppServerConnectionManager();
568
/**
 * Sends chat messages to the codex app-server provider through the shared
 * connection manager.
 *
 * @param {Array<object>} messages - Chat messages for the turn.
 * @param {object} modelConfig - Model configuration forwarded to the turn.
 * @param {{ stream?: boolean, onChunk?: Function, deepThink?: boolean, abortSignal?: AbortSignal }} [options]
 * @returns {Promise<object>} The result produced by the manager's `runTurn`.
 * @throws {Error} When running in a browser runtime.
 */
async function callAIWithCodexAppServer(messages, modelConfig, options) {
  if (ifInBrowser) {
    throw new Error('codex app-server provider is not supported in browser runtime');
  }
  const turnRequest = {
    messages,
    modelConfig,
    stream: options?.stream,
    onChunk: options?.onChunk,
    deepThink: options?.deepThink,
    abortSignal: options?.abortSignal,
  };
  return codexConnectionManager.runTurn(turnRequest);
}
579
/**
 * Test hook: asks the shared connection manager to shut down.
 *
 * @returns {Promise<void>} Resolves once the manager's shutdown has finished.
 */
async function __shutdownCodexAppServerForTests() {
  const manager = codexConnectionManager;
  await manager.shutdownForTests();
}
582
+ export { __shutdownCodexAppServerForTests, buildCodexTurnPayloadFromMessages, callAIWithCodexAppServer, isCodexAppServerProvider, normalizeCodexLocalImagePath, resolveCodexReasoningEffort };
583
+
584
+ //# sourceMappingURL=codex-app-server.mjs.map