@testdriverai/mcp 7.9.103-canary → 7.9.104-test

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,8 +12,9 @@ process.env.TD_DEBUG = "true";
12
12
  import { registerAppResource, registerAppTool, RESOURCE_MIME_TYPE } from "@modelcontextprotocol/ext-apps/server";
13
13
  import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
14
14
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
15
+ import type { RequestHandlerExtra } from "@modelcontextprotocol/sdk/shared/protocol.js";
15
16
  import type { Variables } from "@modelcontextprotocol/sdk/shared/uriTemplate.js";
16
- import type { CallToolResult, ReadResourceResult } from "@modelcontextprotocol/sdk/types.js";
17
+ import type { CallToolResult, ReadResourceResult, ServerNotification, ServerRequest } from "@modelcontextprotocol/sdk/types.js";
17
18
  import * as Sentry from "@sentry/node";
18
19
  import * as fs from "fs";
19
20
  import * as os from "os";
@@ -333,11 +334,147 @@ function requireActiveSession(): { valid: true } | { valid: false; error: CallTo
333
334
  return { valid: true };
334
335
  }
335
336
 
337
+ // =============================================================================
338
+ // Progress reporting (MCP `notifications/progress`)
339
+ // =============================================================================
340
+
341
+ /**
+ * The `extra` argument every tool callback receives from the MCP SDK:
+ * `RequestHandlerExtra` specialised to server-side requests and notifications.
+ *
+ * Code in this file reads three members from it: `_meta.progressToken` and
+ * `sendNotification` (progress reporting) and `signal` (client cancellation).
+ */
342
+ type ToolExtra = RequestHandlerExtra<ServerRequest, ServerNotification>;
343
+
344
/**
 * Contract for pushing MCP `notifications/progress` updates to the client
 * while a long-running tool is executing.
 *
 * The MCP spec only allows progress to be sent when the request carried a
 * `progressToken` in its `_meta`. Implementations degrade to a no-op when the
 * token is missing, so call sites can report unconditionally and never branch.
 *
 * Progress is indeterminate: a real total is rarely known ahead of time, so
 * every emitted update simply bumps an internal counter and forwards an
 * optional human-readable `message`.
 */
interface ProgressReporter {
  /**
   * Starts a periodic progress tick, one every `intervalMs`, and hands back
   * the function that stops it.
   *
   * Intended to wrap a single long `await` (e.g. provisioning a sandbox or
   * polling a find) so the client keeps seeing activity and its idle timeout
   * keeps resetting — the situation that was tripping the `session_start`
   * timeout. Always call the returned stop function from a `finally`.
   */
  heartbeat: (message: string, intervalMs?: number) => () => void;

  /** Emits a single progress step, optionally labelled with `message`. */
  report: (message?: string) => void;
}
367
+
368
/** Default spacing, in milliseconds, between two heartbeat progress ticks. */
const DEFAULT_HEARTBEAT_MS = 3000;

/**
 * Builds the {@link ProgressReporter} for one tool invocation.
 *
 * @param extra - The `extra` argument of the tool callback. Its
 *   `_meta.progressToken` decides whether progress is sent at all, and its
 *   `sendNotification` is used to deliver each update.
 * @returns A reporter whose `report()` / `heartbeat()` send
 *   `notifications/progress` with a monotonically increasing `progress`
 *   counter, or a no-op reporter when the client supplied no progress token.
 *
 * Sending is strictly best-effort: neither a rejected nor a synchronously
 * throwing `sendNotification` propagates to the caller; failures are only
 * logged at debug level.
 */
function makeProgressReporter(extra: ToolExtra): ProgressReporter {
  const progressToken = extra?._meta?.progressToken;

  // No token → client did not opt into progress. Return a no-op reporter.
  if (progressToken === undefined || progressToken === null) {
    return {
      report: () => {},
      heartbeat: () => () => {},
    };
  }

  // Largest delay Node timers accept; anything above it is coerced to 1 ms.
  const MAX_TIMER_DELAY_MS = 2147483647;

  let progress = 0;

  const logSendFailure = (err: unknown): void => {
    logger.debug("progress: sendNotification failed", { error: String(err) });
  };

  const send = (message?: string): void => {
    progress += 1;
    // Fire-and-forget: a failed notification must never break the tool call.
    // The try/catch covers a synchronous throw (which would otherwise escape
    // from the heartbeat timer as an uncaught exception); the .catch covers an
    // asynchronous rejection.
    try {
      void Promise.resolve(
        extra.sendNotification({
          method: "notifications/progress",
          params: { progressToken, progress, message },
        })
      ).catch(logSendFailure);
    } catch (err) {
      logSendFailure(err);
    }
  };

  return {
    report: (message) => send(message),
    heartbeat: (message, intervalMs = DEFAULT_HEARTBEAT_MS) => {
      // setInterval silently turns delays that are < 1, NaN or too large into
      // 1 ms, which would flood the client. Fall back to the default for
      // unusable values and cap the rest at the timer maximum.
      const period =
        Number.isFinite(intervalMs) && intervalMs > 0
          ? Math.min(intervalMs, MAX_TIMER_DELAY_MS)
          : DEFAULT_HEARTBEAT_MS;

      // Emit once immediately so the client sees activity before the first tick.
      send(message);
      const timer = setInterval(() => send(message), period);
      // Don't let the heartbeat keep the event loop alive on its own.
      timer.unref?.();
      return () => clearInterval(timer);
    },
  };
}
407
+
408
+ // =============================================================================
409
+ // Cancellation (MCP `notifications/cancelled` → `extra.signal`)
410
+ // =============================================================================
411
+
412
/**
 * Error raised when the client cancels an in-flight tool call.
 *
 * The message names the cancelled tool, and `name` is set explicitly so the
 * error is recognisable in logs.
 */
class ToolAbortError extends Error {
  constructor(tool: string) {
    const description = tool + " was cancelled by the client";
    super(description);
    this.name = "ToolAbortError";
  }
}
419
+
420
/**
 * Produces a promise that never fulfils and rejects with a `ToolAbortError`
 * as soon as `signal` aborts; meant to be raced against a long SDK call.
 *
 * @param signal - Abort signal of the current request, if any.
 * @param tool - Tool name used in the `ToolAbortError` message.
 * @returns The abort promise together with a `cleanup` function that detaches
 *   the abort listener again.
 */
function rejectOnAbort(signal: AbortSignal | undefined, tool: string): { promise: Promise<never>; cleanup: () => void } {
  const noop = (): void => {};

  if (!signal) {
    // Nothing can abort us: a forever-pending promise is inert in Promise.race.
    return { promise: new Promise<never>(noop), cleanup: noop };
  }

  let listener: () => void = noop;
  const aborted = new Promise<never>((_resolve, reject) => {
    listener = () => reject(new ToolAbortError(tool));
    if (!signal.aborted) {
      signal.addEventListener("abort", listener, { once: true });
      return;
    }
    // Already aborted before we could subscribe: reject right away.
    listener();
  });

  const detach = (): void => signal.removeEventListener("abort", listener);
  return { promise: aborted, cleanup: detach };
}
437
+
438
/**
 * Run a long-running SDK call, but settle as soon as the client aborts.
 *
 * The wrapped SDK methods are not themselves signal-aware, so on abort the
 * underlying work keeps running to completion in the background — but the tool
 * call returns promptly with a `ToolAbortError` instead of blocking the client.
 * Callers that hold cleanable resources (e.g. `session_start`) should catch
 * `ToolAbortError` and tear them down.
 *
 * @param signal - Abort signal of the current request; `undefined` disables
 *   cancellation and simply awaits `work`.
 * @param tool - Tool name used in the `ToolAbortError` message.
 * @param work - The already-started SDK call to wait for.
 * @returns Whatever `work` resolves to.
 * @throws ToolAbortError when `signal` is aborted before or while waiting.
 * @throws Whatever `work` rejects with, if it fails before an abort.
 */
async function raceAbort<T>(signal: AbortSignal | undefined, tool: string, work: Promise<T>): Promise<T> {
  if (signal?.aborted) {
    // `work` was started by the caller before we were invoked and nobody will
    // await it now. Attach a handler so a later failure of that abandoned call
    // is logged instead of surfacing as an unhandled rejection, which would
    // terminate the process under Node's default settings.
    void Promise.resolve(work).catch((err: unknown) => {
      logger.debug(`${tool}: abandoned work failed after cancellation`, { error: String(err) });
    });
    throw new ToolAbortError(tool);
  }
  const { promise, cleanup } = rejectOnAbort(signal, tool);
  try {
    // Promise.race subscribes to `work`, so its outcome stays handled even
    // when the abort promise wins.
    return await Promise.race([work, promise]);
  } finally {
    // Always detach the abort listener, whichever side settled first.
    cleanup();
  }
}
458
+
459
/**
 * Maps a caught error to a "cancelled" tool result when it represents a client
 * cancellation.
 *
 * @param error - Whatever the tool's `catch` block received.
 * @param tool - Tool name, used in the log line, the result text and the
 *   result's `action` field.
 * @returns A tool result the caller can `return` directly when `error` is a
 *   `ToolAbortError`; `null` for every other error, so the usual handling
 *   (log + Sentry + rethrow) proceeds. A user cancelling is not a failure and
 *   is therefore kept out of error reporting.
 */
function cancelledResultOrNull(error: unknown, tool: string): CallToolResult | null {
  const isClientCancel = error instanceof ToolAbortError;
  if (!isClientCancel) {
    return null;
  }

  logger.info(`${tool}: Cancelled by client`);
  const details = { action: tool, cancelled: true };
  return createToolResult(false, `${tool} was cancelled.`, details);
}
472
+
336
473
  /**
337
474
  * Create tool result with structured content for MCP App
338
475
  * Images: imageUrl (data URL) goes to structuredContent for UI to display
339
476
  * The croppedImage from find() is small (~10KB) so it's acceptable as data URL
340
- *
477
+ *
341
478
  * If generatedCode is provided, it's appended to the text response with instructions
342
479
  * for the agent to write it to the test file.
343
480
  */
@@ -524,8 +661,9 @@ Debug mode (connect to existing sandbox):
524
661
  inputSchema: SessionStartInputSchema as any,
525
662
  _meta: { ui: { resourceUri: RESOURCE_URI, expanded: true } },
526
663
  },
527
- async (params: SessionStartInput): Promise<CallToolResult> => {
664
+ async (params: SessionStartInput, extra: ToolExtra): Promise<CallToolResult> => {
528
665
  const startTime = Date.now();
666
+ const progress = makeProgressReporter(extra);
529
667
 
530
668
  // Resolve OS with priority: explicit param > TD_OS env var > "linux" default
531
669
  // This mirrors the behavior of the Vitest hooks (hooks.mjs) which also reads TD_OS
@@ -621,10 +759,15 @@ Debug mode (connect to existing sandbox):
621
759
  // Handle sandboxId mode - connect to existing sandbox (debug-on-failure mode)
622
760
  if (params.sandboxId) {
623
761
  logger.info("session_start: Connecting to existing sandbox (debug mode)", { sandboxId: params.sandboxId });
624
- await sdk.connect({
625
- sandboxId: params.sandboxId,
626
- keepAlive: params.keepAlive,
627
- });
762
+ const stopHeartbeat = progress.heartbeat(`Connecting to existing sandbox ${params.sandboxId}...`);
763
+ try {
764
+ await raceAbort(extra.signal, "session_start", sdk.connect({
765
+ sandboxId: params.sandboxId,
766
+ keepAlive: params.keepAlive,
767
+ }));
768
+ } finally {
769
+ stopHeartbeat();
770
+ }
628
771
 
629
772
  // Get sandbox ID
630
773
  const instance = sdk.getInstance();
@@ -636,7 +779,8 @@ Debug mode (connect to existing sandbox):
636
779
 
637
780
  // Capture screenshot of current state
638
781
  logger.debug("session_start: Capturing screenshot of existing sandbox");
639
- const screenshotBase64 = await sdk.agent.system.captureScreenBase64(1, false, true);
782
+ progress.report("Capturing screenshot...");
783
+ const screenshotBase64 = await raceAbort(extra.signal, "session_start", sdk.agent.system.captureScreenBase64(1, false, true)) as string;
640
784
 
641
785
  let screenshotResourceUri: string | undefined;
642
786
  if (screenshotBase64) {
@@ -673,11 +817,20 @@ You are now connected to the sandbox in its current state. Use find, click, type
673
817
  } else {
674
818
  logger.info("session_start: Connecting to cloud sandbox...");
675
819
  }
676
- await sdk.connect({
677
- reconnect: params.reconnect,
678
- keepAlive: params.keepAlive,
679
- ip: instanceIp,
680
- });
820
+ {
821
+ const stopHeartbeat = progress.heartbeat(
822
+ instanceIp ? `Connecting to self-hosted instance ${instanceIp}...` : "Connecting to cloud sandbox..."
823
+ );
824
+ try {
825
+ await raceAbort(extra.signal, "session_start", sdk.connect({
826
+ reconnect: params.reconnect,
827
+ keepAlive: params.keepAlive,
828
+ ip: instanceIp,
829
+ }));
830
+ } finally {
831
+ stopHeartbeat();
832
+ }
833
+ }
681
834
 
682
835
  // Get sandbox ID
683
836
  const instance = sdk.getInstance();
@@ -691,12 +844,16 @@ You are now connected to the sandbox in its current state. Use find, click, type
691
844
  const provisionOptions = getProvisionOptions(params);
692
845
  let provisionCmd = "";
693
846
 
847
+ // Provisioning can take tens of seconds (downloading installers, booting
848
+ // apps). Heartbeat so the client's idle timeout keeps resetting.
849
+ const stopProvisionHeartbeat = progress.heartbeat(`Provisioning ${params.type}...`);
850
+ try {
694
851
  // Provision based on type
695
852
  switch (params.type) {
696
853
  case "chrome": {
697
854
  const chromeOpts = provisionOptions as { url: string; maximized?: boolean; guest?: boolean };
698
855
  logger.info("session_start: Provisioning Chrome", { url: chromeOpts.url });
699
- await sdk.provision.chrome(chromeOpts);
856
+ await raceAbort(extra.signal, "session_start", sdk.provision.chrome(chromeOpts));
700
857
  provisionCmd = "provision.chrome";
701
858
  logger.debug("session_start: Chrome provisioned");
702
859
  break;
@@ -705,7 +862,7 @@ You are now connected to the sandbox in its current state. Use find, click, type
705
862
  case "chromeExtension": {
706
863
  const extOpts = provisionOptions as { extensionPath?: string; extensionId?: string; maximized?: boolean };
707
864
  logger.info("session_start: Provisioning Chrome Extension", { extensionPath: extOpts.extensionPath, extensionId: extOpts.extensionId });
708
- await sdk.provision.chromeExtension(extOpts);
865
+ await raceAbort(extra.signal, "session_start", sdk.provision.chromeExtension(extOpts));
709
866
  provisionCmd = "provision.chromeExtension";
710
867
  logger.debug("session_start: Chrome Extension provisioned");
711
868
  break;
@@ -714,7 +871,7 @@ You are now connected to the sandbox in its current state. Use find, click, type
714
871
  case "vscode": {
715
872
  const vscodeOpts = provisionOptions as { workspace?: string; extensions?: string[] };
716
873
  logger.info("session_start: Provisioning VS Code", { workspace: vscodeOpts.workspace });
717
- await sdk.provision.vscode(vscodeOpts);
874
+ await raceAbort(extra.signal, "session_start", sdk.provision.vscode(vscodeOpts));
718
875
  provisionCmd = "provision.vscode";
719
876
  logger.debug("session_start: VS Code provisioned");
720
877
  break;
@@ -723,7 +880,7 @@ You are now connected to the sandbox in its current state. Use find, click, type
723
880
  case "installer": {
724
881
  const installerOpts = provisionOptions as { url: string; filename?: string; appName?: string; launch?: boolean };
725
882
  logger.info("session_start: Provisioning installer", { url: installerOpts.url });
726
- await sdk.provision.installer(installerOpts);
883
+ await raceAbort(extra.signal, "session_start", sdk.provision.installer(installerOpts));
727
884
  provisionCmd = "provision.installer";
728
885
  logger.debug("session_start: Installer provisioned");
729
886
  break;
@@ -732,16 +889,20 @@ You are now connected to the sandbox in its current state. Use find, click, type
732
889
  case "electron": {
733
890
  const electronOpts = provisionOptions as { appPath: string; args?: string[] };
734
891
  logger.info("session_start: Provisioning Electron", { appPath: electronOpts.appPath });
735
- await sdk.provision.electron(electronOpts);
892
+ await raceAbort(extra.signal, "session_start", sdk.provision.electron(electronOpts));
736
893
  provisionCmd = "provision.electron";
737
894
  logger.debug("session_start: Electron app provisioned");
738
895
  break;
739
896
  }
740
897
  }
898
+ } finally {
899
+ stopProvisionHeartbeat();
900
+ }
741
901
 
742
902
  // Capture initial screenshot after provisioning
743
903
  logger.debug("session_start: Capturing initial screenshot");
744
- const screenshotBase64 = await sdk.agent.system.captureScreenBase64(1, false, true);
904
+ progress.report("Capturing screenshot...");
905
+ const screenshotBase64 = await raceAbort(extra.signal, "session_start", sdk.agent.system.captureScreenBase64(1, false, true)) as string;
745
906
 
746
907
  let screenshotResourceUri: string | undefined;
747
908
  if (screenshotBase64) {
@@ -787,6 +948,18 @@ IMPORTANT - If creating a new test project, use these EXACT dependencies in pack
787
948
  generatedCode
788
949
  );
789
950
  } catch (error) {
951
+ // On client cancellation, tear down the half-provisioned session so we
952
+ // don't leak a connected sandbox. The underlying SDK call may still be
953
+ // running in the background; best-effort cleanup is all we can do.
954
+ if (error instanceof ToolAbortError) {
955
+ logger.info("session_start: Cancelled by client, tearing down session");
956
+ try {
957
+ await sdk?.disconnect?.();
958
+ } catch (cleanupErr) {
959
+ logger.warn("session_start: Cleanup after cancel failed", { error: String(cleanupErr) });
960
+ }
961
+ return createToolResult(false, "Session start was cancelled.", { action: "session_start", cancelled: true });
962
+ }
790
963
  logger.error("session_start: Failed", { error: String(error) });
791
964
  captureException(error as Error, { tags: { tool: "session_start" }, extra: { params } });
792
965
  throw error;
@@ -874,8 +1047,9 @@ registerAppTool(
874
1047
  }) as any,
875
1048
  _meta: { ui: { resourceUri: RESOURCE_URI, expanded: true } },
876
1049
  },
877
- async (params: { description: string; timeout?: number }): Promise<CallToolResult> => {
1050
+ async (params: { description: string; timeout?: number }, extra: ToolExtra): Promise<CallToolResult> => {
878
1051
  const startTime = Date.now();
1052
+ const progress = makeProgressReporter(extra);
879
1053
  logger.info("find: Starting", { description: params.description, timeout: params.timeout });
880
1054
 
881
1055
  const sessionCheck = requireActiveSession();
@@ -886,7 +1060,17 @@ registerAppTool(
886
1060
 
887
1061
  try {
888
1062
  logger.debug("find: Calling SDK find");
889
- const element = await sdk.find(params.description, params.timeout ? { timeout: params.timeout } : undefined);
1063
+ const stopHeartbeat = progress.heartbeat(`Looking for "${params.description}"...`);
1064
+ let element: any;
1065
+ try {
1066
+ element = await raceAbort(
1067
+ extra.signal,
1068
+ "find",
1069
+ sdk.find(params.description, params.timeout ? { timeout: params.timeout } : undefined)
1070
+ );
1071
+ } finally {
1072
+ stopHeartbeat();
1073
+ }
890
1074
  const found = element.found();
891
1075
  const coords = element.getCoordinates();
892
1076
 
@@ -975,6 +1159,8 @@ registerAppTool(
975
1159
  generatedCode
976
1160
  );
977
1161
  } catch (error) {
1162
+ const cancelled = cancelledResultOrNull(error, "find");
1163
+ if (cancelled) return cancelled;
978
1164
  logger.error("find: Failed", { error: String(error), description: params.description });
979
1165
  captureException(error as Error, { tags: { tool: "find" }, extra: { description: params.description } });
980
1166
  throw error;
@@ -995,8 +1181,9 @@ registerAppTool(
995
1181
  }) as any,
996
1182
  _meta: { ui: { resourceUri: RESOURCE_URI, expanded: true } },
997
1183
  },
998
- async (params: { description: string; timeout?: number }): Promise<CallToolResult> => {
1184
+ async (params: { description: string; timeout?: number }, extra: ToolExtra): Promise<CallToolResult> => {
999
1185
  const startTime = Date.now();
1186
+ const progress = makeProgressReporter(extra);
1000
1187
  logger.info("findall: Starting", { description: params.description, timeout: params.timeout });
1001
1188
 
1002
1189
  const sessionCheck = requireActiveSession();
@@ -1007,7 +1194,17 @@ registerAppTool(
1007
1194
 
1008
1195
  try {
1009
1196
  logger.debug("findall: Calling SDK findAll");
1010
- const elements = await sdk.findAll(params.description, params.timeout ? { timeout: params.timeout } : undefined);
1197
+ const stopHeartbeat = progress.heartbeat(`Looking for all "${params.description}"...`);
1198
+ let elements: any;
1199
+ try {
1200
+ elements = await raceAbort(
1201
+ extra.signal,
1202
+ "findall",
1203
+ sdk.findAll(params.description, params.timeout ? { timeout: params.timeout } : undefined)
1204
+ );
1205
+ } finally {
1206
+ stopHeartbeat();
1207
+ }
1011
1208
  const count = elements.length;
1012
1209
 
1013
1210
  // Store element refs for later use
@@ -1104,6 +1301,8 @@ registerAppTool(
1104
1301
  generatedCode
1105
1302
  );
1106
1303
  } catch (error) {
1304
+ const cancelled = cancelledResultOrNull(error, "findall");
1305
+ if (cancelled) return cancelled;
1107
1306
  logger.error("findall: Failed", { error: String(error), description: params.description });
1108
1307
  captureException(error as Error, { tags: { tool: "findall" }, extra: { description: params.description } });
1109
1308
  throw error;
@@ -1373,8 +1572,9 @@ registerAppTool(
1373
1572
  }) as any,
1374
1573
  _meta: { ui: { resourceUri: RESOURCE_URI, expanded: true } },
1375
1574
  },
1376
- async (params: { description: string; action: "click" | "double-click" | "right-click" }): Promise<CallToolResult> => {
1575
+ async (params: { description: string; action: "click" | "double-click" | "right-click" }, extra: ToolExtra): Promise<CallToolResult> => {
1377
1576
  const startTime = Date.now();
1577
+ const progress = makeProgressReporter(extra);
1378
1578
  logger.info("find_and_click: Starting", { description: params.description, action: params.action });
1379
1579
 
1380
1580
  const sessionCheck = requireActiveSession();
@@ -1385,7 +1585,13 @@ registerAppTool(
1385
1585
 
1386
1586
  try {
1387
1587
  logger.debug("find_and_click: Finding element");
1388
- const element = await sdk.find(params.description);
1588
+ const stopHeartbeat = progress.heartbeat(`Looking for "${params.description}"...`);
1589
+ let element: any;
1590
+ try {
1591
+ element = await raceAbort(extra.signal, "find_and_click", sdk.find(params.description));
1592
+ } finally {
1593
+ stopHeartbeat();
1594
+ }
1389
1595
  const found = element.found();
1390
1596
 
1391
1597
  if (!found) {
@@ -1511,6 +1717,8 @@ registerAppTool(
1511
1717
  generatedCode
1512
1718
  );
1513
1719
  } catch (error) {
1720
+ const cancelled = cancelledResultOrNull(error, "find_and_click");
1721
+ if (cancelled) return cancelled;
1514
1722
  logger.error("find_and_click: Failed", { error: String(error), description: params.description });
1515
1723
  captureException(error as Error, { tags: { tool: "find_and_click" }, extra: { description: params.description, action: params.action } });
1516
1724
  throw error;
@@ -1730,8 +1938,9 @@ You can optionally provide a reference image URI to compare against a previous s
1730
1938
  }) as any,
1731
1939
  _meta: { ui: { resourceUri: RESOURCE_URI, expanded: true } },
1732
1940
  },
1733
- async (params: { task: string; referenceImageUri?: string }): Promise<CallToolResult> => {
1941
+ async (params: { task: string; referenceImageUri?: string }, extra: ToolExtra): Promise<CallToolResult> => {
1734
1942
  const startTime = Date.now();
1943
+ const progress = makeProgressReporter(extra);
1735
1944
  logger.info("check: Starting", { task: params.task, hasReferenceImageUri: !!params.referenceImageUri });
1736
1945
 
1737
1946
  const sessionCheck = requireActiveSession();
@@ -1743,6 +1952,7 @@ You can optionally provide a reference image URI to compare against a previous s
1743
1952
  try {
1744
1953
  // Capture current screenshot
1745
1954
  logger.debug("check: Capturing current screenshot");
1955
+ progress.report("Capturing screenshot...");
1746
1956
  const currentScreenshot = await sdk.agent.system.captureScreenBase64(1, false, true);
1747
1957
 
1748
1958
  // Use provided reference image URI, last screenshot as "before" state, or current if no previous screenshot
@@ -1798,12 +2008,18 @@ You can optionally provide a reference image URI to compare against a previous s
1798
2008
  beforeScreenshotPreview: beforeScreenshot?.substring(0, 50),
1799
2009
  currentScreenshotPreview: currentScreenshot?.substring(0, 50)
1800
2010
  });
1801
- const response = await sdk.agent.sdk.req("check", {
1802
- tasks: [params.task],
1803
- images: [beforeScreenshot, currentScreenshot],
1804
- mousePosition,
1805
- activeWindow,
1806
- });
2011
+ const stopHeartbeat = progress.heartbeat(`Checking: "${params.task}"...`);
2012
+ let response: any;
2013
+ try {
2014
+ response = await raceAbort(extra.signal, "check", sdk.agent.sdk.req("check", {
2015
+ tasks: [params.task],
2016
+ images: [beforeScreenshot, currentScreenshot],
2017
+ mousePosition,
2018
+ activeWindow,
2019
+ }));
2020
+ } finally {
2021
+ stopHeartbeat();
2022
+ }
1807
2023
 
1808
2024
  const aiResponse = response.data;
1809
2025
 
@@ -1843,6 +2059,8 @@ You can optionally provide a reference image URI to compare against a previous s
1843
2059
  }
1844
2060
  );
1845
2061
  } catch (error) {
2062
+ const cancelled = cancelledResultOrNull(error, "check");
2063
+ if (cancelled) return cancelled;
1846
2064
  logger.error("check: Failed", { error: String(error), task: params.task });
1847
2065
  captureException(error as Error, { tags: { tool: "check" }, extra: { task: params.task } });
1848
2066
  throw error;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@testdriverai/mcp",
3
- "version": "7.9.103-canary",
3
+ "version": "7.9.104-test",
4
4
  "description": "Next generation autonomous AI agent for end-to-end testing of web & desktop",
5
5
  "main": "sdk.js",
6
6
  "types": "sdk.d.ts",
package/sdk.js CHANGED
@@ -472,10 +472,12 @@ class Element {
472
472
  this.sdk.emitter.emit(events.log.log, findingMessage);
473
473
 
474
474
  try {
475
- const screenshot = await this.system.captureScreenBase64();
475
+ // Returns { imageKey } (fast S3-key path, no local round-trip) or
476
+ // { image } (base64 fallback). See system.captureScreenImage.
477
+ const imagePayload = await this.system.captureScreenImage();
476
478
  // Only store screenshot in DEBUG mode to prevent memory leaks
477
- if (debugMode) {
478
- this._screenshot = screenshot;
479
+ if (debugMode && imagePayload.image) {
480
+ this._screenshot = imagePayload.image;
479
481
  }
480
482
 
481
483
  // Handle options - can be a number (cacheThreshold) or object with cacheKey/cacheThreshold/cache
@@ -566,7 +568,7 @@ class Element {
566
568
  response = await this.sdk.apiClient.req("find", {
567
569
  session: this.sdk.getSessionId(),
568
570
  element: description,
569
- image: screenshot,
571
+ ...imagePayload,
570
572
  threshold: threshold,
571
573
  elementSimilarity: elementSimilarity,
572
574
  cacheKey: cacheKey,
@@ -2405,7 +2407,8 @@ CAPTCHA_SOLVER_EOF`,
2405
2407
  const { events } = require("./agent/events.js");
2406
2408
 
2407
2409
  try {
2408
- const screenshot = await this.system.captureScreenBase64();
2410
+ // { imageKey } (fast S3-key path) or { image } (base64 fallback).
2411
+ const imagePayload = await this.system.captureScreenImage();
2409
2412
 
2410
2413
  // Handle options - can be a number (cacheThreshold) or object with cacheKey/cacheThreshold/cache
2411
2414
  let cacheKey = null;
@@ -2474,7 +2477,7 @@ CAPTCHA_SOLVER_EOF`,
2474
2477
  {
2475
2478
  session: this.getSessionId(),
2476
2479
  element: description,
2477
- image: screenshot,
2480
+ ...imagePayload,
2478
2481
  threshold: threshold,
2479
2482
  cacheKey: cacheKey,
2480
2483
  os: this.os,
@@ -2513,11 +2516,12 @@ CAPTCHA_SOLVER_EOF`,
2513
2516
  elementData,
2514
2517
  );
2515
2518
 
2516
- // Only store screenshot in DEBUG mode
2519
+ // Only store screenshot in DEBUG mode (and only when we have the
2520
+ // bytes locally — the fast imageKey path doesn't download them)
2517
2521
  const debugMode =
2518
2522
  process.env.VERBOSE || process.env.TD_DEBUG;
2519
- if (debugMode) {
2520
- element._screenshot = screenshot;
2523
+ if (debugMode && imagePayload.image) {
2524
+ element._screenshot = imagePayload.image;
2521
2525
  }
2522
2526
 
2523
2527
  return element;
@@ -3001,11 +3005,9 @@ CAPTCHA_SOLVER_EOF`,
3001
3005
  const { events } = require("./agent/events.js");
3002
3006
  this.emitter.emit(events.log.log, "🔍 Running OmniParser screen analysis...");
3003
3007
 
3004
- const screenshot = await this.system.captureScreenBase64();
3005
-
3006
3008
  const response = await this.apiClient.req("parse", {
3007
3009
  session: this.getSessionId(),
3008
- image: screenshot,
3010
+ ...(await this.system.captureScreenImage()),
3009
3011
  });
3010
3012
 
3011
3013
  if (response.error) {