test-wuying-agentbay-sdk 0.13.1-beta.20251223203147 → 0.13.1-beta.20251224094729

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -2325,11 +2325,20 @@ var LOG_LEVEL_VALUES = {
2325
2325
  };
2326
2326
  var currentRequestId = "";
2327
2327
  var currentLogLevel = process.env.LOG_LEVEL || process.env.AGENTBAY_LOG_LEVEL || "INFO";
2328
+ var currentLogFormat = process.env.AGENTBAY_LOG_FORMAT || "pretty";
2329
+ if (["sls", "compact"].includes(String(currentLogFormat).toLowerCase())) {
2330
+ currentLogFormat = "sls";
2331
+ } else {
2332
+ currentLogFormat = "pretty";
2333
+ }
2328
2334
  var fileLoggingEnabled = false;
2329
2335
  var logFilePath = null;
2330
2336
  var logFileMaxSize = 10 * 1024 * 1024;
2331
2337
  var consoleLoggingEnabled = true;
2332
2338
  function shouldUseColors() {
2339
+ if (currentLogFormat === "sls") {
2340
+ return false;
2341
+ }
2333
2342
  if (process.env.DISABLE_COLORS === "true") {
2334
2343
  return false;
2335
2344
  }
@@ -2371,6 +2380,9 @@ var SENSITIVE_FIELDS = [
2371
2380
  "authorization"
2372
2381
  ];
2373
2382
  function getLogLevelEmoji(level) {
2383
+ if (currentLogFormat === "sls") {
2384
+ return level;
2385
+ }
2374
2386
  switch (level) {
2375
2387
  case "DEBUG":
2376
2388
  return "\u{1F41B} DEBUG";
@@ -2390,6 +2402,13 @@ function shouldLog(level) {
2390
2402
  }
2391
2403
  __name(shouldLog, "shouldLog");
2392
2404
  function formatLogMessage(level, message, forFile = false) {
2405
+ if (currentLogFormat === "sls") {
2406
+ let formattedMessage2 = `${level}: ${message}`;
2407
+ if (currentRequestId) {
2408
+ formattedMessage2 += ` [RequestId=${currentRequestId}]`;
2409
+ }
2410
+ return formattedMessage2;
2411
+ }
2393
2412
  let formattedMessage = `${getLogLevelEmoji(level)}: ${message}`;
2394
2413
  if (currentRequestId) {
2395
2414
  formattedMessage += ` [RequestId=${currentRequestId}]`;
@@ -2537,6 +2556,23 @@ function setupLogger(config) {
2537
2556
  if (config.enableConsole !== void 0) {
2538
2557
  consoleLoggingEnabled = config.enableConsole;
2539
2558
  }
2559
+ if (config.format) {
2560
+ if (["sls", "compact"].includes(String(config.format).toLowerCase())) {
2561
+ currentLogFormat = "sls";
2562
+ } else {
2563
+ currentLogFormat = "pretty";
2564
+ }
2565
+ } else {
2566
+ const envFormat = process.env.AGENTBAY_LOG_FORMAT;
2567
+ if (envFormat) {
2568
+ if (["sls", "compact"].includes(String(envFormat).toLowerCase())) {
2569
+ currentLogFormat = "sls";
2570
+ } else {
2571
+ currentLogFormat = "pretty";
2572
+ }
2573
+ }
2574
+ }
2575
+ useColors = shouldUseColors();
2540
2576
  }
2541
2577
  __name(setupLogger, "setupLogger");
2542
2578
  function log(message, ...args) {
@@ -2644,6 +2680,22 @@ ${error.stack}`;
2644
2680
  __name(logError, "logError");
2645
2681
  function logAPICall(apiName, requestData) {
2646
2682
  if (!shouldLog("INFO")) return;
2683
+ if (currentLogFormat === "sls") {
2684
+ let message2 = `API Call: ${apiName}`;
2685
+ if (requestData) {
2686
+ const maskedData = maskSensitiveData(requestData);
2687
+ if (typeof maskedData === "string") {
2688
+ message2 += `, ${maskedData}`;
2689
+ } else {
2690
+ message2 += `, ${JSON.stringify(maskedData)}`;
2691
+ }
2692
+ }
2693
+ if (consoleLoggingEnabled) {
2694
+ process.stdout.write(message2 + "\n");
2695
+ }
2696
+ writeToFile(message2);
2697
+ return;
2698
+ }
2647
2699
  const message = `\u{1F517} API Call: ${apiName}`;
2648
2700
  const savedRequestId = currentRequestId;
2649
2701
  currentRequestId = "";
@@ -2661,6 +2713,35 @@ function logAPICall(apiName, requestData) {
2661
2713
  }
2662
2714
  __name(logAPICall, "logAPICall");
2663
2715
  function logAPIResponseWithDetails(apiName, requestId, success = true, keyFields, fullResponse) {
2716
+ if (currentLogFormat === "sls") {
2717
+ if (!shouldLog(success ? "INFO" : "ERROR")) return;
2718
+ const status = success ? "API Response" : "API Response Failed";
2719
+ let msg = `${status}: ${apiName}`;
2720
+ if (requestId) {
2721
+ msg += `, RequestId=${requestId}`;
2722
+ }
2723
+ if (keyFields) {
2724
+ for (const [key, value] of Object.entries(keyFields)) {
2725
+ const maskedValue = maskSensitiveData({ [key]: value });
2726
+ msg += `, ${key}=${maskedValue[key]}`;
2727
+ }
2728
+ }
2729
+ if (success) {
2730
+ if (consoleLoggingEnabled) {
2731
+ process.stdout.write(msg + "\n");
2732
+ }
2733
+ writeToFile(msg);
2734
+ } else {
2735
+ if (consoleLoggingEnabled) {
2736
+ process.stderr.write(msg + "\n");
2737
+ }
2738
+ writeToFile(msg);
2739
+ }
2740
+ if (fullResponse && shouldLog("DEBUG")) {
2741
+ logDebug(`Full Response: ${fullResponse}`);
2742
+ }
2743
+ return;
2744
+ }
2664
2745
  if (success) {
2665
2746
  if (shouldLog("INFO")) {
2666
2747
  let mainMessage = `\u2705 API Response: ${apiName}`;
@@ -2732,6 +2813,16 @@ function logCodeExecutionOutput(requestId, rawOutput) {
2732
2813
  return;
2733
2814
  }
2734
2815
  const actualOutput = texts.join("");
2816
+ if (currentLogFormat === "sls") {
2817
+ const header2 = `Code Execution Output (RequestID: ${requestId}):`;
2818
+ if (consoleLoggingEnabled) {
2819
+ process.stdout.write(header2 + "\n");
2820
+ process.stdout.write(actualOutput + "\n");
2821
+ }
2822
+ writeToFile(header2);
2823
+ writeToFile(actualOutput);
2824
+ return;
2825
+ }
2735
2826
  const header = `\u{1F4CB} Code Execution Output (RequestID: ${requestId}):`;
2736
2827
  if (useColors) {
2737
2828
  process.stdout.write(`${ANSI_GREEN}\u2139\uFE0F INFO: ${header}${ANSI_RESET}
@@ -2761,6 +2852,10 @@ function logCodeExecutionOutput(requestId, rawOutput) {
2761
2852
  __name(logCodeExecutionOutput, "logCodeExecutionOutput");
2762
2853
  function logInfoWithColor(message, color = ANSI_RED) {
2763
2854
  if (!shouldLog("INFO")) return;
2855
+ if (currentLogFormat === "sls") {
2856
+ logInfo(message);
2857
+ return;
2858
+ }
2764
2859
  const emoji = "\u2139\uFE0F INFO";
2765
2860
  const fullMessage = `${emoji}: ${message}`;
2766
2861
  if (useColors) {
@@ -3886,41 +3981,35 @@ init_esm_shims();
3886
3981
 
3887
3982
  // src/agent/agent.ts
3888
3983
  init_esm_shims();
3889
- var _ComputerUseAgent = class _ComputerUseAgent {
3890
- /**
3891
- * Initialize an Computer Agent object.
3892
- *
3893
- * @param session - The Session instance that this Agent belongs to.
3894
- */
3984
+ var _BaseTaskAgent = class _BaseTaskAgent {
3895
3985
  constructor(session) {
3896
3986
  this.session = session;
3897
3987
  }
3988
+ /**
3989
+ * Get the full MCP tool name based on prefix and action.
3990
+ */
3991
+ getToolName(action) {
3992
+ const toolMap = {
3993
+ execute: "execute_task",
3994
+ get_status: "get_task_status",
3995
+ terminate: "terminate_task"
3996
+ };
3997
+ const baseName = toolMap[action] || action;
3998
+ if (this.toolPrefix) {
3999
+ return `${this.toolPrefix}_${baseName}`;
4000
+ }
4001
+ return baseName;
4002
+ }
3898
4003
  /**
3899
4004
  * Execute a specific task described in human language.
3900
- *
3901
- * @param task - Task description in human language.
3902
- * @param maxTryTimes - Maximum number of retry attempts.
3903
- * @returns ExecutionResult containing success status, task output, and error
3904
- * message if any.
3905
- *
3906
- * @example
3907
- * ```typescript
3908
- * const agentBay = new AgentBay({ apiKey: 'your_api_key' });
3909
- * const result = await agentBay.create({ imageId: 'windows_latest' });
3910
- * if (result.success) {
3911
- * const taskResult = await result.session.agent.computer.executeTask(
3912
- * 'Open notepad',
3913
- * 10
3914
- * );
3915
- * console.log(`Task status: ${taskResult.taskStatus}`);
3916
- * await result.session.delete();
3917
- * }
3918
- * ```
3919
4005
  */
3920
4006
  async executeTask(task, maxTryTimes) {
3921
4007
  try {
3922
4008
  const args = { task, max_try_times: maxTryTimes };
3923
- const result = await this.session.callMcpTool("flux_execute_task", args);
4009
+ const result = await this.session.callMcpTool(
4010
+ this.getToolName("execute"),
4011
+ args
4012
+ );
3924
4013
  if (!result.success) {
3925
4014
  return {
3926
4015
  requestId: result.requestId,
@@ -3944,7 +4033,7 @@ var _ComputerUseAgent = class _ComputerUseAgent {
3944
4033
  taskResult: "Invalid execution response."
3945
4034
  };
3946
4035
  }
3947
- const taskId = content.task_id;
4036
+ const taskId = content.taskId || content.task_id;
3948
4037
  if (!taskId) {
3949
4038
  return {
3950
4039
  requestId: result.requestId,
@@ -3969,13 +4058,13 @@ var _ComputerUseAgent = class _ComputerUseAgent {
3969
4058
  };
3970
4059
  }
3971
4060
  switch (query.taskStatus) {
3972
- case "finished":
4061
+ case "completed":
3973
4062
  return {
3974
4063
  requestId: query.requestId,
3975
4064
  success: true,
3976
4065
  errorMessage: "",
3977
4066
  taskId,
3978
- taskStatus: "finished",
4067
+ taskStatus: "completed",
3979
4068
  taskResult: query.taskProduct
3980
4069
  };
3981
4070
  case "failed":
@@ -3987,6 +4076,15 @@ var _ComputerUseAgent = class _ComputerUseAgent {
3987
4076
  taskStatus: "failed",
3988
4077
  taskResult: ""
3989
4078
  };
4079
+ case "cancelled":
4080
+ return {
4081
+ requestId: query.requestId,
4082
+ success: false,
4083
+ errorMessage: query.errorMessage || "Task was cancelled.",
4084
+ taskId,
4085
+ taskStatus: "cancelled",
4086
+ taskResult: ""
4087
+ };
3990
4088
  case "unsupported":
3991
4089
  return {
3992
4090
  requestId: query.requestId,
@@ -4022,32 +4120,20 @@ var _ComputerUseAgent = class _ComputerUseAgent {
4022
4120
  }
4023
4121
  /**
4024
4122
  * Get the status of the task with the given task ID.
4025
- *
4026
- * @param taskId - Task ID
4027
- * @returns QueryResult containing the task status
4028
- *
4029
- * @example
4030
- * ```typescript
4031
- * const agentBay = new AgentBay({ apiKey: 'your_api_key' });
4032
- * const result = await agentBay.create({ imageId: 'windows_latest' });
4033
- * if (result.success) {
4034
- * const taskResult = await result.session.agent.computer.executeTask('Open
4035
- * calculator', 10); const statusResult = await
4036
- * result.session.agent.computer.getTaskStatus(taskResult.taskId);
4037
- * console.log(`Status:
4038
- * ${JSON.parse(statusResult.output).status}`); await result.session.delete();
4039
- * }
4040
- * ```
4041
4123
  */
4042
4124
  async getTaskStatus(taskId) {
4043
4125
  try {
4044
4126
  const args = { task_id: taskId };
4045
- const result = await this.session.callMcpTool("flux_get_task_status", args);
4127
+ const result = await this.session.callMcpTool(
4128
+ this.getToolName("get_status"),
4129
+ args
4130
+ );
4046
4131
  if (!result.success) {
4047
4132
  return {
4048
4133
  requestId: result.requestId,
4049
4134
  success: false,
4050
4135
  errorMessage: result.errorMessage,
4136
+ taskId,
4051
4137
  taskAction: "",
4052
4138
  taskProduct: "",
4053
4139
  taskStatus: "failed"
@@ -4056,19 +4142,27 @@ var _ComputerUseAgent = class _ComputerUseAgent {
4056
4142
  let queryResult;
4057
4143
  try {
4058
4144
  queryResult = JSON.parse(result.data);
4145
+ const contentTaskId = queryResult.taskId || queryResult.task_id || taskId;
4146
+ const taskProduct = queryResult.result || queryResult.product || "";
4147
+ const stream = Array.isArray(queryResult.stream) ? queryResult.stream : void 0;
4148
+ const error = queryResult.error || void 0;
4059
4149
  return {
4060
4150
  requestId: result.requestId,
4061
4151
  success: true,
4062
4152
  errorMessage: "",
4153
+ taskId: contentTaskId,
4063
4154
  taskAction: queryResult.action || "",
4064
- taskProduct: queryResult.product || "",
4065
- taskStatus: queryResult.status || "failed"
4155
+ taskProduct,
4156
+ taskStatus: queryResult.status || "completed",
4157
+ stream,
4158
+ error
4066
4159
  };
4067
4160
  } catch (error) {
4068
4161
  return {
4069
4162
  requestId: result.requestId,
4070
4163
  success: false,
4071
4164
  errorMessage: `Failed to get task status: ${error}`,
4165
+ taskId,
4072
4166
  taskAction: "",
4073
4167
  taskProduct: "",
4074
4168
  taskStatus: "failed"
@@ -4079,6 +4173,7 @@ var _ComputerUseAgent = class _ComputerUseAgent {
4079
4173
  requestId: "",
4080
4174
  success: false,
4081
4175
  errorMessage: `Failed to get task status: ${error}`,
4176
+ taskId,
4082
4177
  taskAction: "",
4083
4178
  taskProduct: "",
4084
4179
  taskStatus: "failed"
@@ -4087,33 +4182,15 @@ var _ComputerUseAgent = class _ComputerUseAgent {
4087
4182
  }
4088
4183
  /**
4089
4184
  * Terminate a task with a specified task ID.
4090
- *
4091
- * @param taskId - The ID of the running task.
4092
- * @returns ExecutionResult containing success status, task output, and
4093
- * error message if any.
4094
- *
4095
- * @example
4096
- * ```typescript
4097
- * const agentBay = new AgentBay({ apiKey: 'your_api_key' });
4098
- * const result = await agentBay.create({ imageId: 'windows_latest' });
4099
- * if (result.success) {
4100
- * const taskResult = await result.session.agent.computer.executeTask(
4101
- * 'Open notepad',
4102
- * 5
4103
- * );
4104
- * const terminateResult = await result.session.agent.computer.terminateTask(
4105
- * taskResult.taskId
4106
- * );
4107
- * console.log(`Terminated: ${terminateResult.taskStatus}`);
4108
- * await result.session.delete();
4109
- * }
4110
- * ```
4111
4185
  */
4112
4186
  async terminateTask(taskId) {
4113
4187
  logDebug("Terminating task");
4114
4188
  try {
4115
4189
  const args = { task_id: taskId };
4116
- const result = await this.session.callMcpTool("flux_terminate_task", args);
4190
+ const result = await this.session.callMcpTool(
4191
+ this.getToolName("terminate"),
4192
+ args
4193
+ );
4117
4194
  let content;
4118
4195
  try {
4119
4196
  content = JSON.parse(result.data);
@@ -4127,8 +4204,8 @@ var _ComputerUseAgent = class _ComputerUseAgent {
4127
4204
  taskResult: ""
4128
4205
  };
4129
4206
  }
4130
- const terminatedTaskId = content.task_id || taskId;
4131
- const status = content.status || "unknown";
4207
+ const terminatedTaskId = content.taskId || content.task_id || taskId;
4208
+ const status = content.status || "cancelling";
4132
4209
  if (result.success) {
4133
4210
  return {
4134
4211
  requestId: result.requestId,
@@ -4159,17 +4236,20 @@ var _ComputerUseAgent = class _ComputerUseAgent {
4159
4236
  }
4160
4237
  }
4161
4238
  };
4239
+ __name(_BaseTaskAgent, "BaseTaskAgent");
4240
+ var BaseTaskAgent = _BaseTaskAgent;
4241
+ var _ComputerUseAgent = class _ComputerUseAgent extends (BaseTaskAgent.default || BaseTaskAgent) {
4242
+ constructor() {
4243
+ super(...arguments);
4244
+ this.toolPrefix = "flux";
4245
+ }
4246
+ };
4162
4247
  __name(_ComputerUseAgent, "ComputerUseAgent");
4163
4248
  var ComputerUseAgent = _ComputerUseAgent;
4164
- var _BrowserUseAgent = class _BrowserUseAgent {
4165
- /**
4166
- * Initialize an Browser Agent object.
4167
- * @description Browser Use Agent is in BETA ⚠️ .
4168
- *
4169
- * @param session - The Session instance that this Agent belongs to.
4170
- */
4171
- constructor(session) {
4172
- this.session = session;
4249
+ var _BrowserUseAgent = class _BrowserUseAgent extends (BaseTaskAgent.default || BaseTaskAgent) {
4250
+ constructor() {
4251
+ super(...arguments);
4252
+ this.toolPrefix = "browser_use";
4173
4253
  }
4174
4254
  /**
4175
4255
  * Initialize the browser agent with specific options.
@@ -4214,276 +4294,345 @@ var _BrowserUseAgent = class _BrowserUseAgent {
4214
4294
  };
4215
4295
  }
4216
4296
  }
4297
+ };
4298
+ __name(_BrowserUseAgent, "BrowserUseAgent");
4299
+ var BrowserUseAgent = _BrowserUseAgent;
4300
+ var _MobileUseAgent = class _MobileUseAgent extends (BaseTaskAgent.default || BaseTaskAgent) {
4301
+ constructor() {
4302
+ super(...arguments);
4303
+ this.toolPrefix = "";
4304
+ }
4217
4305
  /**
4218
- * Execute a specific task described in human language.
4306
+ * Execute a task in human language without waiting for completion
4307
+ * (non-blocking). This is a fire-and-return interface that immediately
4308
+ * provides a task ID. Call getTaskStatus to check the task status.
4219
4309
  *
4220
4310
  * @param task - Task description in human language.
4221
- * @param maxTryTimes - Maximum number of retry attempts.
4222
- * @returns ExecutionResult containing success status, task output, and
4223
- * error message if any.
4311
+ * @param maxSteps - Maximum number of steps (clicks/swipes/etc.) allowed.
4312
+ * Used to prevent infinite loops or excessive resource
4313
+ * consumption. Default is 50.
4314
+ * @param maxStepRetries - Maximum retry times for MCP tool call failures
4315
+ * at SDK level. Used to retry when callMcpTool fails
4316
+ * (e.g., network errors, timeouts). Default is 3.
4317
+ * @returns ExecutionResult containing success status, task ID, task status,
4318
+ * and error message if any.
4224
4319
  *
4225
4320
  * @example
4226
4321
  * ```typescript
4227
4322
  * const agentBay = new AgentBay({ apiKey: 'your_api_key' });
4228
- * const result = await agentBay.create({ imageId: 'linux_latest' });
4323
+ * const result = await agentBay.create({ imageId: 'mobile_latest' });
4229
4324
  * if (result.success) {
4230
- * const taskResult = await result.session.agent.browser.executeTask(
4231
- * 'Navigate to baidu and query the weather of Shanghai',
4232
- * 10
4325
+ * const execResult = await result.session.agent.mobile.executeTask(
4326
+ * 'Open WeChat app', 100, 5
4233
4327
  * );
4234
- * console.log(`Task status: ${taskResult.taskStatus}`);
4328
+ * console.log(`Task ID: ${execResult.taskId}`);
4235
4329
  * await result.session.delete();
4236
4330
  * }
4237
4331
  * ```
4238
4332
  */
4239
- async executeTask(task, maxTryTimes) {
4240
- try {
4241
- const args = { task, max_try_times: maxTryTimes };
4242
- const result = await this.session.callMcpTool("browser_use_execute_task", args);
4243
- if (!result.success) {
4244
- return {
4245
- requestId: result.requestId,
4246
- success: false,
4247
- errorMessage: result.errorMessage,
4248
- taskStatus: "failed",
4249
- taskId: "",
4250
- taskResult: "Task Failed"
4251
- };
4252
- }
4253
- let content;
4333
+ async executeTask(task, maxSteps = 50, maxStepRetries = 3) {
4334
+ const args = {
4335
+ task,
4336
+ max_steps: maxSteps
4337
+ };
4338
+ let lastError;
4339
+ let lastRequestId = "";
4340
+ for (let attempt = 0; attempt < maxStepRetries; attempt++) {
4254
4341
  try {
4255
- content = JSON.parse(result.data);
4256
- } catch (err) {
4257
- return {
4258
- requestId: result.requestId,
4259
- success: false,
4260
- errorMessage: `Failed to parse response: ${err}`,
4261
- taskStatus: "failed",
4262
- taskId: "",
4263
- taskResult: "Invalid execution response."
4264
- };
4265
- }
4266
- const taskId = content.task_id;
4267
- if (!taskId) {
4268
- return {
4269
- requestId: result.requestId,
4270
- success: false,
4271
- errorMessage: "Task ID not found in response",
4272
- taskStatus: "failed",
4273
- taskId: "",
4274
- taskResult: "Invalid task ID."
4275
- };
4276
- }
4277
- let triedTime = 0;
4278
- while (triedTime < maxTryTimes) {
4279
- const query = await this.getTaskStatus(taskId);
4280
- if (!query.success) {
4281
- return {
4282
- requestId: query.requestId,
4283
- success: false,
4284
- errorMessage: query.errorMessage,
4285
- taskStatus: "failed",
4286
- taskId,
4287
- taskResult: ""
4288
- };
4289
- }
4290
- switch (query.taskStatus) {
4291
- case "finished":
4342
+ const result = await this.session.callMcpTool(
4343
+ this.getToolName("execute"),
4344
+ args
4345
+ );
4346
+ lastRequestId = result.requestId;
4347
+ if (result.success) {
4348
+ let content;
4349
+ try {
4350
+ content = JSON.parse(result.data);
4351
+ } catch (err) {
4292
4352
  return {
4293
- requestId: query.requestId,
4294
- success: true,
4295
- errorMessage: "",
4296
- taskId,
4297
- taskStatus: "finished",
4298
- taskResult: query.taskProduct
4353
+ requestId: result.requestId,
4354
+ success: false,
4355
+ errorMessage: `Failed to parse response: ${err}`,
4356
+ taskStatus: "failed",
4357
+ taskId: "",
4358
+ taskResult: "Invalid execution response."
4299
4359
  };
4300
- case "failed":
4360
+ }
4361
+ const taskId = content.taskId || content.task_id;
4362
+ if (!taskId) {
4301
4363
  return {
4302
- requestId: query.requestId,
4364
+ requestId: result.requestId,
4303
4365
  success: false,
4304
- errorMessage: query.errorMessage || "Failed to execute task.",
4305
- taskId,
4366
+ errorMessage: "Task ID not found in response",
4306
4367
  taskStatus: "failed",
4307
- taskResult: ""
4368
+ taskId: "",
4369
+ taskResult: "Invalid task ID."
4308
4370
  };
4309
- case "unsupported":
4371
+ }
4372
+ return {
4373
+ requestId: result.requestId,
4374
+ success: true,
4375
+ errorMessage: "",
4376
+ taskId,
4377
+ taskStatus: "running",
4378
+ taskResult: ""
4379
+ };
4380
+ } else {
4381
+ lastError = result.errorMessage || "Failed to execute task";
4382
+ if (attempt < maxStepRetries - 1) {
4383
+ logDebug(
4384
+ `Attempt ${attempt + 1}/${maxStepRetries} failed, retrying...`
4385
+ );
4386
+ await new Promise((resolve2) => setTimeout(resolve2, 1e3));
4387
+ continue;
4388
+ } else {
4310
4389
  return {
4311
- requestId: query.requestId,
4390
+ requestId: result.requestId,
4312
4391
  success: false,
4313
- errorMessage: query.errorMessage || "Unsupported task.",
4314
- taskId,
4315
- taskStatus: "unsupported",
4316
- taskResult: ""
4392
+ errorMessage: lastError,
4393
+ taskStatus: "failed",
4394
+ taskId: "",
4395
+ taskResult: "Task Failed"
4317
4396
  };
4397
+ }
4398
+ }
4399
+ } catch (error) {
4400
+ lastError = `Failed to execute: ${error}`;
4401
+ if (attempt < maxStepRetries - 1) {
4402
+ logDebug(
4403
+ `Attempt ${attempt + 1}/${maxStepRetries} raised exception, retrying...`
4404
+ );
4405
+ await new Promise((resolve2) => setTimeout(resolve2, 1e3));
4406
+ continue;
4407
+ } else {
4408
+ return {
4409
+ requestId: lastRequestId,
4410
+ success: false,
4411
+ errorMessage: lastError,
4412
+ taskStatus: "failed",
4413
+ taskId: "",
4414
+ taskResult: "Task Failed"
4415
+ };
4318
4416
  }
4319
- logDebug(`Task ${taskId} is still running, please wait for a while.`);
4320
- await new Promise((resolve2) => setTimeout(resolve2, 3e3));
4321
- triedTime++;
4322
4417
  }
4323
- return {
4324
- requestId: result.requestId,
4325
- success: false,
4326
- errorMessage: "Task execution timed out",
4327
- taskStatus: "timeout",
4328
- taskId,
4329
- taskResult: "Task Timed Out"
4330
- };
4331
- } catch (error) {
4332
- return {
4333
- requestId: "",
4334
- success: false,
4335
- errorMessage: `Failed to execute: ${error}`,
4336
- taskStatus: "failed",
4337
- taskId: "",
4338
- taskResult: "Task Failed"
4339
- };
4340
4418
  }
4419
+ return {
4420
+ requestId: lastRequestId,
4421
+ success: false,
4422
+ errorMessage: `Failed after ${maxStepRetries} attempts: ${lastError || "Unknown error"}`,
4423
+ taskStatus: "failed",
4424
+ taskId: "",
4425
+ taskResult: "Task Failed"
4426
+ };
4341
4427
  }
4342
4428
  /**
4343
- * Get the status of the task with the given task ID.
4429
+ * Execute a specific task described in human language synchronously.
4430
+ * This is a synchronous interface that blocks until the task is completed or
4431
+ * an error occurs, or timeout happens. The default polling interval is
4432
+ * 3 seconds.
4344
4433
  *
4345
- * @param taskId - Task ID
4346
- * @returns QueryResult containing the task status
4434
+ * @param task - Task description in human language.
4435
+ * @param maxSteps - Maximum number of steps (clicks/swipes/etc.) allowed.
4436
+ * Used to prevent infinite loops or excessive resource
4437
+ * consumption. Default is 50.
4438
+ * @param maxStepRetries - Maximum retry times for MCP tool call failures
4439
+ * at SDK level. Used to retry when callMcpTool fails
4440
+ * (e.g., network errors, timeouts). Default is 3.
4441
+ * @param maxTryTimes - Maximum number of polling attempts (each 3 seconds).
4442
+ * Used to control how long to wait for task completion.
4443
+ * Default is 300 (about 15 minutes).
4444
+ * @returns ExecutionResult containing success status, task ID, task status,
4445
+ * and error message if any.
4347
4446
  *
4348
4447
  * @example
4349
4448
  * ```typescript
4350
4449
  * const agentBay = new AgentBay({ apiKey: 'your_api_key' });
4351
- * const result = await agentBay.create({ imageId: 'linux_latest' });
4450
+ * const result = await agentBay.create({ imageId: 'mobile_latest' });
4352
4451
  * if (result.success) {
4353
- * const taskResult = await result.session.agent.browser.executeTask(
4354
- * 'Navigate to baidu and query the weather of Shanghai',
4355
- * 10
4452
+ * const execResult = await result.session.agent.mobile.executeTaskAndWait(
4453
+ * 'Open WeChat app', 100, 3, 200
4356
4454
  * );
4357
- * const statusResult = await result.session.agent.browser.getTaskStatus(
4358
- * taskResult.taskId
4359
- * );
4360
- * console.log(`Status: ${statusResult.taskStatus}`);
4455
+ * console.log(`Task result: ${execResult.taskResult}`);
4361
4456
  * await result.session.delete();
4362
4457
  * }
4363
4458
  * ```
4364
4459
  */
4365
- async getTaskStatus(taskId) {
4366
- try {
4367
- const args = { task_id: taskId };
4368
- const result = await this.session.callMcpTool("browser_use_get_task_status", args);
4369
- if (!result.success) {
4370
- return {
4371
- requestId: result.requestId,
4372
- success: false,
4373
- errorMessage: result.errorMessage,
4374
- taskAction: "",
4375
- taskProduct: "",
4376
- taskStatus: "failed"
4377
- };
4378
- }
4379
- let queryResult;
4460
+ async executeTaskAndWait(task, maxSteps = 50, maxStepRetries = 3, maxTryTimes = 300) {
4461
+ const args = {
4462
+ task,
4463
+ max_steps: maxSteps
4464
+ };
4465
+ let taskId;
4466
+ let lastError;
4467
+ let lastRequestId = "";
4468
+ for (let attempt = 0; attempt < maxStepRetries; attempt++) {
4380
4469
  try {
4381
- queryResult = JSON.parse(result.data);
4382
- return {
4383
- requestId: result.requestId,
4384
- success: true,
4385
- errorMessage: "",
4386
- taskAction: queryResult.action || "",
4387
- taskProduct: queryResult.product || "",
4388
- taskStatus: queryResult.status || "failed"
4389
- };
4470
+ const result = await this.session.callMcpTool(
4471
+ this.getToolName("execute"),
4472
+ args
4473
+ );
4474
+ lastRequestId = result.requestId;
4475
+ if (result.success) {
4476
+ let content;
4477
+ try {
4478
+ content = JSON.parse(result.data);
4479
+ } catch (err) {
4480
+ return {
4481
+ requestId: result.requestId,
4482
+ success: false,
4483
+ errorMessage: `Failed to parse response: ${err}`,
4484
+ taskStatus: "failed",
4485
+ taskId: "",
4486
+ taskResult: "Invalid execution response."
4487
+ };
4488
+ }
4489
+ taskId = content.taskId || content.task_id;
4490
+ if (!taskId) {
4491
+ return {
4492
+ requestId: result.requestId,
4493
+ success: false,
4494
+ errorMessage: "Task ID not found in response",
4495
+ taskStatus: "failed",
4496
+ taskId: "",
4497
+ taskResult: "Invalid task ID."
4498
+ };
4499
+ }
4500
+ break;
4501
+ } else {
4502
+ lastError = result.errorMessage || "Failed to execute task";
4503
+ if (attempt < maxStepRetries - 1) {
4504
+ logDebug(
4505
+ `Attempt ${attempt + 1}/${maxStepRetries} failed, retrying...`
4506
+ );
4507
+ await new Promise((resolve2) => setTimeout(resolve2, 1e3));
4508
+ continue;
4509
+ } else {
4510
+ return {
4511
+ requestId: result.requestId,
4512
+ success: false,
4513
+ errorMessage: lastError,
4514
+ taskStatus: "failed",
4515
+ taskId: "",
4516
+ taskResult: "Task Failed"
4517
+ };
4518
+ }
4519
+ }
4390
4520
  } catch (error) {
4391
- return {
4392
- requestId: result.requestId,
4393
- success: false,
4394
- errorMessage: `Failed to get task status: ${error}`,
4395
- taskAction: "",
4396
- taskProduct: "",
4397
- taskStatus: "failed"
4398
- };
4521
+ lastError = `Failed to execute: ${error}`;
4522
+ if (attempt < maxStepRetries - 1) {
4523
+ logDebug(
4524
+ `Attempt ${attempt + 1}/${maxStepRetries} raised exception, retrying...`
4525
+ );
4526
+ await new Promise((resolve2) => setTimeout(resolve2, 1e3));
4527
+ continue;
4528
+ } else {
4529
+ return {
4530
+ requestId: lastRequestId,
4531
+ success: false,
4532
+ errorMessage: lastError,
4533
+ taskStatus: "failed",
4534
+ taskId: "",
4535
+ taskResult: "Task Failed"
4536
+ };
4537
+ }
4399
4538
  }
4400
- } catch (error) {
4539
+ }
4540
+ if (!taskId) {
4401
4541
  return {
4402
- requestId: "",
4542
+ requestId: lastRequestId,
4403
4543
  success: false,
4404
- errorMessage: `Failed to get task status: ${error}`,
4405
- taskAction: "",
4406
- taskProduct: "",
4407
- taskStatus: "failed"
4544
+ errorMessage: `Failed to get task_id after ${maxStepRetries} attempts: ${lastError || "Unknown error"}`,
4545
+ taskStatus: "failed",
4546
+ taskId: "",
4547
+ taskResult: "Task Failed"
4408
4548
  };
4409
4549
  }
4410
- }
4411
- /**
4412
- * Terminate a task with a specified task ID.
4413
- *
4414
- * @param taskId - The ID of the running task.
4415
- * @returns ExecutionResult containing success status, task output, and
4416
- * error message if any.
4417
- *
4418
- * @example
4419
- * ```typescript
4420
- * const agentBay = new AgentBay({ apiKey: 'your_api_key' });
4421
- * const result = await agentBay.create({ imageId: 'linux_latest' });
4422
- * if (result.success) {
4423
- * const taskResult = await result.session.agent.browser.executeTask(
4424
- * 'Navigate to baidu and query the weather of Shanghai',
4425
- * 10
4426
- * );
4427
- * const terminateResult = await result.session.agent.browser.terminateTask(
4428
- * taskResult.taskId
4429
- * );
4430
- * console.log(`Terminated: ${terminateResult.taskStatus}`);
4431
- * await result.session.delete();
4432
- * }
4433
- * ```
4434
- */
4435
- async terminateTask(taskId) {
4436
- logDebug("Terminating task");
4437
- try {
4438
- const args = { task_id: taskId };
4439
- const result = await this.session.callMcpTool("browser_use_terminate_task", args);
4440
- let content;
4441
- try {
4442
- content = JSON.parse(result.data);
4443
- } catch (err) {
4550
+ let triedTime = 0;
4551
+ const processedTimestamps = /* @__PURE__ */ new Set();
4552
+ while (triedTime < maxTryTimes) {
4553
+ const query = await this.getTaskStatus(taskId);
4554
+ if (!query.success) {
4444
4555
  return {
4445
- requestId: result.requestId,
4556
+ requestId: query.requestId,
4446
4557
  success: false,
4447
- errorMessage: `Failed to parse response: ${err}`,
4448
- taskId,
4558
+ errorMessage: query.errorMessage,
4449
4559
  taskStatus: "failed",
4560
+ taskId,
4450
4561
  taskResult: ""
4451
4562
  };
4452
4563
  }
4453
- const terminatedTaskId = content.task_id || taskId;
4454
- const status = content.status || "unknown";
4455
- if (result.success) {
4456
- return {
4457
- requestId: result.requestId,
4458
- success: true,
4459
- errorMessage: "",
4460
- taskId: terminatedTaskId,
4461
- taskStatus: status,
4462
- taskResult: ""
4463
- };
4564
+ if (query.stream) {
4565
+ for (const streamItem of query.stream) {
4566
+ const timestamp = streamItem.timestamp_ms;
4567
+ if (timestamp !== void 0 && !processedTimestamps.has(timestamp)) {
4568
+ processedTimestamps.add(timestamp);
4569
+ const content = streamItem.content || "";
4570
+ const reasoning = streamItem.reasoning || "";
4571
+ if (content) {
4572
+ process.stdout.write(content);
4573
+ }
4574
+ if (reasoning) {
4575
+ }
4576
+ }
4577
+ }
4464
4578
  }
4465
- return {
4466
- requestId: result.requestId,
4467
- success: false,
4468
- errorMessage: result.errorMessage,
4469
- taskId: terminatedTaskId,
4470
- taskStatus: status,
4471
- taskResult: ""
4472
- };
4473
- } catch (error) {
4474
- return {
4475
- requestId: "",
4476
- success: false,
4477
- errorMessage: `Failed to terminate: ${error}`,
4478
- taskId,
4479
- taskStatus: "failed",
4480
- taskResult: ""
4481
- };
4579
+ if (query.error) {
4580
+ }
4581
+ switch (query.taskStatus) {
4582
+ case "completed":
4583
+ return {
4584
+ requestId: query.requestId,
4585
+ success: true,
4586
+ errorMessage: "",
4587
+ taskId,
4588
+ taskStatus: "completed",
4589
+ taskResult: query.taskProduct
4590
+ };
4591
+ case "failed":
4592
+ return {
4593
+ requestId: query.requestId,
4594
+ success: false,
4595
+ errorMessage: query.errorMessage || "Failed to execute task.",
4596
+ taskId,
4597
+ taskStatus: "failed",
4598
+ taskResult: ""
4599
+ };
4600
+ case "cancelled":
4601
+ return {
4602
+ requestId: query.requestId,
4603
+ success: false,
4604
+ errorMessage: query.errorMessage || "Task was cancelled.",
4605
+ taskId,
4606
+ taskStatus: "cancelled",
4607
+ taskResult: ""
4608
+ };
4609
+ case "unsupported":
4610
+ return {
4611
+ requestId: query.requestId,
4612
+ success: false,
4613
+ errorMessage: query.errorMessage || "Unsupported task.",
4614
+ taskId,
4615
+ taskStatus: "unsupported",
4616
+ taskResult: ""
4617
+ };
4618
+ }
4619
+ logDebug(`\u23F3 Task ${taskId} running \u{1F680}: ${query.taskAction}.`);
4620
+ await new Promise((resolve2) => setTimeout(resolve2, 3e3));
4621
+ triedTime++;
4482
4622
  }
4623
+ logDebug("\u26A0\uFE0F task execution timeout!");
4624
+ return {
4625
+ requestId: lastRequestId,
4626
+ success: false,
4627
+ errorMessage: "Task timeout.",
4628
+ taskStatus: "failed",
4629
+ taskId,
4630
+ taskResult: "Task timeout."
4631
+ };
4483
4632
  }
4484
4633
  };
4485
- __name(_BrowserUseAgent, "BrowserUseAgent");
4486
- var BrowserUseAgent = _BrowserUseAgent;
4634
+ __name(_MobileUseAgent, "MobileUseAgent");
4635
+ var MobileUseAgent = _MobileUseAgent;
4487
4636
  var _Agent = class _Agent {
4488
4637
  /**
4489
4638
  * Initialize an Agent object.
@@ -4493,6 +4642,7 @@ var _Agent = class _Agent {
4493
4642
  constructor(session) {
4494
4643
  this.computer = new ComputerUseAgent(session);
4495
4644
  this.browser = new BrowserUseAgent(session);
4645
+ this.mobile = new MobileUseAgent(session);
4496
4646
  }
4497
4647
  };
4498
4648
  __name(_Agent, "Agent");