dhalsim 1.6.1 → 1.7.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -9,4 +9,5 @@ export { GoBack, GoForward, Navigate, Reload } from "./navigation";
9
9
  export { ClosePage, ListPages, NewPage } from "./page";
10
10
  export { ExecuteScript } from "./script";
11
11
  export { Wait, WaitForElement } from "./wait";
12
- export { RequestUserAssistance } from "./user-input";
12
+ export { RequestUserAssistance, USER_ASSISTANCE_REASONS } from "./user-input";
13
+ export type { UserAssistanceReason } from "./user-input";
@@ -1,5 +1,7 @@
1
1
  import { z } from "llmist";
2
2
  import type { IBrowserSessionManager } from "../session";
3
+ export declare const USER_ASSISTANCE_REASONS: readonly ["captcha", "2fa_code", "sms_code", "manual_action", "confirmation", "other"];
4
+ export type UserAssistanceReason = (typeof USER_ASSISTANCE_REASONS)[number];
3
5
  declare const RequestUserAssistance_base: new () => {
4
6
  description: string;
5
7
  parameterSchema: z.ZodObject<{
package/dist/index.d.ts CHANGED
@@ -2,5 +2,7 @@ export declare const VERSION = "1.2.0";
2
2
  export { BrowserSessionManager, getSessionManager, TestBrowserSessionManager } from "./session";
3
3
  export type { IBrowserSessionManager } from "./session";
4
4
  export { createDhalsimGadgets, createGadgetsByPreset, createGadgetsByName, type DhalsimConfig, type DhalsimGadgets, type DhalsimPreset, } from "./factory";
5
- export { Dhalsim, DHALSIM_SYSTEM_PROMPT, DHALSIM_MINIMAL_PROMPT } from "./subagents";
6
- export type { DhalsimOptions, DhalsimSessionManager } from "./subagents";
5
+ export { Dhalsim, DHALSIM_SYSTEM_PROMPT, DHALSIM_MINIMAL_PROMPT, createDhalsimSystemPrompt, } from "./subagents";
6
+ export type { DhalsimOptions, DhalsimSessionManager, UserAssistanceParams, UserAssistanceCallback, } from "./subagents";
7
+ export type { UserAssistanceReason } from "./gadgets/user-input";
8
+ export { USER_ASSISTANCE_REASONS } from "./gadgets/user-input";
package/dist/index.js CHANGED
@@ -15362,11 +15362,19 @@ return items.length;`
15362
15362
 
15363
15363
  // src/gadgets/user-input.ts
15364
15364
  import { Gadget as Gadget11, z as z12, HumanInputRequiredException } from "llmist";
15365
+ var USER_ASSISTANCE_REASONS = [
15366
+ "captcha",
15367
+ "2fa_code",
15368
+ "sms_code",
15369
+ "manual_action",
15370
+ "confirmation",
15371
+ "other"
15372
+ ];
15365
15373
 
15366
15374
  class RequestUserAssistance extends Gadget11({
15367
15375
  description: "Requests input or confirmation from the user. Use when encountering captchas, 2FA codes, or other challenges requiring human intervention. The browser should be in headed mode (headless=false) if user needs to interact with it.",
15368
15376
  schema: z12.object({
15369
- reason: z12.enum(["captcha", "2fa_code", "sms_code", "manual_action", "confirmation", "other"]).describe("Type of assistance needed"),
15377
+ reason: z12.enum(USER_ASSISTANCE_REASONS).describe("Type of assistance needed"),
15370
15378
  message: z12.string().describe("Message to display to the user explaining what's needed")
15371
15379
  }),
15372
15380
  examples: [
@@ -16078,7 +16086,38 @@ ${states.join(`
16078
16086
  }
16079
16087
  }
16080
16088
  // src/subagents/prompts.ts
16081
- var DHALSIM_SYSTEM_PROMPT = `You are a browser automation agent focused on completing a specific web task.
16089
+ var GADGET_LIST_WITH_USER_ASSISTANCE = `## Available Gadgets
16090
+ - ReportResult: **REQUIRED** - Call this to return your findings when task is complete
16091
+ - Navigate: Go to a URL
16092
+ - Click: Click an element (auto-waits for element to be actionable)
16093
+ - Fill: Fill a form input
16094
+ - FillForm: Fill multiple fields and submit
16095
+ - Select: Select dropdown option
16096
+ - Check: Toggle checkboxes
16097
+ - GetFullPageContent: Read page text content
16098
+ - Screenshot: Capture the page (use when you need to show visual results)
16099
+ - DismissOverlays: Auto-dismiss cookie banners
16100
+ - Scroll: Scroll the page
16101
+ - WaitForElement: Wait for an element to appear
16102
+ - Wait: General wait
16103
+ - RequestUserAssistance: Ask user for help with CAPTCHAs, 2FA codes, or other human-only challenges`;
16104
+ var GADGET_LIST_WITHOUT_USER_ASSISTANCE = `## Available Gadgets
16105
+ - ReportResult: **REQUIRED** - Call this to return your findings when task is complete
16106
+ - Navigate: Go to a URL
16107
+ - Click: Click an element (auto-waits for element to be actionable)
16108
+ - Fill: Fill a form input
16109
+ - FillForm: Fill multiple fields and submit
16110
+ - Select: Select dropdown option
16111
+ - Check: Toggle checkboxes
16112
+ - GetFullPageContent: Read page text content
16113
+ - Screenshot: Capture the page (use when you need to show visual results)
16114
+ - DismissOverlays: Auto-dismiss cookie banners
16115
+ - Scroll: Scroll the page
16116
+ - WaitForElement: Wait for an element to appear
16117
+ - Wait: General wait`;
16118
+ function createDhalsimSystemPrompt(options) {
16119
+ const gadgetList = options.includeUserAssistance ? GADGET_LIST_WITH_USER_ASSISTANCE : GADGET_LIST_WITHOUT_USER_ASSISTANCE;
16120
+ return `You are a browser automation agent focused on completing a specific web task.
16082
16121
 
16083
16122
  ## Browser State (<CurrentBrowserState>)
16084
16123
  After each message, you receive a <CurrentBrowserState> block showing the LIVE browser state.
@@ -16118,21 +16157,7 @@ If an action doesn't produce the expected result after 2-3 attempts:
16118
16157
  3. Try a different approach or skip and continue
16119
16158
  NEVER click the same element more than 3 times in a row.
16120
16159
 
16121
- ## Available Gadgets
16122
- - ReportResult: **REQUIRED** - Call this to return your findings when task is complete
16123
- - Navigate: Go to a URL
16124
- - Click: Click an element (auto-waits for element to be actionable)
16125
- - Fill: Fill a form input
16126
- - FillForm: Fill multiple fields and submit
16127
- - Select: Select dropdown option
16128
- - Check: Toggle checkboxes
16129
- - GetFullPageContent: Read page text content
16130
- - Screenshot: Capture the page (use when you need to show visual results)
16131
- - DismissOverlays: Auto-dismiss cookie banners
16132
- - Scroll: Scroll the page
16133
- - WaitForElement: Wait for an element to appear
16134
- - Wait: General wait
16135
- - RequestUserAssistance: Ask user for help with CAPTCHAs, 2FA codes, or other human-only challenges
16160
+ ${gadgetList}
16136
16161
 
16137
16162
  ## Task Completion
16138
16163
  When you have accomplished the task, you MUST call ReportResult with your findings:
@@ -16141,6 +16166,10 @@ When you have accomplished the task, you MUST call ReportResult with your findin
16141
16166
  3. If you took screenshots, describe what they show in the result
16142
16167
 
16143
16168
  Remember: You are a focused automation agent. Complete the task, call ReportResult, then stop.`;
16169
+ }
16170
+ var DHALSIM_SYSTEM_PROMPT = createDhalsimSystemPrompt({
16171
+ includeUserAssistance: true
16172
+ });
16144
16173
  var DHALSIM_MINIMAL_PROMPT = `You are a browser agent. Complete the given task efficiently.
16145
16174
 
16146
16175
  ## Browser State
@@ -16189,10 +16218,14 @@ Use this for web research, data extraction, form filling, or any web-based task.
16189
16218
  }) {
16190
16219
  customSessionManager;
16191
16220
  customSystemPrompt;
16221
+ userAssistanceEnabled;
16222
+ customUserAssistanceCallback;
16192
16223
  constructor(options) {
16193
16224
  super();
16194
16225
  this.customSessionManager = options?.sessionManager;
16195
16226
  this.customSystemPrompt = options?.systemPrompt;
16227
+ this.userAssistanceEnabled = options?.userAssistance;
16228
+ this.customUserAssistanceCallback = options?.onUserAssistance;
16196
16229
  if (options?.timeoutMs !== undefined) {
16197
16230
  this.timeoutMs = options.timeoutMs === 0 ? undefined : options.timeoutMs;
16198
16231
  }
@@ -16222,6 +16255,8 @@ Use this for web research, data extraction, form filling, or any web-based task.
16222
16255
  subagentKey: "navigationTimeoutMs",
16223
16256
  defaultValue: 60000
16224
16257
  });
16258
+ const userAssistanceEnabled = this.userAssistanceEnabled ?? (this.customUserAssistanceCallback !== undefined || ctx?.requestHumanInput !== undefined);
16259
+ logger13?.debug(`[BrowseWeb] User assistance enabled=${userAssistanceEnabled}`);
16225
16260
  const collectedMedia = [];
16226
16261
  const manager = this.customSessionManager ?? new BrowserSessionManager(logger13);
16227
16262
  const isOwnedManager = !this.customSessionManager;
@@ -16275,11 +16310,12 @@ Use this for web research, data extraction, form filling, or any web-based task.
16275
16310
  new Scroll(manager),
16276
16311
  new WaitForElement(manager),
16277
16312
  new Wait(manager),
16278
- new RequestUserAssistance(manager)
16313
+ ...userAssistanceEnabled ? [new RequestUserAssistance(manager)] : []
16279
16314
  ];
16280
16315
  const { AgentBuilder, LLMist } = getHostExports(ctx);
16281
16316
  const client = new LLMist;
16282
- const builder = new AgentBuilder(client).withModel(model).withSystem(this.customSystemPrompt ?? DHALSIM_SYSTEM_PROMPT).withMaxIterations(maxIterations).withGadgets(...gadgets).withTrailingMessage((trailingCtx) => [
16317
+ const systemPrompt = this.customSystemPrompt ?? createDhalsimSystemPrompt({ includeUserAssistance: userAssistanceEnabled });
16318
+ const builder = new AgentBuilder(client).withModel(model).withSystem(systemPrompt).withMaxIterations(maxIterations).withGadgets(...gadgets).withTrailingMessage((trailingCtx) => [
16283
16319
  pageStateScanner.getCachedState(),
16284
16320
  "",
16285
16321
  `[Iteration ${trailingCtx.iteration + 1}/${trailingCtx.maxIterations}]`,
@@ -16294,8 +16330,23 @@ Use this for web research, data extraction, form filling, or any web-based task.
16294
16330
  });
16295
16331
  if (ctx) {
16296
16332
  builder.withParentContext(ctx);
16297
- if (ctx.requestHumanInput) {
16298
- builder.onHumanInput(ctx.requestHumanInput);
16333
+ if (userAssistanceEnabled) {
16334
+ if (this.customUserAssistanceCallback) {
16335
+ builder.onHumanInput(async (prompt) => {
16336
+ const match = prompt.match(/^\[([A-Z0-9_]+)\]\s*(.*)$/s);
16337
+ if (match) {
16338
+ const reason = match[1].toLowerCase();
16339
+ const message = match[2];
16340
+ return this.customUserAssistanceCallback({ reason, message });
16341
+ }
16342
+ return this.customUserAssistanceCallback({
16343
+ reason: "other",
16344
+ message: prompt
16345
+ });
16346
+ });
16347
+ } else if (ctx.requestHumanInput) {
16348
+ builder.onHumanInput(ctx.requestHumanInput);
16349
+ }
16299
16350
  }
16300
16351
  }
16301
16352
  if (dismissResult !== null) {
@@ -16345,8 +16396,10 @@ export {
16345
16396
  getSessionManager,
16346
16397
  createGadgetsByPreset,
16347
16398
  createGadgetsByName,
16399
+ createDhalsimSystemPrompt,
16348
16400
  createDhalsimGadgets,
16349
16401
  VERSION,
16402
+ USER_ASSISTANCE_REASONS,
16350
16403
  TestBrowserSessionManager,
16351
16404
  Dhalsim,
16352
16405
  DHALSIM_SYSTEM_PROMPT,
@@ -1,6 +1,7 @@
1
1
  import { z } from "llmist";
2
2
  import type { ExecutionContext, GadgetMediaOutput } from "llmist";
3
3
  import type { IBrowserSessionManager, StartBrowserOptions, StartBrowserResult } from "../session";
4
+ import { type UserAssistanceReason } from "../gadgets";
4
5
  /**
5
6
  * Session manager type with the required methods for browser automation.
6
7
  * Compatible with both BrowserSessionManager and TestBrowserSessionManager.
@@ -9,6 +10,20 @@ export type DhalsimSessionManager = IBrowserSessionManager & {
9
10
  startBrowser(options: StartBrowserOptions): Promise<StartBrowserResult>;
10
11
  closeAll(): Promise<void>;
11
12
  };
13
+ /**
14
+ * Parameters passed to the custom user assistance callback.
15
+ */
16
+ export interface UserAssistanceParams {
17
+ /** Type of assistance needed */
18
+ reason: UserAssistanceReason;
19
+ /** Message describing what's needed */
20
+ message: string;
21
+ }
22
+ /**
23
+ * Custom callback for handling user assistance requests.
24
+ * Return the user's response (e.g., 2FA code, "done" for captchas).
25
+ */
26
+ export type UserAssistanceCallback = (params: UserAssistanceParams) => Promise<string>;
12
27
  /**
13
28
  * Options for configuring the Dhalsim subagent.
14
29
  */
@@ -19,6 +34,21 @@ export interface DhalsimOptions {
19
34
  systemPrompt?: string;
20
35
  /** Overall timeout in milliseconds (default: 300000 = 5 min, 0 = disabled) */
21
36
  timeoutMs?: number;
37
+ /**
38
+ * Enable or disable RequestUserAssistance gadget.
39
+ * - true: Always include the gadget
40
+ * - false: Never include the gadget (agent won't know it exists)
41
+ * - undefined (default): Auto-detect based on callback availability
42
+ */
43
+ userAssistance?: boolean;
44
+ /**
45
+ * Custom callback for handling user assistance requests.
46
+ * When provided, this receives structured params instead of the raw message.
47
+ * The callback's return value is passed back to the agent.
48
+ *
49
+ * If not provided but userAssistance is enabled, falls back to ctx.requestHumanInput.
50
+ */
51
+ onUserAssistance?: UserAssistanceCallback;
22
52
  }
23
53
  declare const Dhalsim_base: new () => {
24
54
  description: string;
@@ -117,6 +147,8 @@ declare const Dhalsim_base: new () => {
117
147
  export declare class Dhalsim extends Dhalsim_base {
118
148
  private customSessionManager?;
119
149
  private customSystemPrompt?;
150
+ private userAssistanceEnabled?;
151
+ private customUserAssistanceCallback?;
120
152
  constructor(options?: DhalsimOptions);
121
153
  execute(params: this["params"], ctx?: ExecutionContext): Promise<{
122
154
  result: string;
@@ -1,3 +1,3 @@
1
1
  export { Dhalsim } from "./dhalsim";
2
- export type { DhalsimOptions, DhalsimSessionManager } from "./dhalsim";
3
- export { DHALSIM_SYSTEM_PROMPT, DHALSIM_MINIMAL_PROMPT } from "./prompts";
2
+ export type { DhalsimOptions, DhalsimSessionManager, UserAssistanceParams, UserAssistanceCallback, } from "./dhalsim";
3
+ export { DHALSIM_SYSTEM_PROMPT, DHALSIM_MINIMAL_PROMPT, createDhalsimSystemPrompt, } from "./prompts";
@@ -1,8 +1,14 @@
1
+ /**
2
+ * Creates a system prompt with optional RequestUserAssistance gadget mention.
3
+ */
4
+ export declare function createDhalsimSystemPrompt(options: {
5
+ includeUserAssistance: boolean;
6
+ }): string;
1
7
  /**
2
8
  * System prompt for the Dhalsim subagent.
3
9
  * This is a focused version of the CLI prompt, optimized for task completion.
4
10
  */
5
- export declare const DHALSIM_SYSTEM_PROMPT = "You are a browser automation agent focused on completing a specific web task.\n\n## Browser State (<CurrentBrowserState>)\nAfter each message, you receive a <CurrentBrowserState> block showing the LIVE browser state.\nThis is your source of truth for what's on screen. It contains:\n- OPEN PAGES: List of available pageIds (e.g., \"p1\")\n- URL and title of each page\n- INPUTS: Form fields with CSS selectors\n- BUTTONS: Clickable buttons with CSS selectors\n- LINKS: Navigation links with CSS selectors\n- CHECKBOXES: Checkbox/radio inputs\n- MENUITEMS: Dropdown options (only visible when dropdown is open)\n\n## CRITICAL Rules\n1. You have ONE page (p1) already open. Use Navigate to go to URLs.\n2. ONLY use selectors exactly as shown in <CurrentBrowserState>\n3. NEVER guess selectors - use GetFullPageContent if you need more info\n4. Focus on completing the task efficiently - avoid unnecessary actions\n5. If a selector matches multiple elements, you'll get an error with a \"suggestions\" array containing valid selectors. USE ONE OF THESE SUGGESTIONS DIRECTLY - don't guess or modify them.\n6. For batch extraction: GetFullPageContent returns ALL matches when a selector matches multiple elements (as \"texts\" array). Use this instead of querying each element separately.\n\n## Efficient Pattern\nOn first call: Navigate and DismissOverlays are ALREADY done. 
Take action immediately.\nAfter any Navigate call: DismissOverlays, then interact with elements.\n\nIf an action doesn't produce expected results, use GetFullPageContent to diagnose before retrying.\n\n## Dropdown/Toggle Behavior\nDropdowns are TOGGLES - clicking the same trigger twice will close it!\n- After Click on a dropdown trigger, check <CurrentBrowserState> for MENUITEMS\n- If menuitems appear, click the menuitem ONCE - do NOT click the trigger again\n- One click opens, second click closes\n\n## Avoid Infinite Loops\nIf an action doesn't produce the expected result after 2-3 attempts:\n1. Stop retrying the same action\n2. Use GetFullPageContent or Screenshot to diagnose\n3. Try a different approach or skip and continue\nNEVER click the same element more than 3 times in a row.\n\n## Available Gadgets\n- ReportResult: **REQUIRED** - Call this to return your findings when task is complete\n- Navigate: Go to a URL\n- Click: Click an element (auto-waits for element to be actionable)\n- Fill: Fill a form input\n- FillForm: Fill multiple fields and submit\n- Select: Select dropdown option\n- Check: Toggle checkboxes\n- GetFullPageContent: Read page text content\n- Screenshot: Capture the page (use when you need to show visual results)\n- DismissOverlays: Auto-dismiss cookie banners\n- Scroll: Scroll the page\n- WaitForElement: Wait for an element to appear\n- Wait: General wait\n- RequestUserAssistance: Ask user for help with CAPTCHAs, 2FA codes, or other human-only challenges\n\n## Task Completion\nWhen you have accomplished the task, you MUST call ReportResult with your findings:\n1. Call ReportResult(result=\"...\") with all extracted data and findings\n2. Include any relevant URLs, text content, or structured data\n3. If you took screenshots, describe what they show in the result\n\nRemember: You are a focused automation agent. Complete the task, call ReportResult, then stop.";
11
+ export declare const DHALSIM_SYSTEM_PROMPT: string;
6
12
  /**
7
13
  * Truncated prompt for simpler tasks (fewer gadgets, less context).
8
14
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dhalsim",
3
- "version": "1.6.1",
3
+ "version": "1.7.0",
4
4
  "description": "Browser automation for llmist agents using Camoufox anti-detect browser",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",