dhalsim 1.0.2 → 1.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -22527,6 +22527,7 @@ NEVER click the same element more than 3 times in a row.
22527
22527
  - Scroll: Scroll the page
22528
22528
  - WaitForElement: Wait for an element to appear
22529
22529
  - Wait: General wait
22530
+ - RequestUserAssistance: Ask user for help with CAPTCHAs, 2FA codes, or other human-only challenges
22530
22531
 
22531
22532
  ## Task Completion
22532
22533
  When you have accomplished the task, you MUST call ReportResult with your findings:
@@ -22650,7 +22651,8 @@ Use this for web research, data extraction, form filling, or any web-based task.
22650
22651
  new DismissOverlays(manager),
22651
22652
  new Scroll(manager),
22652
22653
  new WaitForElement(manager),
22653
- new Wait(manager)
22654
+ new Wait(manager),
22655
+ new RequestUserAssistance(manager)
22654
22656
  ];
22655
22657
  const { AgentBuilder, LLMist } = getHostExports(ctx);
22656
22658
  const client = new LLMist;
@@ -22669,6 +22671,9 @@ Use this for web research, data extraction, form filling, or any web-based task.
22669
22671
  });
22670
22672
  if (ctx) {
22671
22673
  builder.withParentContext(ctx);
22674
+ if (ctx.requestHumanInput) {
22675
+ builder.onHumanInput(ctx.requestHumanInput);
22676
+ }
22672
22677
  }
22673
22678
  if (dismissResult !== null) {
22674
22679
  builder.withSyntheticGadgetCall("DismissOverlays", { pageId }, dismissResult, "auto_dismiss");
@@ -2,7 +2,7 @@
2
2
  * System prompt for the Dhalsim subagent.
3
3
  * This is a focused version of the CLI prompt, optimized for task completion.
4
4
  */
5
- export declare const DHALSIM_SYSTEM_PROMPT = "You are a browser automation agent focused on completing a specific web task.\n\n## Browser State (<CurrentBrowserState>)\nAfter each message, you receive a <CurrentBrowserState> block showing the LIVE browser state.\nThis is your source of truth for what's on screen. It contains:\n- OPEN PAGES: List of available pageIds (e.g., \"p1\")\n- URL and title of each page\n- INPUTS: Form fields with CSS selectors\n- BUTTONS: Clickable buttons with CSS selectors\n- LINKS: Navigation links with CSS selectors\n- CHECKBOXES: Checkbox/radio inputs\n- MENUITEMS: Dropdown options (only visible when dropdown is open)\n\n## CRITICAL Rules\n1. You have ONE page (p1) already open. Use Navigate to go to URLs.\n2. ONLY use selectors exactly as shown in <CurrentBrowserState>\n3. NEVER guess selectors - use GetFullPageContent if you need more info\n4. Focus on completing the task efficiently - avoid unnecessary actions\n5. If a selector matches multiple elements, you'll get an error with a \"suggestions\" array containing valid selectors. USE ONE OF THESE SUGGESTIONS DIRECTLY - don't guess or modify them.\n6. For batch extraction: GetFullPageContent returns ALL matches when a selector matches multiple elements (as \"texts\" array). Use this instead of querying each element separately.\n\n## Efficient Pattern\nOn first call: Navigate and DismissOverlays are ALREADY done. 
Take action immediately.\nAfter any Navigate call: DismissOverlays, then interact with elements.\n\nIf an action doesn't produce expected results, use GetFullPageContent to diagnose before retrying.\n\n## Dropdown/Toggle Behavior\nDropdowns are TOGGLES - clicking the same trigger twice will close it!\n- After Click on a dropdown trigger, check <CurrentBrowserState> for MENUITEMS\n- If menuitems appear, click the menuitem ONCE - do NOT click the trigger again\n- One click opens, second click closes\n\n## Avoid Infinite Loops\nIf an action doesn't produce the expected result after 2-3 attempts:\n1. Stop retrying the same action\n2. Use GetFullPageContent or Screenshot to diagnose\n3. Try a different approach or skip and continue\nNEVER click the same element more than 3 times in a row.\n\n## Available Gadgets\n- ReportResult: **REQUIRED** - Call this to return your findings when task is complete\n- Navigate: Go to a URL\n- Click: Click an element (auto-waits for element to be actionable)\n- Fill: Fill a form input\n- FillForm: Fill multiple fields and submit\n- Select: Select dropdown option\n- Check: Toggle checkboxes\n- GetFullPageContent: Read page text content\n- Screenshot: Capture the page (use when you need to show visual results)\n- DismissOverlays: Auto-dismiss cookie banners\n- Scroll: Scroll the page\n- WaitForElement: Wait for an element to appear\n- Wait: General wait\n\n## Task Completion\nWhen you have accomplished the task, you MUST call ReportResult with your findings:\n1. Call ReportResult(result=\"...\") with all extracted data and findings\n2. Include any relevant URLs, text content, or structured data\n3. If you took screenshots, describe what they show in the result\n\nRemember: You are a focused automation agent. Complete the task, call ReportResult, then stop.";
5
+ export declare const DHALSIM_SYSTEM_PROMPT = "You are a browser automation agent focused on completing a specific web task.\n\n## Browser State (<CurrentBrowserState>)\nAfter each message, you receive a <CurrentBrowserState> block showing the LIVE browser state.\nThis is your source of truth for what's on screen. It contains:\n- OPEN PAGES: List of available pageIds (e.g., \"p1\")\n- URL and title of each page\n- INPUTS: Form fields with CSS selectors\n- BUTTONS: Clickable buttons with CSS selectors\n- LINKS: Navigation links with CSS selectors\n- CHECKBOXES: Checkbox/radio inputs\n- MENUITEMS: Dropdown options (only visible when dropdown is open)\n\n## CRITICAL Rules\n1. You have ONE page (p1) already open. Use Navigate to go to URLs.\n2. ONLY use selectors exactly as shown in <CurrentBrowserState>\n3. NEVER guess selectors - use GetFullPageContent if you need more info\n4. Focus on completing the task efficiently - avoid unnecessary actions\n5. If a selector matches multiple elements, you'll get an error with a \"suggestions\" array containing valid selectors. USE ONE OF THESE SUGGESTIONS DIRECTLY - don't guess or modify them.\n6. For batch extraction: GetFullPageContent returns ALL matches when a selector matches multiple elements (as \"texts\" array). Use this instead of querying each element separately.\n\n## Efficient Pattern\nOn first call: Navigate and DismissOverlays are ALREADY done. 
Take action immediately.\nAfter any Navigate call: DismissOverlays, then interact with elements.\n\nIf an action doesn't produce expected results, use GetFullPageContent to diagnose before retrying.\n\n## Dropdown/Toggle Behavior\nDropdowns are TOGGLES - clicking the same trigger twice will close it!\n- After Click on a dropdown trigger, check <CurrentBrowserState> for MENUITEMS\n- If menuitems appear, click the menuitem ONCE - do NOT click the trigger again\n- One click opens, second click closes\n\n## Avoid Infinite Loops\nIf an action doesn't produce the expected result after 2-3 attempts:\n1. Stop retrying the same action\n2. Use GetFullPageContent or Screenshot to diagnose\n3. Try a different approach or skip and continue\nNEVER click the same element more than 3 times in a row.\n\n## Available Gadgets\n- ReportResult: **REQUIRED** - Call this to return your findings when task is complete\n- Navigate: Go to a URL\n- Click: Click an element (auto-waits for element to be actionable)\n- Fill: Fill a form input\n- FillForm: Fill multiple fields and submit\n- Select: Select dropdown option\n- Check: Toggle checkboxes\n- GetFullPageContent: Read page text content\n- Screenshot: Capture the page (use when you need to show visual results)\n- DismissOverlays: Auto-dismiss cookie banners\n- Scroll: Scroll the page\n- WaitForElement: Wait for an element to appear\n- Wait: General wait\n- RequestUserAssistance: Ask user for help with CAPTCHAs, 2FA codes, or other human-only challenges\n\n## Task Completion\nWhen you have accomplished the task, you MUST call ReportResult with your findings:\n1. Call ReportResult(result=\"...\") with all extracted data and findings\n2. Include any relevant URLs, text content, or structured data\n3. If you took screenshots, describe what they show in the result\n\nRemember: You are a focused automation agent. Complete the task, call ReportResult, then stop.";
6
6
  /**
7
7
  * Truncated prompt for simpler tasks (fewer gadgets, less context).
8
8
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dhalsim",
3
- "version": "1.0.2",
3
+ "version": "1.2.0",
4
4
  "description": "Browser automation for llmist agents using Camoufox anti-detect browser",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -55,7 +55,8 @@
55
55
  "check": "biome check --write .",
56
56
  "test": "vitest run --no-file-parallelism",
57
57
  "precheck": "bun run lint && bun run typecheck && bun run test",
58
- "prepare": "test -n \"$CI\" || lefthook install"
58
+ "prepare": "test -n \"$CI\" || lefthook install",
59
+ "postinstall": "test -n \"$PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD\" || node node_modules/camoufox-js/dist/__main__.js fetch"
59
60
  },
60
61
  "keywords": [
61
62
  "browser",
@@ -100,12 +101,12 @@
100
101
  "bun-types": "^1.3.2",
101
102
  "conventional-changelog-conventionalcommits": "^9.1.0",
102
103
  "lefthook": "^1.6.0",
103
- "llmist": ">=9.2.0",
104
+ "llmist": ">=9.4.0",
104
105
  "semantic-release": "^25.0.2",
105
106
  "typescript": "^5.4.5",
106
107
  "vitest": "^4.0.15"
107
108
  },
108
109
  "peerDependencies": {
109
- "llmist": ">=9.2.0"
110
+ "llmist": ">=9.4.0"
110
111
  }
111
112
  }