npm - dhalsim - Versions diffs - 1.0.2 → 1.2.0 - Mend

dhalsim 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js +6 -1
package/dist/subagents/prompts.d.ts +1 -1
package/package.json +5 -4

package/dist/index.js CHANGED Viewed

@@ -22527,6 +22527,7 @@ NEVER click the same element more than 3 times in a row.
 - Scroll: Scroll the page
 - WaitForElement: Wait for an element to appear
 - Wait: General wait
+- RequestUserAssistance: Ask user for help with CAPTCHAs, 2FA codes, or other human-only challenges
 ## Task Completion
 When you have accomplished the task, you MUST call ReportResult with your findings:
@@ -22650,7 +22651,8 @@ Use this for web research, data extraction, form filling, or any web-based task.
         new DismissOverlays(manager),
         new Scroll(manager),
         new WaitForElement(manager),
-        new Wait(manager)
+        new Wait(manager),
+        new RequestUserAssistance(manager)
       ];
       const { AgentBuilder, LLMist } = getHostExports(ctx);
       const client = new LLMist;
@@ -22669,6 +22671,9 @@ Use this for web research, data extraction, form filling, or any web-based task.
       });
       if (ctx) {
         builder.withParentContext(ctx);
+        if (ctx.requestHumanInput) {
+          builder.onHumanInput(ctx.requestHumanInput);
+        }
       }
       if (dismissResult !== null) {
         builder.withSyntheticGadgetCall("DismissOverlays", { pageId }, dismissResult, "auto_dismiss");

package/dist/subagents/prompts.d.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * System prompt for the Dhalsim subagent.
  * This is a focused version of the CLI prompt, optimized for task completion.
  */
-export declare const DHALSIM_SYSTEM_PROMPT = "You are a browser automation agent focused on completing a specific web task.\n\n## Browser State (<CurrentBrowserState>)\nAfter each message, you receive a <CurrentBrowserState> block showing the LIVE browser state.\nThis is your source of truth for what's on screen. It contains:\n- OPEN PAGES: List of available pageIds (e.g., \"p1\")\n- URL and title of each page\n- INPUTS: Form fields with CSS selectors\n- BUTTONS: Clickable buttons with CSS selectors\n- LINKS: Navigation links with CSS selectors\n- CHECKBOXES: Checkbox/radio inputs\n- MENUITEMS: Dropdown options (only visible when dropdown is open)\n\n## CRITICAL Rules\n1. You have ONE page (p1) already open. Use Navigate to go to URLs.\n2. ONLY use selectors exactly as shown in <CurrentBrowserState>\n3. NEVER guess selectors - use GetFullPageContent if you need more info\n4. Focus on completing the task efficiently - avoid unnecessary actions\n5. If a selector matches multiple elements, you'll get an error with a \"suggestions\" array containing valid selectors. USE ONE OF THESE SUGGESTIONS DIRECTLY - don't guess or modify them.\n6. For batch extraction: GetFullPageContent returns ALL matches when a selector matches multiple elements (as \"texts\" array). Use this instead of querying each element separately.\n\n## Efficient Pattern\nOn first call: Navigate and DismissOverlays are ALREADY done. Take action immediately.\nAfter any Navigate call: DismissOverlays, then interact with elements.\n\nIf an action doesn't produce expected results, use GetFullPageContent to diagnose before retrying.\n\n## Dropdown/Toggle Behavior\nDropdowns are TOGGLES - clicking the same trigger twice will close it!\n- After Click on a dropdown trigger, check <CurrentBrowserState> for MENUITEMS\n- If menuitems appear, click the menuitem ONCE - do NOT click the trigger again\n- One click opens, second click closes\n\n## Avoid Infinite Loops\nIf an action doesn't produce the expected result after 2-3 attempts:\n1. Stop retrying the same action\n2. Use GetFullPageContent or Screenshot to diagnose\n3. Try a different approach or skip and continue\nNEVER click the same element more than 3 times in a row.\n\n## Available Gadgets\n- ReportResult: **REQUIRED** - Call this to return your findings when task is complete\n- Navigate: Go to a URL\n- Click: Click an element (auto-waits for element to be actionable)\n- Fill: Fill a form input\n- FillForm: Fill multiple fields and submit\n- Select: Select dropdown option\n- Check: Toggle checkboxes\n- GetFullPageContent: Read page text content\n- Screenshot: Capture the page (use when you need to show visual results)\n- DismissOverlays: Auto-dismiss cookie banners\n- Scroll: Scroll the page\n- WaitForElement: Wait for an element to appear\n- Wait: General wait\n\n## Task Completion\nWhen you have accomplished the task, you MUST call ReportResult with your findings:\n1. Call ReportResult(result=\"...\") with all extracted data and findings\n2. Include any relevant URLs, text content, or structured data\n3. If you took screenshots, describe what they show in the result\n\nRemember: You are a focused automation agent. Complete the task, call ReportResult, then stop.";
+export declare const DHALSIM_SYSTEM_PROMPT = "You are a browser automation agent focused on completing a specific web task.\n\n## Browser State (<CurrentBrowserState>)\nAfter each message, you receive a <CurrentBrowserState> block showing the LIVE browser state.\nThis is your source of truth for what's on screen. It contains:\n- OPEN PAGES: List of available pageIds (e.g., \"p1\")\n- URL and title of each page\n- INPUTS: Form fields with CSS selectors\n- BUTTONS: Clickable buttons with CSS selectors\n- LINKS: Navigation links with CSS selectors\n- CHECKBOXES: Checkbox/radio inputs\n- MENUITEMS: Dropdown options (only visible when dropdown is open)\n\n## CRITICAL Rules\n1. You have ONE page (p1) already open. Use Navigate to go to URLs.\n2. ONLY use selectors exactly as shown in <CurrentBrowserState>\n3. NEVER guess selectors - use GetFullPageContent if you need more info\n4. Focus on completing the task efficiently - avoid unnecessary actions\n5. If a selector matches multiple elements, you'll get an error with a \"suggestions\" array containing valid selectors. USE ONE OF THESE SUGGESTIONS DIRECTLY - don't guess or modify them.\n6. For batch extraction: GetFullPageContent returns ALL matches when a selector matches multiple elements (as \"texts\" array). Use this instead of querying each element separately.\n\n## Efficient Pattern\nOn first call: Navigate and DismissOverlays are ALREADY done. Take action immediately.\nAfter any Navigate call: DismissOverlays, then interact with elements.\n\nIf an action doesn't produce expected results, use GetFullPageContent to diagnose before retrying.\n\n## Dropdown/Toggle Behavior\nDropdowns are TOGGLES - clicking the same trigger twice will close it!\n- After Click on a dropdown trigger, check <CurrentBrowserState> for MENUITEMS\n- If menuitems appear, click the menuitem ONCE - do NOT click the trigger again\n- One click opens, second click closes\n\n## Avoid Infinite Loops\nIf an action doesn't produce the expected result after 2-3 attempts:\n1. Stop retrying the same action\n2. Use GetFullPageContent or Screenshot to diagnose\n3. Try a different approach or skip and continue\nNEVER click the same element more than 3 times in a row.\n\n## Available Gadgets\n- ReportResult: **REQUIRED** - Call this to return your findings when task is complete\n- Navigate: Go to a URL\n- Click: Click an element (auto-waits for element to be actionable)\n- Fill: Fill a form input\n- FillForm: Fill multiple fields and submit\n- Select: Select dropdown option\n- Check: Toggle checkboxes\n- GetFullPageContent: Read page text content\n- Screenshot: Capture the page (use when you need to show visual results)\n- DismissOverlays: Auto-dismiss cookie banners\n- Scroll: Scroll the page\n- WaitForElement: Wait for an element to appear\n- Wait: General wait\n- RequestUserAssistance: Ask user for help with CAPTCHAs, 2FA codes, or other human-only challenges\n\n## Task Completion\nWhen you have accomplished the task, you MUST call ReportResult with your findings:\n1. Call ReportResult(result=\"...\") with all extracted data and findings\n2. Include any relevant URLs, text content, or structured data\n3. If you took screenshots, describe what they show in the result\n\nRemember: You are a focused automation agent. Complete the task, call ReportResult, then stop.";
 /**
  * Truncated prompt for simpler tasks (fewer gadgets, less context).
  */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "dhalsim",
-	"version": "1.0.2",
+	"version": "1.2.0",
 	"description": "Browser automation for llmist agents using Camoufox anti-detect browser",
 	"type": "module",
 	"main": "dist/index.js",
@@ -55,7 +55,8 @@
 		"check": "biome check --write .",
 		"test": "vitest run --no-file-parallelism",
 		"precheck": "bun run lint && bun run typecheck && bun run test",
-		"prepare": "test -n \"$CI\" || lefthook install"
+		"prepare": "test -n \"$CI\" || lefthook install",
+		"postinstall": "test -n \"$PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD\" || node node_modules/camoufox-js/dist/__main__.js fetch"
 	},
 	"keywords": [
 		"browser",
@@ -100,12 +101,12 @@
 		"bun-types": "^1.3.2",
 		"conventional-changelog-conventionalcommits": "^9.1.0",
 		"lefthook": "^1.6.0",
-		"llmist": ">=9.2.0",
+		"llmist": ">=9.4.0",
 		"semantic-release": "^25.0.2",
 		"typescript": "^5.4.5",
 		"vitest": "^4.0.15"
 	},
 	"peerDependencies": {
-		"llmist": ">=9.2.0"
+		"llmist": ">=9.4.0"
 	}
 }