dhalsim 2.3.0 → 2.4.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -15962,7 +15962,7 @@ function createGadgetsByName(names, config2) {
15962
15962
  }
15963
15963
 
15964
15964
  // src/subagents/dhalsim.ts
15965
- import { Gadget as Gadget13, z as z14, getHostExports, resolveSubagentModel, resolveValue } from "llmist";
15965
+ import { Gadget as Gadget13, z as z14, getHostExports, resolveSubagentModel, resolveValue, TaskCompletionSignal } from "llmist";
15966
15966
 
15967
15967
  // src/state/page-state.ts
15968
15968
  var DEFAULT_CONFIG = {
@@ -16553,25 +16553,7 @@ ${states.join("\n\n")}
16553
16553
  };
16554
16554
 
16555
16555
  // src/subagents/prompts.ts
16556
- var GADGET_LIST_WITH_USER_ASSISTANCE = `## Available Gadgets
16557
- - ReportResult: **REQUIRED** - Call this to return your findings when task is complete
16558
- - Navigate: Go to a URL
16559
- - Click: Click an element (auto-waits for element to be actionable)
16560
- - Fill: Fill a form input
16561
- - FillForm: Fill multiple fields and submit
16562
- - Select: Select dropdown option
16563
- - Check: Toggle checkboxes
16564
- - GetFullPageContent: Read page text content
16565
- - Screenshot: Capture the page (use when you need to show visual results)
16566
- - DismissOverlays: Auto-dismiss cookie banners
16567
- - Scroll: Scroll the page
16568
- - WaitForElement: Wait for an element to appear
16569
- - Wait: General wait
16570
- - RequestUserAssistance: Ask user for help with CAPTCHAs, 2FA codes, or other human-only challenges`;
16571
- var GADGET_LIST_WITHOUT_USER_ASSISTANCE = `## Available Gadgets
16572
- - ReportResult: **REQUIRED** - Call this to return your findings when task is complete
16573
- - Navigate: Go to a URL
16574
- - Click: Click an element (auto-waits for element to be actionable)
16556
+ var GADGETS_CORE = `- Click: Click an element (auto-waits for element to be actionable)
16575
16557
  - Fill: Fill a form input
16576
16558
  - FillForm: Fill multiple fields and submit
16577
16559
  - Select: Select dropdown option
@@ -16582,8 +16564,44 @@ var GADGET_LIST_WITHOUT_USER_ASSISTANCE = `## Available Gadgets
16582
16564
  - Scroll: Scroll the page
16583
16565
  - WaitForElement: Wait for an element to appear
16584
16566
  - Wait: General wait`;
16567
+ function buildGadgetList(options) {
16568
+ const lines = ["## Available Gadgets", "- ReportResult: **REQUIRED** - Call this to return your findings when task is complete"];
16569
+ if (options.includeNavigation) {
16570
+ lines.push("- Navigate: Go to a URL");
16571
+ }
16572
+ lines.push(GADGETS_CORE);
16573
+ if (options.includeUserAssistance) {
16574
+ lines.push("- RequestUserAssistance: Ask user for help with CAPTCHAs, 2FA codes, or other human-only challenges");
16575
+ }
16576
+ return lines.join("\n");
16577
+ }
16585
16578
  function createDhalsimSystemPrompt(options) {
16586
- const gadgetList = options.includeUserAssistance ? GADGET_LIST_WITH_USER_ASSISTANCE : GADGET_LIST_WITHOUT_USER_ASSISTANCE;
16579
+ const includeNavigation = !options.disableNavigation;
16580
+ const gadgetList = buildGadgetList({
16581
+ includeNavigation,
16582
+ includeUserAssistance: options.includeUserAssistance
16583
+ });
16584
+ const criticalRules = includeNavigation ? `## CRITICAL Rules
16585
+ 1. You have ONE page (p1) already open. Use Navigate to go to URLs.
16586
+ 2. ONLY use selectors exactly as shown in <CurrentBrowserState>
16587
+ 3. NEVER guess selectors - use GetFullPageContent if you need more info
16588
+ 4. Focus on completing the task efficiently - avoid unnecessary actions
16589
+ 5. If a selector matches multiple elements, you'll get an error with a "suggestions" array containing valid selectors. USE ONE OF THESE SUGGESTIONS DIRECTLY - don't guess or modify them.
16590
+ 6. For batch extraction: GetFullPageContent returns ALL matches when a selector matches multiple elements (as "texts" array). Use this instead of querying each element separately.` : `## CRITICAL Rules
16591
+ 1. You have ONE page (p1) open at the target URL. You cannot navigate to other URLs.
16592
+ 2. ONLY use selectors exactly as shown in <CurrentBrowserState>
16593
+ 3. NEVER guess selectors - use GetFullPageContent if you need more info
16594
+ 4. Focus on completing the task efficiently - avoid unnecessary actions
16595
+ 5. If a selector matches multiple elements, you'll get an error with a "suggestions" array containing valid selectors. USE ONE OF THESE SUGGESTIONS DIRECTLY - don't guess or modify them.
16596
+ 6. For batch extraction: GetFullPageContent returns ALL matches when a selector matches multiple elements (as "texts" array). Use this instead of querying each element separately.`;
16597
+ const efficientPattern = includeNavigation ? `## Efficient Pattern
16598
+ On first call: Navigate and DismissOverlays are ALREADY done. Take action immediately.
16599
+ After any Navigate call: DismissOverlays, then interact with elements.
16600
+
16601
+ If an action doesn't produce expected results, use GetFullPageContent to diagnose before retrying.` : `## Efficient Pattern
16602
+ On first call: DismissOverlays is ALREADY done. Take action immediately.
16603
+
16604
+ If an action doesn't produce expected results, use GetFullPageContent to diagnose before retrying.`;
16587
16605
  return `You are a browser automation agent focused on completing a specific web task.
16588
16606
 
16589
16607
  ## Browser State (<CurrentBrowserState>)
@@ -16597,19 +16615,9 @@ This is your source of truth for what's on screen. It contains:
16597
16615
  - CHECKBOXES: Checkbox/radio inputs
16598
16616
  - MENUITEMS: Dropdown options (only visible when dropdown is open)
16599
16617
 
16600
- ## CRITICAL Rules
16601
- 1. You have ONE page (p1) already open. Use Navigate to go to URLs.
16602
- 2. ONLY use selectors exactly as shown in <CurrentBrowserState>
16603
- 3. NEVER guess selectors - use GetFullPageContent if you need more info
16604
- 4. Focus on completing the task efficiently - avoid unnecessary actions
16605
- 5. If a selector matches multiple elements, you'll get an error with a "suggestions" array containing valid selectors. USE ONE OF THESE SUGGESTIONS DIRECTLY - don't guess or modify them.
16606
- 6. For batch extraction: GetFullPageContent returns ALL matches when a selector matches multiple elements (as "texts" array). Use this instead of querying each element separately.
16618
+ ${criticalRules}
16607
16619
 
16608
- ## Efficient Pattern
16609
- On first call: Navigate and DismissOverlays are ALREADY done. Take action immediately.
16610
- After any Navigate call: DismissOverlays, then interact with elements.
16611
-
16612
- If an action doesn't produce expected results, use GetFullPageContent to diagnose before retrying.
16620
+ ${efficientPattern}
16613
16621
 
16614
16622
  ## Dropdown/Toggle Behavior
16615
16623
  Dropdowns are TOGGLES - clicking the same trigger twice will close it!
@@ -16660,10 +16668,8 @@ var ReportResult = class extends Gadget13({
16660
16668
  )
16661
16669
  })
16662
16670
  }) {
16663
- result = null;
16664
16671
  execute(params) {
16665
- this.result = params.result;
16666
- return "Result reported successfully.";
16672
+ throw new TaskCompletionSignal(params.result);
16667
16673
  }
16668
16674
  };
16669
16675
  var Dhalsim = class extends Gadget13({
@@ -16680,7 +16686,8 @@ Use this for web research, data extraction, form filling, or any web-based task.
16680
16686
  headless: z14.boolean().optional().describe("Run browser in headless mode (default: true, configurable via CLI)"),
16681
16687
  timeoutMs: z14.number().optional().describe("Overall timeout in ms (default: 300000 = 5 min, 0 = disabled, configurable via CLI)"),
16682
16688
  disableCache: z14.boolean().optional().describe("Disable browser cache for lower memory usage (default: false, configurable via CLI)"),
16683
- navigationTimeoutMs: z14.number().optional().describe("Navigation timeout in ms (default: 60000, configurable via CLI)")
16689
+ navigationTimeoutMs: z14.number().optional().describe("Navigation timeout in ms (default: 60000, configurable via CLI)"),
16690
+ disableNavigation: z14.boolean().optional().describe("Disable Navigate gadget to restrict agent to the initial URL (default: false, configurable via CLI)")
16684
16691
  }),
16685
16692
  timeoutMs: 3e5
16686
16693
  // 5 minutes - web browsing can take time
@@ -16688,12 +16695,14 @@ Use this for web research, data extraction, form filling, or any web-based task.
16688
16695
  customSessionManager;
16689
16696
  customSystemPrompt;
16690
16697
  userAssistanceEnabled;
16698
+ navigationDisabled;
16691
16699
  customUserAssistanceCallback;
16692
16700
  constructor(options) {
16693
16701
  super();
16694
16702
  this.customSessionManager = options?.sessionManager;
16695
16703
  this.customSystemPrompt = options?.systemPrompt;
16696
16704
  this.userAssistanceEnabled = options?.userAssistance;
16705
+ this.navigationDisabled = options?.disableNavigation;
16697
16706
  this.customUserAssistanceCallback = options?.onUserAssistance;
16698
16707
  if (options?.timeoutMs !== void 0) {
16699
16708
  this.timeoutMs = options.timeoutMs === 0 ? void 0 : options.timeoutMs;
@@ -16724,8 +16733,13 @@ Use this for web research, data extraction, form filling, or any web-based task.
16724
16733
  subagentKey: "navigationTimeoutMs",
16725
16734
  defaultValue: 6e4
16726
16735
  });
16736
+ const disableNavigation = resolveValue(ctx, "BrowseWeb", {
16737
+ runtime: params.disableNavigation,
16738
+ subagentKey: "disableNavigation",
16739
+ defaultValue: this.navigationDisabled ?? false
16740
+ });
16727
16741
  const userAssistanceEnabled = this.userAssistanceEnabled ?? (this.customUserAssistanceCallback !== void 0 || ctx?.requestHumanInput !== void 0);
16728
- logger13?.debug(`[BrowseWeb] User assistance enabled=${userAssistanceEnabled}`);
16742
+ logger13?.debug(`[BrowseWeb] User assistance enabled=${userAssistanceEnabled}, disableNavigation=${disableNavigation}`);
16729
16743
  const collectedMedia = [];
16730
16744
  const manager = this.customSessionManager ?? new BrowserSessionManager(logger13);
16731
16745
  const isOwnedManager = !this.customSessionManager;
@@ -16771,7 +16785,8 @@ Use this for web research, data extraction, form filling, or any web-based task.
16771
16785
  const gadgets = [
16772
16786
  reportResult,
16773
16787
  // First so it's prominent in the list
16774
- new Navigate(manager),
16788
+ // Conditionally include Navigate (excluded when disableNavigation is true)
16789
+ ...disableNavigation ? [] : [new Navigate(manager)],
16775
16790
  new Click(manager),
16776
16791
  new Fill(manager),
16777
16792
  new FillForm(manager),
@@ -16788,7 +16803,7 @@ Use this for web research, data extraction, form filling, or any web-based task.
16788
16803
  ];
16789
16804
  const { AgentBuilder, LLMist } = getHostExports(ctx);
16790
16805
  const client = new LLMist();
16791
- const systemPrompt = this.customSystemPrompt ?? createDhalsimSystemPrompt({ includeUserAssistance: userAssistanceEnabled });
16806
+ const systemPrompt = this.customSystemPrompt ?? createDhalsimSystemPrompt({ includeUserAssistance: userAssistanceEnabled, disableNavigation });
16792
16807
  const builder = new AgentBuilder(client).withModel(model).withSystem(systemPrompt).withMaxIterations(maxIterations).withGadgets(...gadgets).withTrailingMessage((trailingCtx) => [
16793
16808
  pageStateScanner.getCachedState(),
16794
16809
  "",
@@ -16844,6 +16859,7 @@ Use this for web research, data extraction, form filling, or any web-based task.
16844
16859
  Task: ${task}`);
16845
16860
  logger13?.debug(`[BrowseWeb] Starting agent loop model=${model} maxIterations=${maxIterations}`);
16846
16861
  let finalResult = "";
16862
+ let reportedResult;
16847
16863
  for await (const event of agent.run()) {
16848
16864
  if (ctx?.signal?.aborted) {
16849
16865
  break;
@@ -16854,15 +16870,15 @@ Task: ${task}`);
16854
16870
  collectedMedia.push(media);
16855
16871
  }
16856
16872
  }
16857
- if (reportResult.result !== null) {
16858
- break;
16873
+ if (event.result.gadgetName === "ReportResult" && event.result.breaksLoop) {
16874
+ reportedResult = event.result.result;
16859
16875
  }
16860
16876
  } else if (event.type === "text") {
16861
16877
  finalResult = event.content;
16862
16878
  }
16863
16879
  }
16864
16880
  return {
16865
- result: reportResult.result || finalResult || "Task completed but no result text was generated.",
16881
+ result: reportedResult || finalResult || "Task completed but no result text was generated.",
16866
16882
  media: collectedMedia.length > 0 ? collectedMedia : void 0
16867
16883
  };
16868
16884
  } finally {