browser-use 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dist/agent/service.js +2 -0
  2. package/dist/agent/system_prompt.md +269 -0
  3. package/dist/agent/system_prompt_anthropic_flash.md +240 -0
  4. package/dist/agent/system_prompt_browser_use.md +18 -0
  5. package/dist/agent/system_prompt_browser_use_flash.md +15 -0
  6. package/dist/agent/system_prompt_browser_use_no_thinking.md +17 -0
  7. package/dist/agent/system_prompt_flash.md +16 -0
  8. package/dist/agent/system_prompt_flash_anthropic.md +30 -0
  9. package/dist/agent/system_prompt_no_thinking.md +245 -0
  10. package/dist/browser/cloud/index.d.ts +1 -0
  11. package/dist/browser/cloud/index.js +1 -0
  12. package/dist/browser/cloud/management.d.ts +130 -0
  13. package/dist/browser/cloud/management.js +140 -0
  14. package/dist/browser/events.d.ts +61 -3
  15. package/dist/browser/events.js +66 -0
  16. package/dist/browser/profile.d.ts +1 -0
  17. package/dist/browser/profile.js +25 -8
  18. package/dist/browser/session.d.ts +59 -2
  19. package/dist/browser/session.js +943 -131
  20. package/dist/browser/watchdogs/base.js +34 -1
  21. package/dist/browser/watchdogs/captcha-watchdog.d.ts +26 -0
  22. package/dist/browser/watchdogs/captcha-watchdog.js +151 -0
  23. package/dist/browser/watchdogs/index.d.ts +1 -0
  24. package/dist/browser/watchdogs/index.js +1 -0
  25. package/dist/browser/watchdogs/screenshot-watchdog.js +4 -3
  26. package/dist/cli.d.ts +120 -0
  27. package/dist/cli.js +1816 -4
  28. package/dist/controller/service.js +106 -362
  29. package/dist/controller/views.d.ts +9 -6
  30. package/dist/controller/views.js +8 -5
  31. package/dist/dom/dom_tree/index.js +24 -11
  32. package/dist/filesystem/file-system.js +1 -1
  33. package/dist/llm/litellm/chat.d.ts +11 -0
  34. package/dist/llm/litellm/chat.js +16 -0
  35. package/dist/llm/litellm/index.d.ts +1 -0
  36. package/dist/llm/litellm/index.js +1 -0
  37. package/dist/llm/models.js +29 -3
  38. package/dist/llm/oci-raw/chat.d.ts +64 -0
  39. package/dist/llm/oci-raw/chat.js +350 -0
  40. package/dist/llm/oci-raw/index.d.ts +2 -0
  41. package/dist/llm/oci-raw/index.js +2 -0
  42. package/dist/llm/oci-raw/serializer.d.ts +12 -0
  43. package/dist/llm/oci-raw/serializer.js +128 -0
  44. package/dist/mcp/server.d.ts +1 -0
  45. package/dist/mcp/server.js +62 -13
  46. package/dist/skill-cli/direct.d.ts +100 -0
  47. package/dist/skill-cli/direct.js +984 -0
  48. package/dist/skill-cli/index.d.ts +2 -0
  49. package/dist/skill-cli/index.js +2 -0
  50. package/dist/skill-cli/server.d.ts +2 -0
  51. package/dist/skill-cli/server.js +472 -11
  52. package/dist/skill-cli/tunnel.d.ts +61 -0
  53. package/dist/skill-cli/tunnel.js +257 -0
  54. package/dist/sync/auth.d.ts +8 -0
  55. package/dist/sync/auth.js +12 -0
  56. package/dist/utils.d.ts +1 -1
  57. package/dist/utils.js +2 -1
  58. package/package.json +22 -4
@@ -5,8 +5,8 @@ import { ActionResult } from '../agent/views.js';
5
5
  import { ClickCoordinateEvent, ClickElementEvent, CloseTabEvent, GetDropdownOptionsEvent, GoBackEvent, NavigateToUrlEvent, ScrollEvent, ScrollToTextEvent, ScreenshotEvent, SelectDropdownOptionEvent, SendKeysEvent, SwitchTabEvent, TypeTextEvent, UploadFileEvent, WaitEvent, } from '../browser/events.js';
6
6
  import { BrowserError } from '../browser/views.js';
7
7
  import { chunkMarkdownByStructure, extractCleanMarkdownFromHtml, } from '../dom/markdown-extractor.js';
8
- import { extractPdfText, FileSystem } from '../filesystem/file-system.js';
9
- import { ClickElementActionIndexOnlySchema, ClickElementActionSchema, CloseTabActionSchema, DoneActionSchema, EvaluateActionSchema, ExtractStructuredDataActionSchema, FindElementsActionSchema, DropdownOptionsActionSchema, SelectDropdownActionSchema, GoToUrlActionSchema, InputTextActionSchema, NoParamsActionSchema, ReadLongContentActionSchema, ReadFileActionSchema, ReplaceFileStrActionSchema, ScrollActionSchema, ScrollToTextActionSchema, SearchActionSchema, SearchPageActionSchema, SearchGoogleActionSchema, ScreenshotActionSchema, StructuredOutputActionSchema, SwitchTabActionSchema, UploadFileActionSchema, WaitActionSchema, WriteFileActionSchema, SendKeysActionSchema, SheetsRangeActionSchema, SheetsUpdateActionSchema, SheetsInputActionSchema, } from './views.js';
8
+ import { FileSystem } from '../filesystem/file-system.js';
9
+ import { ClickElementActionIndexOnlySchema, ClickElementActionSchema, CloseTabActionSchema, DoneActionSchema, EvaluateActionSchema, ExtractStructuredDataActionSchema, FindElementsActionSchema, DropdownOptionsActionSchema, SelectDropdownActionSchema, GoToUrlActionSchema, InputTextActionSchema, NoParamsActionSchema, ReadFileActionSchema, ReplaceFileStrActionSchema, ScrollActionSchema, ScrollToTextActionSchema, SearchActionSchema, SearchPageActionSchema, SearchGoogleActionSchema, ScreenshotActionSchema, SaveAsPdfActionSchema, StructuredOutputActionSchema, SwitchTabActionSchema, UploadFileActionSchema, WaitActionSchema, WriteFileActionSchema, SendKeysActionSchema, SheetsRangeActionSchema, SheetsUpdateActionSchema, SheetsInputActionSchema, } from './views.js';
10
10
  import { Registry } from './registry/service.js';
11
11
  import { SystemMessage, UserMessage } from '../llm/messages.js';
12
12
  import { createLogger } from '../logging-config.js';
@@ -32,6 +32,16 @@ const createAbortError = (reason) => {
32
32
  const isAbortError = (error) => {
33
33
  return error instanceof Error && error.name === 'AbortError';
34
34
  };
35
+ const resolveUniqueOutputPath = async (directory, fileName) => {
36
+ const parsed = path.parse(fileName);
37
+ let candidate = path.join(directory, fileName);
38
+ let counter = 1;
39
+ while (fs.existsSync(candidate)) {
40
+ candidate = path.join(directory, `${parsed.name} (${counter})${parsed.ext}`);
41
+ counter += 1;
42
+ }
43
+ return candidate;
44
+ };
35
45
  const throwIfAborted = (signal) => {
36
46
  if (signal?.aborted) {
37
47
  throw createAbortError(signal.reason);
@@ -864,7 +874,7 @@ export class Controller {
864
874
  registerContentActions() {
865
875
  const registry = this.registry;
866
876
  const contentLogger = this.logger;
867
- const extractStructuredDescription = "LLM extracts structured data from page markdown. Use when: on right page, know what to extract, haven't called before on same page+query. Can't get interactive elements. Set extract_links=True for URLs. Use start_from_char if previous extraction was truncated to extract data further down the page.";
877
+ const extractStructuredDescription = "LLM extracts structured data from page markdown. Use when: on right page, know what to extract, haven't called before on same page+query. Can't get interactive elements. Set extract_links=True for URLs. Use start_from_char if previous extraction was truncated to extract data further down the page. When paginating across pages, pass already_collected with item identifiers (names/URLs) from prior pages to avoid duplicates.";
868
878
  this.registry.action(extractStructuredDescription, {
869
879
  param_model: ExtractStructuredDataActionSchema,
870
880
  })(async function extract_structured_data(params, { page, page_extraction_llm, extraction_schema, file_system, signal }) {
@@ -989,6 +999,11 @@ export class Controller {
989
999
  }
990
1000
  content = sanitize_surrogates(content);
991
1001
  const sanitizedQuery = sanitize_surrogates(params.query);
1002
+ const alreadyCollected = Array.isArray(params.already_collected)
1003
+ ? params.already_collected
1004
+ .map((item) => sanitize_surrogates(String(item)).trim())
1005
+ .filter(Boolean)
1006
+ : [];
992
1007
  const parseJsonFromCompletion = (completion) => {
993
1008
  const trimmed = completion.trim();
994
1009
  const fencedMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/i);
@@ -1018,12 +1033,20 @@ You will be given a query, a JSON Schema, and the markdown of a webpage that has
1018
1033
  - Your response MUST conform to the provided JSON Schema exactly.
1019
1034
  - If a required field's value cannot be found on the page, use null (if the schema allows it) or an empty string / empty array as appropriate.
1020
1035
  - If the content was truncated, extract what is available from the visible portion.
1036
+ - If <already_collected> items are provided, skip any items whose name/title/URL matches those listed. Do not include duplicates.
1021
1037
  </instructions>`.trim();
1022
1038
  const schemaJson = JSON.stringify(effectiveOutputSchema, null, 2);
1039
+ const alreadyCollectedSection = alreadyCollected.length > 0
1040
+ ? `\n\n<already_collected>\nSkip items whose name/title/URL matches any of these already-collected identifiers:\n${alreadyCollected
1041
+ .slice(0, 100)
1042
+ .map((item) => `- ${item}`)
1043
+ .join('\n')}\n</already_collected>`
1044
+ : '';
1023
1045
  const prompt = `<query>\n${sanitizedQuery}\n</query>\n\n` +
1024
1046
  `<output_schema>\n${schemaJson}\n</output_schema>\n\n` +
1025
1047
  `<content_stats>\n${statsSummary}\n</content_stats>\n\n` +
1026
- `<webpage_content>\n${content}\n</webpage_content>`;
1048
+ `<webpage_content>\n${content}\n</webpage_content>` +
1049
+ alreadyCollectedSection;
1027
1050
  const response = await page_extraction_llm.ainvoke([new SystemMessage(systemPrompt), new UserMessage(prompt)], undefined, { signal: signal ?? undefined });
1028
1051
  throwIfAborted(signal);
1029
1052
  const completion = response?.completion;
@@ -1086,6 +1109,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
1086
1109
  - If the information relevant to the query is not available in the page, your response should mention that.
1087
1110
  - If the query asks for all items, products, etc., make sure to directly list all of them.
1088
1111
  - If the content was truncated and you need more information, note that the user can use start_from_char parameter to continue from where truncation occurred.
1112
+ - If <already_collected> items are provided, exclude any results whose name/title/URL matches those already collected. Do not include duplicates.
1089
1113
  </instructions>
1090
1114
 
1091
1115
  <output>
@@ -1094,7 +1118,13 @@ You will be given a query and the markdown of a webpage that has been filtered t
1094
1118
  </output>`.trim();
1095
1119
  const prompt = `<query>\n${sanitizedQuery}\n</query>\n\n` +
1096
1120
  `<content_stats>\n${statsSummary}\n</content_stats>\n\n` +
1097
- `<webpage_content>\n${content}\n</webpage_content>`;
1121
+ `<webpage_content>\n${content}\n</webpage_content>` +
1122
+ (alreadyCollected.length > 0
1123
+ ? `\n\n<already_collected>\nSkip items whose name/title/URL matches any of these already-collected identifiers:\n${alreadyCollected
1124
+ .slice(0, 100)
1125
+ .map((item) => `- ${item}`)
1126
+ .join('\n')}\n</already_collected>`
1127
+ : '');
1098
1128
  const response = await page_extraction_llm.ainvoke([new SystemMessage(systemPrompt), new UserMessage(prompt)], undefined, { signal: signal ?? undefined });
1099
1129
  throwIfAborted(signal);
1100
1130
  const completion = response?.completion;
@@ -1660,363 +1690,6 @@ You will be given a query and the markdown of a webpage that has been filtered t
1660
1690
  include_extracted_content_only_once: true,
1661
1691
  });
1662
1692
  });
1663
- this.registry.action('Intelligently read long content to find specific information. Works on current page (source="page") or files. For large content, uses search to identify relevant sections. Best for long articles, documents, or any content where you know what you are looking for.', { param_model: ReadLongContentActionSchema })(async function read_long_content(params, { browser_session, page_extraction_llm, available_file_paths, signal }) {
1664
- throwIfAborted(signal);
1665
- const goal = params.goal.trim();
1666
- const source = (params.source || 'page').trim();
1667
- const context = (params.context || '').trim();
1668
- const maxChars = 50000;
1669
- const chunkSize = 2000;
1670
- const fallbackSearchTerms = (() => {
1671
- const tokens = `${goal} ${context}`
1672
- .toLowerCase()
1673
- .match(/[a-z0-9][a-z0-9-]{2,}/g);
1674
- if (!tokens?.length) {
1675
- return goal ? [goal] : ['content'];
1676
- }
1677
- return Array.from(new Set(tokens)).slice(0, 5);
1678
- })();
1679
- const extractSearchTerms = async () => {
1680
- const extractionLlm = page_extraction_llm;
1681
- if (!extractionLlm || typeof extractionLlm.ainvoke !== 'function') {
1682
- return fallbackSearchTerms;
1683
- }
1684
- const prompt = `Extract 3-5 key search terms from this goal that would help find relevant sections.
1685
- Return only the terms, one per line, no numbering or bullets.
1686
-
1687
- Goal: ${goal}
1688
-
1689
- Context: ${context}`;
1690
- try {
1691
- const response = await runWithTimeoutAndSignal(async () => (await extractionLlm.ainvoke([new UserMessage(prompt)], undefined, { signal: signal ?? undefined })), 12000, signal, 'Timed out extracting search terms');
1692
- const parsed = (response?.completion ?? '')
1693
- .split('\n')
1694
- .map((line) => line
1695
- .trim()
1696
- .replace(/^[-\d.)\s]+/, '')
1697
- .trim())
1698
- .filter(Boolean);
1699
- const unique = Array.from(new Set(parsed)).slice(0, 5);
1700
- return unique.length ? unique : fallbackSearchTerms;
1701
- }
1702
- catch (error) {
1703
- if (isAbortError(error)) {
1704
- throw error;
1705
- }
1706
- return fallbackSearchTerms;
1707
- }
1708
- };
1709
- const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
1710
- const searchText = (value, pattern, contextChars = 100) => {
1711
- let regex;
1712
- try {
1713
- regex = new RegExp(pattern, 'gi');
1714
- }
1715
- catch {
1716
- regex = new RegExp(escapeRegExp(pattern), 'gi');
1717
- }
1718
- const matches = [];
1719
- let match = regex.exec(value);
1720
- while (match != null) {
1721
- const start = Math.max(0, match.index - contextChars);
1722
- const end = Math.min(value.length, match.index + match[0].length + contextChars);
1723
- matches.push({
1724
- position: match.index,
1725
- snippet: value.slice(start, end),
1726
- });
1727
- if (!regex.global) {
1728
- break;
1729
- }
1730
- match = regex.exec(value);
1731
- }
1732
- return matches;
1733
- };
1734
- const chunkContent = (value, size = chunkSize) => {
1735
- const chunks = [];
1736
- for (let start = 0; start < value.length; start += size) {
1737
- chunks.push({
1738
- start,
1739
- end: Math.min(start + size, value.length),
1740
- text: value.slice(start, start + size),
1741
- });
1742
- }
1743
- return chunks;
1744
- };
1745
- const getCurrentPageUrl = (page) => {
1746
- const value = page?.url;
1747
- if (typeof value === 'function') {
1748
- return String(value.call(page) ?? '');
1749
- }
1750
- return typeof value === 'string' ? value : '';
1751
- };
1752
- const readPdfByPage = async (filePath) => {
1753
- const buffer = await fsp.readFile(filePath);
1754
- try {
1755
- const pdfParseModule = (await import('pdf-parse'));
1756
- if (typeof pdfParseModule.PDFParse === 'function') {
1757
- const Parser = pdfParseModule.PDFParse;
1758
- const parser = new Parser({ data: buffer });
1759
- try {
1760
- let numPages = 0;
1761
- try {
1762
- const info = await parser.getInfo?.({ parsePageInfo: false });
1763
- numPages = Number(info?.total ?? 0);
1764
- }
1765
- catch {
1766
- numPages = 0;
1767
- }
1768
- if (!Number.isFinite(numPages) || numPages <= 0) {
1769
- const full = await parser.getText();
1770
- const text = typeof full?.text === 'string' ? full.text : '';
1771
- return {
1772
- numPages: 1,
1773
- pageTexts: [text],
1774
- totalChars: text.length,
1775
- };
1776
- }
1777
- const pageTexts = [];
1778
- let totalChars = 0;
1779
- for (let pageNumber = 1; pageNumber <= numPages; pageNumber += 1) {
1780
- const pageResult = await parser.getText({
1781
- partial: [pageNumber],
1782
- });
1783
- const text = typeof pageResult?.text === 'string' ? pageResult.text : '';
1784
- pageTexts.push(text);
1785
- totalChars += text.length;
1786
- }
1787
- return {
1788
- numPages,
1789
- pageTexts,
1790
- totalChars,
1791
- };
1792
- }
1793
- finally {
1794
- if (typeof parser.destroy === 'function') {
1795
- await parser.destroy();
1796
- }
1797
- }
1798
- }
1799
- }
1800
- catch {
1801
- // Fall back to the compatibility parser.
1802
- }
1803
- const parsed = await extractPdfText(buffer);
1804
- const text = parsed.text ?? '';
1805
- return {
1806
- numPages: Math.max(parsed.totalPages, 1),
1807
- pageTexts: [text],
1808
- totalChars: text.length,
1809
- };
1810
- };
1811
- try {
1812
- let content = '';
1813
- let sourceName = 'content';
1814
- if (source.toLowerCase() === 'page') {
1815
- if (!browser_session) {
1816
- throw new BrowserError('Browser session missing for page content.');
1817
- }
1818
- const page = await browser_session.get_current_page();
1819
- if (!page?.content) {
1820
- throw new BrowserError('No active page available to read content.');
1821
- }
1822
- const html = await page.content();
1823
- const pageUrl = getCurrentPageUrl(page);
1824
- const markdown = extractCleanMarkdownFromHtml(html || '', {
1825
- extract_links: false,
1826
- method: 'page_content',
1827
- url: pageUrl || undefined,
1828
- });
1829
- content = markdown.content;
1830
- sourceName = 'current page';
1831
- if (!content) {
1832
- return new ActionResult({
1833
- extracted_content: 'Error: No page content available',
1834
- long_term_memory: 'Failed to read page: no content',
1835
- });
1836
- }
1837
- }
1838
- else {
1839
- const allowedPaths = new Set(Array.isArray(available_file_paths) ? available_file_paths : []);
1840
- const downloadedFiles = Array.isArray(browser_session?.downloaded_files)
1841
- ? browser_session.downloaded_files
1842
- : [];
1843
- for (const filePath of downloadedFiles) {
1844
- allowedPaths.add(filePath);
1845
- }
1846
- if (!allowedPaths.has(source)) {
1847
- const message = `Error: File path not in available_file_paths: ${source}. ` +
1848
- 'The user must add this path to available_file_paths when creating the Agent.';
1849
- return new ActionResult({
1850
- extracted_content: message,
1851
- long_term_memory: `Failed to read: file path not allowed: ${source}`,
1852
- });
1853
- }
1854
- if (!fs.existsSync(source)) {
1855
- return new ActionResult({
1856
- extracted_content: `Error: File not found: ${source}`,
1857
- long_term_memory: 'Failed to read: file not found',
1858
- });
1859
- }
1860
- const ext = path.extname(source).toLowerCase();
1861
- sourceName = path.basename(source);
1862
- if (ext === '.pdf') {
1863
- const pdfData = await readPdfByPage(source);
1864
- const numPages = pdfData.numPages;
1865
- const pageTexts = pdfData.pageTexts;
1866
- const totalChars = pdfData.totalChars;
1867
- if (totalChars <= maxChars) {
1868
- const contentParts = [];
1869
- for (let pageIndex = 0; pageIndex < pageTexts.length; pageIndex += 1) {
1870
- const pageText = pageTexts[pageIndex] ?? '';
1871
- if (!pageText.trim()) {
1872
- continue;
1873
- }
1874
- contentParts.push(`--- Page ${pageIndex + 1} ---\n${pageText}`);
1875
- }
1876
- const allContent = contentParts.join('\n\n');
1877
- return new ActionResult({
1878
- extracted_content: `PDF: ${sourceName} (${numPages} pages)\n\n${allContent}`,
1879
- long_term_memory: `Read ${sourceName} (${numPages} pages, ${totalChars.toLocaleString()} chars) for goal: ${goal.slice(0, 50)}`,
1880
- include_extracted_content_only_once: true,
1881
- });
1882
- }
1883
- const searchTerms = await extractSearchTerms();
1884
- const pageScores = new Map();
1885
- for (const term of searchTerms) {
1886
- if (!term.trim()) {
1887
- continue;
1888
- }
1889
- const pattern = new RegExp(escapeRegExp(term), 'i');
1890
- for (let pageIndex = 0; pageIndex < pageTexts.length; pageIndex += 1) {
1891
- const pageText = pageTexts[pageIndex] ?? '';
1892
- if (pattern.test(pageText)) {
1893
- const pageNumber = pageIndex + 1;
1894
- pageScores.set(pageNumber, (pageScores.get(pageNumber) ?? 0) + 1);
1895
- }
1896
- }
1897
- }
1898
- const pagesToRead = [1];
1899
- const sortedPages = Array.from(pageScores.entries()).sort((a, b) => b[1] - a[1]);
1900
- for (const [pageNumber] of sortedPages) {
1901
- if (!pagesToRead.includes(pageNumber)) {
1902
- pagesToRead.push(pageNumber);
1903
- }
1904
- }
1905
- const contentParts = [];
1906
- let charsUsed = 0;
1907
- const pagesIncluded = [];
1908
- const pageOrder = Array.from(new Set(pagesToRead)).sort((a, b) => a - b);
1909
- for (const pageNumber of pageOrder) {
1910
- const pageText = pageTexts[pageNumber - 1] ?? '';
1911
- const pageHeader = `--- Page ${pageNumber} ---\n`;
1912
- const remaining = maxChars - charsUsed;
1913
- if (remaining < pageHeader.length + 50) {
1914
- break;
1915
- }
1916
- let pageContent = `${pageHeader}${pageText}`;
1917
- if (pageContent.length > remaining) {
1918
- const truncationSuffix = '\n[...truncated]';
1919
- pageContent =
1920
- pageContent.slice(0, remaining - truncationSuffix.length) +
1921
- truncationSuffix;
1922
- }
1923
- contentParts.push(pageContent);
1924
- charsUsed += pageContent.length;
1925
- pagesIncluded.push(pageNumber);
1926
- }
1927
- const partialPdfContent = contentParts.join('\n\n');
1928
- return new ActionResult({
1929
- extracted_content: `PDF: ${sourceName} (${numPages} pages, showing ${pagesIncluded.length} relevant)\n\n` +
1930
- partialPdfContent,
1931
- long_term_memory: `Read ${sourceName} (${pagesIncluded.length} relevant pages of ${numPages}) ` +
1932
- `for goal: ${goal.slice(0, 50)}`,
1933
- include_extracted_content_only_once: true,
1934
- });
1935
- }
1936
- const fileBuffer = await fsp.readFile(source);
1937
- content = fileBuffer.toString('utf-8');
1938
- }
1939
- if (!content.trim()) {
1940
- return new ActionResult({
1941
- extracted_content: `Error: No readable content found in ${sourceName}`,
1942
- long_term_memory: `Failed to read ${sourceName}: no content`,
1943
- });
1944
- }
1945
- if (content.length <= maxChars) {
1946
- return new ActionResult({
1947
- extracted_content: `Content from ${sourceName} (${content.length.toLocaleString()} chars):\n\n${content}`,
1948
- long_term_memory: `Read ${sourceName} (${content.length.toLocaleString()} chars) for goal: ${goal.slice(0, 50)}`,
1949
- include_extracted_content_only_once: true,
1950
- });
1951
- }
1952
- const searchTerms = await extractSearchTerms();
1953
- const chunks = chunkContent(content, chunkSize);
1954
- const chunkScores = new Map();
1955
- for (const term of searchTerms) {
1956
- const matches = searchText(content, term);
1957
- for (const match of matches) {
1958
- for (let index = 0; index < chunks.length; index += 1) {
1959
- const chunk = chunks[index];
1960
- if (chunk &&
1961
- chunk.start <= match.position &&
1962
- match.position < chunk.end) {
1963
- chunkScores.set(index, (chunkScores.get(index) ?? 0) + 1);
1964
- break;
1965
- }
1966
- }
1967
- }
1968
- }
1969
- if (!chunkScores.size) {
1970
- const truncated = content.slice(0, maxChars);
1971
- return new ActionResult({
1972
- extracted_content: `Content from ${sourceName} (first ${maxChars.toLocaleString()} of ${content.length.toLocaleString()} chars):\n\n${truncated}`,
1973
- long_term_memory: `Read ${sourceName} (truncated to ${maxChars.toLocaleString()} chars, no matches for search terms)`,
1974
- include_extracted_content_only_once: true,
1975
- });
1976
- }
1977
- const sortedChunks = Array.from(chunkScores.entries()).sort((a, b) => b[1] - a[1]);
1978
- const selectedIndices = new Set([0]);
1979
- for (const [chunkIndex] of sortedChunks) {
1980
- selectedIndices.add(chunkIndex);
1981
- }
1982
- const resultParts = [];
1983
- let totalChars = 0;
1984
- const orderedIndices = Array.from(selectedIndices).sort((a, b) => a - b);
1985
- for (const index of orderedIndices) {
1986
- const chunk = chunks[index];
1987
- if (!chunk) {
1988
- continue;
1989
- }
1990
- if (totalChars + chunk.text.length > maxChars) {
1991
- break;
1992
- }
1993
- const previousIndex = index - 1;
1994
- if (index > 0 && !selectedIndices.has(previousIndex)) {
1995
- resultParts.push('\n[...]\n');
1996
- }
1997
- resultParts.push(chunk.text);
1998
- totalChars += chunk.text.length;
1999
- }
2000
- const resultContent = resultParts.join('');
2001
- return new ActionResult({
2002
- extracted_content: `Content from ${sourceName} (relevant sections, ${totalChars.toLocaleString()} of ${content.length.toLocaleString()} chars):\n\n` +
2003
- resultContent,
2004
- long_term_memory: `Read ${sourceName} (${selectedIndices.size} relevant sections of ${chunks.length}) ` +
2005
- `for goal: ${goal.slice(0, 50)}`,
2006
- include_extracted_content_only_once: true,
2007
- });
2008
- }
2009
- catch (error) {
2010
- if (isAbortError(error)) {
2011
- throw error;
2012
- }
2013
- const errorMessage = `Error reading content: ${String(error.message ?? error)}`;
2014
- return new ActionResult({
2015
- extracted_content: errorMessage,
2016
- long_term_memory: errorMessage,
2017
- });
2018
- }
2019
- });
2020
1693
  this.registry.action('Write content to a file. By default this OVERWRITES the entire file - use append=true to add to an existing file, or use replace_file for targeted edits within a file. ' +
2021
1694
  'FILENAME RULES: Use only letters, numbers, underscores, hyphens, dots, parentheses. Spaces are auto-converted to hyphens. ' +
2022
1695
  'SUPPORTED EXTENSIONS: .txt, .md, .json, .jsonl, .csv, .html, .xml, .pdf, .docx. ' +
@@ -2096,6 +1769,77 @@ Context: ${context}`;
2096
1769
  },
2097
1770
  });
2098
1771
  });
1772
+ this.registry.action('Save the current page as a PDF file. Returns the file path of the saved PDF. Use this to capture the full page content as a printable document.', { param_model: SaveAsPdfActionSchema })(async function save_as_pdf(params, { browser_session, file_system, signal }) {
1773
+ if (!browser_session)
1774
+ throw new Error('Browser session missing');
1775
+ throwIfAborted(signal);
1776
+ const paperSizes = {
1777
+ letter: { width: 8.5, height: 11 },
1778
+ legal: { width: 8.5, height: 14 },
1779
+ a4: { width: 8.27, height: 11.69 },
1780
+ a3: { width: 11.69, height: 16.54 },
1781
+ tabloid: { width: 11, height: 17 },
1782
+ };
1783
+ const page = await browser_session.get_current_page?.();
1784
+ if (!page) {
1785
+ throw new BrowserError('No active page available for save_as_pdf.');
1786
+ }
1787
+ const paperKey = String(params.paper_format ?? 'Letter').toLowerCase();
1788
+ const paperSize = paperSizes[paperKey] ?? paperSizes.letter;
1789
+ const cdpSession = await browser_session.get_or_create_cdp_session?.(page);
1790
+ if (!cdpSession?.send) {
1791
+ throw new BrowserError('CDP session unavailable for save_as_pdf.');
1792
+ }
1793
+ const result = await cdpSession.send('Page.printToPDF', {
1794
+ printBackground: params.print_background,
1795
+ landscape: params.landscape,
1796
+ scale: params.scale,
1797
+ paperWidth: paperSize.width,
1798
+ paperHeight: paperSize.height,
1799
+ preferCSSPageSize: true,
1800
+ });
1801
+ const pdfData = result && typeof result.data === 'string' ? result.data : null;
1802
+ if (!pdfData) {
1803
+ throw new BrowserError('CDP Page.printToPDF returned no data.');
1804
+ }
1805
+ const fsInstance = file_system ?? new FileSystem(process.cwd(), false);
1806
+ let fileName = params.file_name?.trim();
1807
+ if (!fileName) {
1808
+ try {
1809
+ const titlePromise = typeof page.title === 'function'
1810
+ ? page.title()
1811
+ : Promise.resolve('');
1812
+ const pageTitle = await Promise.race([
1813
+ titlePromise,
1814
+ new Promise((_, reject) => {
1815
+ setTimeout(() => reject(new Error('timeout')), 2000);
1816
+ }),
1817
+ ]);
1818
+ const safeTitle = String(pageTitle)
1819
+ .replace(/[^\w\s-]+/g, '')
1820
+ .trim()
1821
+ .slice(0, 50);
1822
+ fileName = safeTitle || 'page';
1823
+ }
1824
+ catch {
1825
+ fileName = 'page';
1826
+ }
1827
+ }
1828
+ if (!fileName.toLowerCase().endsWith('.pdf')) {
1829
+ fileName = `${fileName}.pdf`;
1830
+ }
1831
+ fileName = FileSystem.sanitize_filename(fileName);
1832
+ const filePath = await resolveUniqueOutputPath(fsInstance.get_dir(), fileName);
1833
+ await fsp.writeFile(filePath, Buffer.from(pdfData, 'base64'));
1834
+ const fileSize = (await fsp.stat(filePath)).size;
1835
+ const baseName = path.basename(filePath);
1836
+ const msg = `Saved page as PDF: ${baseName} (${fileSize.toLocaleString()} bytes)`;
1837
+ return new ActionResult({
1838
+ extracted_content: msg,
1839
+ long_term_memory: `${msg}. Full path: ${filePath}`,
1840
+ attachments: [filePath],
1841
+ });
1842
+ });
2099
1843
  this.registry.action('Execute browser JavaScript on the current page and return the result.', { param_model: EvaluateActionSchema })(async function evaluate(params, { browser_session, signal }) {
2100
1844
  if (!browser_session)
2101
1845
  throw new Error('Browser session missing');
@@ -77,6 +77,14 @@ export declare const ScreenshotActionSchema: z.ZodObject<{
77
77
  file_name: z.ZodOptional<z.ZodString>;
78
78
  }, z.core.$strip>;
79
79
  export type ScreenshotAction = z.infer<typeof ScreenshotActionSchema>;
80
+ export declare const SaveAsPdfActionSchema: z.ZodObject<{
81
+ file_name: z.ZodOptional<z.ZodString>;
82
+ print_background: z.ZodDefault<z.ZodBoolean>;
83
+ landscape: z.ZodDefault<z.ZodBoolean>;
84
+ scale: z.ZodDefault<z.ZodNumber>;
85
+ paper_format: z.ZodDefault<z.ZodString>;
86
+ }, z.core.$strip>;
87
+ export type SaveAsPdfAction = z.infer<typeof SaveAsPdfActionSchema>;
80
88
  export declare const EvaluateActionSchema: z.ZodObject<{
81
89
  code: z.ZodString;
82
90
  }, z.core.$strip>;
@@ -90,6 +98,7 @@ export declare const ExtractStructuredDataActionSchema: z.ZodObject<{
90
98
  extract_links: z.ZodDefault<z.ZodBoolean>;
91
99
  start_from_char: z.ZodDefault<z.ZodNumber>;
92
100
  output_schema: z.ZodOptional<z.ZodNullable<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
101
+ already_collected: z.ZodDefault<z.ZodArray<z.ZodString>>;
93
102
  }, z.core.$strip>;
94
103
  export type ExtractStructuredDataAction = z.infer<typeof ExtractStructuredDataActionSchema>;
95
104
  export declare const SearchPageActionSchema: z.ZodObject<{
@@ -112,12 +121,6 @@ export declare const ReadFileActionSchema: z.ZodObject<{
112
121
  file_name: z.ZodString;
113
122
  }, z.core.$strip>;
114
123
  export type ReadFileAction = z.infer<typeof ReadFileActionSchema>;
115
- export declare const ReadLongContentActionSchema: z.ZodObject<{
116
- goal: z.ZodString;
117
- source: z.ZodDefault<z.ZodString>;
118
- context: z.ZodDefault<z.ZodString>;
119
- }, z.core.$strip>;
120
- export type ReadLongContentAction = z.infer<typeof ReadLongContentActionSchema>;
121
124
  export declare const WriteFileActionSchema: z.ZodObject<{
122
125
  file_name: z.ZodString;
123
126
  content: z.ZodString;
@@ -64,6 +64,13 @@ export const UploadFileActionSchema = z.object({
64
64
  export const ScreenshotActionSchema = z.object({
65
65
  file_name: z.string().optional(),
66
66
  });
67
+ export const SaveAsPdfActionSchema = z.object({
68
+ file_name: z.string().optional(),
69
+ print_background: z.boolean().default(true),
70
+ landscape: z.boolean().default(false),
71
+ scale: z.number().min(0.1).max(2.0).default(1.0),
72
+ paper_format: z.string().default('Letter'),
73
+ });
67
74
  export const EvaluateActionSchema = z.object({
68
75
  code: z.string(),
69
76
  });
@@ -75,6 +82,7 @@ export const ExtractStructuredDataActionSchema = z.object({
75
82
  extract_links: z.boolean().default(false),
76
83
  start_from_char: z.number().int().default(0),
77
84
  output_schema: z.record(z.string(), z.unknown()).nullable().optional(),
85
+ already_collected: z.array(z.string()).default([]),
78
86
  });
79
87
  export const SearchPageActionSchema = z.object({
80
88
  pattern: z.string(),
@@ -93,11 +101,6 @@ export const FindElementsActionSchema = z.object({
93
101
  export const ReadFileActionSchema = z.object({
94
102
  file_name: z.string(),
95
103
  });
96
- export const ReadLongContentActionSchema = z.object({
97
- goal: z.string(),
98
- source: z.string().default('page'),
99
- context: z.string().default(''),
100
- });
101
104
  export const WriteFileActionSchema = z.object({
102
105
  file_name: z.string(),
103
106
  content: z.string(),
@@ -343,17 +343,29 @@
343
343
  }
344
344
  }
345
345
 
346
- // // Add this function to perform cleanup when needed
347
- // function cleanupHighlights() {
348
- // if (window._highlightCleanupFunctions && window._highlightCleanupFunctions.length) {
349
- // window._highlightCleanupFunctions.forEach(fn => fn());
350
- // window._highlightCleanupFunctions = [];
351
- // }
352
-
353
- // // Also remove the container
354
- // const container = document.getElementById(HIGHLIGHT_CONTAINER_ID);
355
- // if (container) container.remove();
356
- // }
346
+ function cleanupHighlights() {
347
+ try {
348
+ const cleanupFns = Array.isArray(window._highlightCleanupFunctions)
349
+ ? window._highlightCleanupFunctions
350
+ : [];
351
+ for (const fn of cleanupFns) {
352
+ try {
353
+ if (typeof fn === 'function') {
354
+ fn();
355
+ }
356
+ } catch (error) {
357
+ // Ignore cleanup callback failures to keep extraction resilient.
358
+ }
359
+ }
360
+ window._highlightCleanupFunctions = [];
361
+ const container = document.getElementById(HIGHLIGHT_CONTAINER_ID);
362
+ if (container) {
363
+ container.remove();
364
+ }
365
+ } catch (error) {
366
+ // Ignore cleanup failures and continue with DOM extraction.
367
+ }
368
+ }
357
369
 
358
370
  /**
359
371
  * Gets the position of an element in its parent.
@@ -1391,6 +1403,7 @@
1391
1403
  return id;
1392
1404
  }
1393
1405
 
1406
+ cleanupHighlights();
1394
1407
  const rootId = buildDomTree(document.body);
1395
1408
 
1396
1409
  // Clear the cache before starting
@@ -638,7 +638,7 @@ export class FileSystem {
638
638
  truncationNote =
639
639
  `\n\n[Showing ${pagesIncluded.length} of ${numPages} pages. ` +
640
640
  `Skipped pages: [${skippedPreview}${skippedSuffix}]. ` +
641
- 'Use read_long_content with a specific goal to find relevant sections.]';
641
+ 'Use extract with start_from_char to read further into the file.]';
642
642
  }
643
643
  result.message =
644
644
  `Read from file ${filename} (${numPages} pages, ${totalChars.toLocaleString()} chars total).\n` +