@agent-infra/browser-context 0.1.6 → 0.2.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -32,7 +32,7 @@ await page.goto('https://example.com/article');
32
32
 
33
33
  // Extract content as Markdown
34
34
  const result = await extractContent(page);
35
- console.log(result.title); // Article title
35
+ console.log(result.title); // Article title
36
36
  console.log(result.content); // Clean Markdown content
37
37
 
38
38
  await browser.close();
@@ -43,16 +43,19 @@ await browser.close();
43
43
  Extract content from an HTML string (Defuddle) or from a page instance (Readability):
44
44
 
45
45
  ```typescript
46
- import { extractWithDefuddle, extractWithReadability } from '@agent-infra/browser-context';
46
+ import {
47
+ extractWithDefuddle,
48
+ extractWithReadability,
49
+ } from '@agent-infra/browser-context';
47
50
 
48
51
  // Using Defuddle (primary method)
49
52
  const result1 = await extractWithDefuddle(htmlString, url, {
50
- markdown: true
53
+ markdown: true,
51
54
  });
52
55
 
53
56
  // Using Readability (fallback method)
54
57
  const result2 = await extractWithReadability(page, {
55
- markdown: true
58
+ markdown: true,
56
59
  });
57
60
  ```
58
61
 
@@ -65,21 +68,24 @@ import { toMarkdown } from '@agent-infra/browser-context';
65
68
 
66
69
  const html = '<h1>Title</h1><p>Content with <strong>bold</strong> text</p>';
67
70
  const markdown = toMarkdown(html, {
68
- gfmExtension: true, // Enable GitHub Flavored Markdown
69
- codeBlockStyle: 'fenced', // Use fenced code blocks
70
- headingStyle: 'atx' // Use # style headings
71
+ gfmExtension: true, // Enable GitHub Flavored Markdown
72
+ codeBlockStyle: 'fenced', // Use fenced code blocks
73
+ headingStyle: 'atx', // Use # style headings
71
74
  });
72
75
 
73
76
  console.log(markdown);
74
77
  // # Title
75
- //
78
+ //
76
79
  // Content with **bold** text
77
80
  ```
78
81
 
79
82
  ### Advanced HTML to Markdown Options
80
83
 
81
84
  ```typescript
82
- import { toMarkdown, DEFAULT_TAGS_TO_REMOVE } from '@agent-infra/browser-context';
85
+ import {
86
+ toMarkdown,
87
+ DEFAULT_TAGS_TO_REMOVE,
88
+ } from '@agent-infra/browser-context';
83
89
 
84
90
  const options = {
85
91
  gfmExtension: true,
@@ -87,7 +93,7 @@ const options = {
87
93
  headingStyle: 'atx' as const,
88
94
  emDelimiter: '*',
89
95
  strongDelimiter: '**',
90
- removeTags: [...DEFAULT_TAGS_TO_REMOVE, 'footer', 'nav'] // Remove additional tags
96
+ removeTags: [...DEFAULT_TAGS_TO_REMOVE, 'footer', 'nav'], // Remove additional tags
91
97
  };
92
98
 
93
99
  const markdown = toMarkdown(htmlContent, options);
@@ -100,9 +106,11 @@ const markdown = toMarkdown(htmlContent, options);
100
106
  Main extraction function that automatically tries Defuddle first, then falls back to Readability.
101
107
 
102
108
  **Parameters:**
109
+
103
110
  - `page`: Puppeteer page instance
104
111
 
105
112
  **Returns:**
113
+
106
114
  - `Promise<{title: string, content: string}>`: Extracted title and Markdown content
107
115
 
108
116
  ### `extractWithDefuddle(html: string, url: string, options: DefuddleOptions)`
@@ -110,6 +118,7 @@ Main extraction function that automatically tries Defuddle first, then falls bac
110
118
  Extract content using the Defuddle library.
111
119
 
112
120
  **Parameters:**
121
+
113
122
  - `html`: HTML content string
114
123
  - `url`: Page URL
115
124
  - `options`: Defuddle configuration options
@@ -119,6 +128,7 @@ Extract content using the Defuddle library.
119
128
  Extract content using Mozilla's Readability algorithm.
120
129
 
121
130
  **Parameters:**
131
+
122
132
  - `page`: Puppeteer page instance
123
133
  - `options.markdown`: Whether to convert to Markdown (default: false)
124
134
 
@@ -127,10 +137,12 @@ Extract content using Mozilla's Readability algorithm.
127
137
  Convert HTML to Markdown format.
128
138
 
129
139
  **Parameters:**
140
+
130
141
  - `html`: HTML content string
131
142
  - `options`: Conversion options
132
143
 
133
144
  **ToMarkdownOptions:**
145
+
134
146
  - `gfmExtension`: Enable GitHub Flavored Markdown (default: true)
135
147
  - `removeTags`: Array of HTML tags to remove
136
148
  - `codeBlockStyle`: 'indented' | 'fenced'
@@ -161,14 +173,17 @@ You can customize this list using the `removeTags` option.
161
173
  ## Browser Compatibility
162
174
 
163
175
  This library is designed to work with:
176
+
164
177
  - Puppeteer
165
178
  - Playwright
166
179
  - Any browser automation tool that provides a Page-like interface
167
180
 
168
181
  ## License
169
182
 
170
- Apache-2.0
183
+ Apache License 2.0.
184
+
185
+ ## Credits
171
186
 
172
- ## Contributing
187
+ Special thanks to the open source projects that inspired this toolkit:
173
188
 
174
- Contributions are welcome! Please feel free to submit a Pull Request.
189
+ - [readability](https://github.com/mozilla/readability/) - A standalone version of the readability lib
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/content/index.ts"],"names":[],"mappings":"AAAA,cAAc,sBAAsB,CAAA;AACpC,cAAc,kBAAkB,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/content/index.ts"],"names":[],"mappings":"AAIA,cAAc,sBAAsB,CAAC;AACrC,cAAc,kBAAkB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"to-markdown.d.ts","sourceRoot":"","sources":["../../src/content/to-markdown.ts"],"names":[],"mappings":"AAKA,OAAO,QAAQ,EAAE,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAG7C,eAAO,MAAM,sBAAsB,EAAE,OAAO,EAc3C,CAAC;AAEF,MAAM,WAAW,iBAAkB,SAAQ,QAAQ,CAAC,OAAO;IACzD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,UAAU,CAAC,EAAE,OAAO,EAAE,CAAC;CACxB;AAED;;;;;GAKG;AACH,wBAAgB,UAAU,CACxB,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,iBAAsB,GAC9B,MAAM,CAgCR"}
1
+ {"version":3,"file":"to-markdown.d.ts","sourceRoot":"","sources":["../../src/content/to-markdown.ts"],"names":[],"mappings":"AAIA,OAAO,QAAQ,EAAE,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAG7C,eAAO,MAAM,sBAAsB,EAAE,OAAO,EAc3C,CAAC;AAEF,MAAM,WAAW,iBAAkB,SAAQ,QAAQ,CAAC,OAAO;IACzD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,UAAU,CAAC,EAAE,OAAO,EAAE,CAAC;CACxB;AAED;;;;;GAKG;AACH,wBAAgB,UAAU,CACxB,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,iBAAsB,GAC9B,MAAM,CAgCR"}
@@ -1 +1 @@
1
- {"version":3,"file":"content/to-markdown.js","sources":["webpack://@agent-infra/browser-context/webpack/runtime/compat_get_default_export","webpack://@agent-infra/browser-context/webpack/runtime/define_property_getters","webpack://@agent-infra/browser-context/webpack/runtime/has_own_property","webpack://@agent-infra/browser-context/webpack/runtime/make_namespace_object","webpack://@agent-infra/browser-context/./src/content/to-markdown.ts"],"sourcesContent":["// getDefaultExport function for compatibility with non-ESM modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};\n","__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","\n/*\n * Copyright (c) 2025 Bytedance, Inc. 
and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\nimport Turndown, { TagName } from 'turndown';\nimport { gfm } from 'turndown-plugin-gfm';\n\nexport const DEFAULT_TAGS_TO_REMOVE: TagName[] = [\n 'script',\n 'style',\n 'link',\n 'head',\n 'iframe',\n 'video',\n 'audio',\n 'canvas',\n 'object',\n 'embed',\n 'noscript',\n 'aside',\n 'dialog',\n];\n\nexport interface ToMarkdownOptions extends Turndown.Options {\n gfmExtension?: boolean;\n removeTags?: TagName[];\n}\n\n/**\n * Convert HTML content to Markdown format\n * @param html HTML string\n * @param options Conversion options\n * @returns Markdown string\n */\nexport function toMarkdown(\n html: string,\n options: ToMarkdownOptions = {},\n): string {\n if (!html) return '';\n\n try {\n const {\n codeBlockStyle = 'fenced',\n headingStyle = 'atx',\n emDelimiter = '*',\n strongDelimiter = '**',\n gfmExtension = true,\n removeTags = DEFAULT_TAGS_TO_REMOVE,\n } = options;\n\n const turndown = new Turndown({\n codeBlockStyle,\n headingStyle,\n emDelimiter,\n strongDelimiter,\n });\n\n // issue: https://github.com/mixmark-io/turndown/issues/210#issuecomment-353666857\n turndown.remove(removeTags);\n\n if (gfmExtension) {\n turndown.use(gfm);\n }\n\n return turndown.turndown(html);\n } catch (error) {\n console.error('HTML to Markdown conversion failed:', error);\n return html;\n 
}\n}\n"],"names":["__webpack_require__","module","getter","definition","key","Object","obj","prop","Symbol","DEFAULT_TAGS_TO_REMOVE","toMarkdown","html","options","codeBlockStyle","headingStyle","emDelimiter","strongDelimiter","gfmExtension","removeTags","turndown","Turndown","gfm","error","console"],"mappings":";;;;;;;IACAA,oBAAoB,CAAC,GAAG,CAACC;QACxB,IAAIC,SAASD,UAAUA,OAAO,UAAU,GACvC,IAAOA,MAAM,CAAC,UAAU,GACxB,IAAOA;QACRD,oBAAoB,CAAC,CAACE,QAAQ;YAAE,GAAGA;QAAO;QAC1C,OAAOA;IACR;;;ICPAF,oBAAoB,CAAC,GAAG,CAAC,UAASG;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGH,oBAAoB,CAAC,CAACG,YAAYC,QAAQ,CAACJ,oBAAoB,CAAC,CAAC,UAASI,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAJ,oBAAoB,CAAC,GAAG,CAACM,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFP,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOQ,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;ACEO,MAAMI,yBAAoC;IAC/C;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;CACD;AAaM,SAASC,WACdC,IAAY,EACZC,UAA6B,CAAC,CAAC;IAE/B,IAAI,CAACD,MAAM,OAAO;IAElB,IAAI;QACF,MAAM,EACJE,iBAAiB,QAAQ,EACzBC,eAAe,KAAK,EACpBC,cAAc,GAAG,EACjBC,kBAAkB,IAAI,EACtBC,eAAe,IAAI,EACnBC,aAAaT,sBAAsB,EACpC,GAAGG;QAEJ,MAAMO,WAAW,IAAIC,CAAAA,2BAAAA,EAAS;YAC5BP;YACAC;YACAC;YACAC;QACF;QAGAG,SAAS,MAAM,CAACD;QAEhB,IAAID,cACFE,SAAS,GAAG,CAACE,6CAAAA,GAAGA;QAGlB,OAAOF,SAAS,QAAQ,CAACR;IAC3B,EAAE,OAAOW,OAAO;QACdC,QAAQ,KAAK,CAAC,uCAAuCD;QACrD,OAAOX;IACT;AACF"}
1
+ {"version":3,"file":"content/to-markdown.js","sources":["webpack://@agent-infra/browser-context/webpack/runtime/compat_get_default_export","webpack://@agent-infra/browser-context/webpack/runtime/define_property_getters","webpack://@agent-infra/browser-context/webpack/runtime/has_own_property","webpack://@agent-infra/browser-context/webpack/runtime/make_namespace_object","webpack://@agent-infra/browser-context/./src/content/to-markdown.ts"],"sourcesContent":["// getDefaultExport function for compatibility with non-ESM modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};\n","__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","/*\n * Copyright (c) 2025 Bytedance, Inc. 
and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\nimport Turndown, { TagName } from 'turndown';\nimport { gfm } from 'turndown-plugin-gfm';\n\nexport const DEFAULT_TAGS_TO_REMOVE: TagName[] = [\n 'script',\n 'style',\n 'link',\n 'head',\n 'iframe',\n 'video',\n 'audio',\n 'canvas',\n 'object',\n 'embed',\n 'noscript',\n 'aside',\n 'dialog',\n];\n\nexport interface ToMarkdownOptions extends Turndown.Options {\n gfmExtension?: boolean;\n removeTags?: TagName[];\n}\n\n/**\n * Convert HTML content to Markdown format\n * @param html HTML string\n * @param options Conversion options\n * @returns Markdown string\n */\nexport function toMarkdown(\n html: string,\n options: ToMarkdownOptions = {},\n): string {\n if (!html) return '';\n\n try {\n const {\n codeBlockStyle = 'fenced',\n headingStyle = 'atx',\n emDelimiter = '*',\n strongDelimiter = '**',\n gfmExtension = true,\n removeTags = DEFAULT_TAGS_TO_REMOVE,\n } = options;\n\n const turndown = new Turndown({\n codeBlockStyle,\n headingStyle,\n emDelimiter,\n strongDelimiter,\n });\n\n // issue: https://github.com/mixmark-io/turndown/issues/210#issuecomment-353666857\n turndown.remove(removeTags);\n\n if (gfmExtension) {\n turndown.use(gfm);\n }\n\n return turndown.turndown(html);\n } catch (error) {\n console.error('HTML to Markdown conversion failed:', error);\n return html;\n 
}\n}\n"],"names":["__webpack_require__","module","getter","definition","key","Object","obj","prop","Symbol","DEFAULT_TAGS_TO_REMOVE","toMarkdown","html","options","codeBlockStyle","headingStyle","emDelimiter","strongDelimiter","gfmExtension","removeTags","turndown","Turndown","gfm","error","console"],"mappings":";;;;;;;IACAA,oBAAoB,CAAC,GAAG,CAACC;QACxB,IAAIC,SAASD,UAAUA,OAAO,UAAU,GACvC,IAAOA,MAAM,CAAC,UAAU,GACxB,IAAOA;QACRD,oBAAoB,CAAC,CAACE,QAAQ;YAAE,GAAGA;QAAO;QAC1C,OAAOA;IACR;;;ICPAF,oBAAoB,CAAC,GAAG,CAAC,UAASG;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGH,oBAAoB,CAAC,CAACG,YAAYC,QAAQ,CAACJ,oBAAoB,CAAC,CAAC,UAASI,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAJ,oBAAoB,CAAC,GAAG,CAACM,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFP,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOQ,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;ACCO,MAAMI,yBAAoC;IAC/C;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;CACD;AAaM,SAASC,WACdC,IAAY,EACZC,UAA6B,CAAC,CAAC;IAE/B,IAAI,CAACD,MAAM,OAAO;IAElB,IAAI;QACF,MAAM,EACJE,iBAAiB,QAAQ,EACzBC,eAAe,KAAK,EACpBC,cAAc,GAAG,EACjBC,kBAAkB,IAAI,EACtBC,eAAe,IAAI,EACnBC,aAAaT,sBAAsB,EACpC,GAAGG;QAEJ,MAAMO,WAAW,IAAIC,CAAAA,2BAAAA,EAAS;YAC5BP;YACAC;YACAC;YACAC;QACF;QAGAG,SAAS,MAAM,CAACD;QAEhB,IAAID,cACFE,SAAS,GAAG,CAACE,6CAAAA,GAAGA;QAGlB,OAAOF,SAAS,QAAQ,CAACR;IAC3B,EAAE,OAAOW,OAAO;QACdC,QAAQ,KAAK,CAAC,uCAAuCD;QACrD,OAAOX;IACT;AACF"}
@@ -1 +1 @@
1
- {"version":3,"file":"content/to-markdown.mjs","sources":["webpack://@agent-infra/browser-context/./src/content/to-markdown.ts"],"sourcesContent":["\n/*\n * Copyright (c) 2025 Bytedance, Inc. and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\nimport Turndown, { TagName } from 'turndown';\nimport { gfm } from 'turndown-plugin-gfm';\n\nexport const DEFAULT_TAGS_TO_REMOVE: TagName[] = [\n 'script',\n 'style',\n 'link',\n 'head',\n 'iframe',\n 'video',\n 'audio',\n 'canvas',\n 'object',\n 'embed',\n 'noscript',\n 'aside',\n 'dialog',\n];\n\nexport interface ToMarkdownOptions extends Turndown.Options {\n gfmExtension?: boolean;\n removeTags?: TagName[];\n}\n\n/**\n * Convert HTML content to Markdown format\n * @param html HTML string\n * @param options Conversion options\n * @returns Markdown string\n */\nexport function toMarkdown(\n html: string,\n options: ToMarkdownOptions = {},\n): string {\n if (!html) return '';\n\n try {\n const {\n codeBlockStyle = 'fenced',\n headingStyle = 'atx',\n emDelimiter = '*',\n strongDelimiter = '**',\n gfmExtension = true,\n removeTags = DEFAULT_TAGS_TO_REMOVE,\n } = options;\n\n const turndown = new Turndown({\n codeBlockStyle,\n headingStyle,\n emDelimiter,\n strongDelimiter,\n });\n\n // issue: https://github.com/mixmark-io/turndown/issues/210#issuecomment-353666857\n turndown.remove(removeTags);\n\n if (gfmExtension) {\n turndown.use(gfm);\n }\n\n return turndown.turndown(html);\n } catch (error) {\n console.error('HTML to Markdown conversion failed:', error);\n return html;\n 
}\n}\n"],"names":["DEFAULT_TAGS_TO_REMOVE","toMarkdown","html","options","codeBlockStyle","headingStyle","emDelimiter","strongDelimiter","gfmExtension","removeTags","turndown","Turndown","gfm","error","console"],"mappings":";;;;;;AAQO,MAAMA,yBAAoC;IAC/C;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;CACD;AAaM,SAASC,WACdC,IAAY,EACZC,UAA6B,CAAC,CAAC;IAE/B,IAAI,CAACD,MAAM,OAAO;IAElB,IAAI;QACF,MAAM,EACJE,iBAAiB,QAAQ,EACzBC,eAAe,KAAK,EACpBC,cAAc,GAAG,EACjBC,kBAAkB,IAAI,EACtBC,eAAe,IAAI,EACnBC,aAAaT,sBAAsB,EACpC,GAAGG;QAEJ,MAAMO,WAAW,IAAIC,WAAS;YAC5BP;YACAC;YACAC;YACAC;QACF;QAGAG,SAAS,MAAM,CAACD;QAEhB,IAAID,cACFE,SAAS,GAAG,CAACE;QAGf,OAAOF,SAAS,QAAQ,CAACR;IAC3B,EAAE,OAAOW,OAAO;QACdC,QAAQ,KAAK,CAAC,uCAAuCD;QACrD,OAAOX;IACT;AACF"}
1
+ {"version":3,"file":"content/to-markdown.mjs","sources":["webpack://@agent-infra/browser-context/./src/content/to-markdown.ts"],"sourcesContent":["/*\n * Copyright (c) 2025 Bytedance, Inc. and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\nimport Turndown, { TagName } from 'turndown';\nimport { gfm } from 'turndown-plugin-gfm';\n\nexport const DEFAULT_TAGS_TO_REMOVE: TagName[] = [\n 'script',\n 'style',\n 'link',\n 'head',\n 'iframe',\n 'video',\n 'audio',\n 'canvas',\n 'object',\n 'embed',\n 'noscript',\n 'aside',\n 'dialog',\n];\n\nexport interface ToMarkdownOptions extends Turndown.Options {\n gfmExtension?: boolean;\n removeTags?: TagName[];\n}\n\n/**\n * Convert HTML content to Markdown format\n * @param html HTML string\n * @param options Conversion options\n * @returns Markdown string\n */\nexport function toMarkdown(\n html: string,\n options: ToMarkdownOptions = {},\n): string {\n if (!html) return '';\n\n try {\n const {\n codeBlockStyle = 'fenced',\n headingStyle = 'atx',\n emDelimiter = '*',\n strongDelimiter = '**',\n gfmExtension = true,\n removeTags = DEFAULT_TAGS_TO_REMOVE,\n } = options;\n\n const turndown = new Turndown({\n codeBlockStyle,\n headingStyle,\n emDelimiter,\n strongDelimiter,\n });\n\n // issue: https://github.com/mixmark-io/turndown/issues/210#issuecomment-353666857\n turndown.remove(removeTags);\n\n if (gfmExtension) {\n turndown.use(gfm);\n }\n\n return turndown.turndown(html);\n } catch (error) {\n console.error('HTML to Markdown conversion failed:', error);\n return html;\n 
}\n}\n"],"names":["DEFAULT_TAGS_TO_REMOVE","toMarkdown","html","options","codeBlockStyle","headingStyle","emDelimiter","strongDelimiter","gfmExtension","removeTags","turndown","Turndown","gfm","error","console"],"mappings":";;;;;;AAOO,MAAMA,yBAAoC;IAC/C;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;CACD;AAaM,SAASC,WACdC,IAAY,EACZC,UAA6B,CAAC,CAAC;IAE/B,IAAI,CAACD,MAAM,OAAO;IAElB,IAAI;QACF,MAAM,EACJE,iBAAiB,QAAQ,EACzBC,eAAe,KAAK,EACpBC,cAAc,GAAG,EACjBC,kBAAkB,IAAI,EACtBC,eAAe,IAAI,EACnBC,aAAaT,sBAAsB,EACpC,GAAGG;QAEJ,MAAMO,WAAW,IAAIC,WAAS;YAC5BP;YACAC;YACAC;YACAC;QACF;QAGAG,SAAS,MAAM,CAACD;QAEhB,IAAID,cACFE,SAAS,GAAG,CAACE;QAGf,OAAOF,SAAS,QAAQ,CAACR;IAC3B,EAAE,OAAOW,OAAO;QACdC,QAAQ,KAAK,CAAC,uCAAuCD;QACrD,OAAOX;IACT;AACF"}
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAIA,cAAc,oBAAoB,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAIA,cAAc,oBAAoB,CAAC"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@agent-infra/browser-context",
3
3
  "description": "get browser context for AI Agent",
4
- "version": "0.1.6",
4
+ "version": "0.2.0-alpha.3",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
7
7
  "types": "dist/index.d.ts",
@@ -23,18 +23,18 @@
23
23
  "access": "public"
24
24
  },
25
25
  "dependencies": {
26
- "defuddle": "0.6.4",
27
- "puppeteer-core": "24.15.0",
28
- "turndown": "7.2.0",
26
+ "defuddle": "0.6.6",
27
+ "puppeteer-core": "24.23.0",
28
+ "turndown": "7.2.1",
29
29
  "turndown-plugin-gfm": "1.0.2"
30
30
  },
31
31
  "devDependencies": {
32
32
  "@types/turndown": "5.0.5",
33
- "@types/node": "24.1.0",
34
- "typescript": "5.8.3",
33
+ "@types/node": "24.7.1",
34
+ "typescript": "5.9.3",
35
35
  "vitest": "3.2.4",
36
36
  "@vitest/coverage-v8": "3.2.4",
37
- "@rslib/core": "0.11.0"
37
+ "@rslib/core": "0.15.0"
38
38
  },
39
39
  "scripts": {
40
40
  "dev": "rslib build --watch",