webpeel 0.12.0 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/README.md +82 -9
  2. package/dist/cli.js +97 -6
  3. package/dist/cli.js.map +1 -1
  4. package/dist/core/actions.d.ts +28 -0
  5. package/dist/core/actions.d.ts.map +1 -1
  6. package/dist/core/actions.js +60 -0
  7. package/dist/core/actions.js.map +1 -1
  8. package/dist/core/bm25-filter.d.ts +10 -0
  9. package/dist/core/bm25-filter.d.ts.map +1 -1
  10. package/dist/core/bm25-filter.js +40 -0
  11. package/dist/core/bm25-filter.js.map +1 -1
  12. package/dist/core/content-pruner.d.ts +12 -5
  13. package/dist/core/content-pruner.d.ts.map +1 -1
  14. package/dist/core/content-pruner.js +247 -190
  15. package/dist/core/content-pruner.js.map +1 -1
  16. package/dist/core/research.d.ts +67 -0
  17. package/dist/core/research.d.ts.map +1 -0
  18. package/dist/core/research.js +254 -0
  19. package/dist/core/research.js.map +1 -0
  20. package/dist/index.d.ts.map +1 -1
  21. package/dist/index.js +37 -3
  22. package/dist/index.js.map +1 -1
  23. package/dist/mcp/server.js +107 -2
  24. package/dist/mcp/server.js.map +1 -1
  25. package/dist/server/app.d.ts +14 -0
  26. package/dist/server/app.d.ts.map +1 -0
  27. package/dist/server/app.js +189 -0
  28. package/dist/server/app.js.map +1 -0
  29. package/dist/server/auth-store.d.ts +28 -0
  30. package/dist/server/auth-store.d.ts.map +1 -0
  31. package/dist/server/auth-store.js +89 -0
  32. package/dist/server/auth-store.js.map +1 -0
  33. package/dist/server/job-queue.d.ts +93 -0
  34. package/dist/server/job-queue.d.ts.map +1 -0
  35. package/dist/server/job-queue.js +144 -0
  36. package/dist/server/job-queue.js.map +1 -0
  37. package/dist/server/middleware/auth.d.ts +28 -0
  38. package/dist/server/middleware/auth.d.ts.map +1 -0
  39. package/dist/server/middleware/auth.js +183 -0
  40. package/dist/server/middleware/auth.js.map +1 -0
  41. package/dist/server/middleware/rate-limit.d.ts +23 -0
  42. package/dist/server/middleware/rate-limit.d.ts.map +1 -0
  43. package/dist/server/middleware/rate-limit.js +126 -0
  44. package/dist/server/middleware/rate-limit.js.map +1 -0
  45. package/dist/server/middleware/url-validator.d.ts +16 -0
  46. package/dist/server/middleware/url-validator.d.ts.map +1 -0
  47. package/dist/server/middleware/url-validator.js +187 -0
  48. package/dist/server/middleware/url-validator.js.map +1 -0
  49. package/dist/server/pg-auth-store.d.ts +129 -0
  50. package/dist/server/pg-auth-store.d.ts.map +1 -0
  51. package/dist/server/pg-auth-store.js +457 -0
  52. package/dist/server/pg-auth-store.js.map +1 -0
  53. package/dist/server/pg-job-queue.d.ts +60 -0
  54. package/dist/server/pg-job-queue.d.ts.map +1 -0
  55. package/dist/server/pg-job-queue.js +365 -0
  56. package/dist/server/pg-job-queue.js.map +1 -0
  57. package/dist/server/premium/domain-intel.d.ts +17 -0
  58. package/dist/server/premium/domain-intel.d.ts.map +1 -0
  59. package/dist/server/premium/domain-intel.js +134 -0
  60. package/dist/server/premium/domain-intel.js.map +1 -0
  61. package/dist/server/premium/index.d.ts +18 -0
  62. package/dist/server/premium/index.d.ts.map +1 -0
  63. package/dist/server/premium/index.js +36 -0
  64. package/dist/server/premium/index.js.map +1 -0
  65. package/dist/server/premium/swr-cache.d.ts +15 -0
  66. package/dist/server/premium/swr-cache.d.ts.map +1 -0
  67. package/dist/server/premium/swr-cache.js +35 -0
  68. package/dist/server/premium/swr-cache.js.map +1 -0
  69. package/dist/server/routes/activity.d.ts +7 -0
  70. package/dist/server/routes/activity.d.ts.map +1 -0
  71. package/dist/server/routes/activity.js +66 -0
  72. package/dist/server/routes/activity.js.map +1 -0
  73. package/dist/server/routes/agent.d.ts +12 -0
  74. package/dist/server/routes/agent.d.ts.map +1 -0
  75. package/dist/server/routes/agent.js +356 -0
  76. package/dist/server/routes/agent.js.map +1 -0
  77. package/dist/server/routes/answer.d.ts +6 -0
  78. package/dist/server/routes/answer.d.ts.map +1 -0
  79. package/dist/server/routes/answer.js +124 -0
  80. package/dist/server/routes/answer.js.map +1 -0
  81. package/dist/server/routes/batch.d.ts +7 -0
  82. package/dist/server/routes/batch.d.ts.map +1 -0
  83. package/dist/server/routes/batch.js +287 -0
  84. package/dist/server/routes/batch.js.map +1 -0
  85. package/dist/server/routes/cli-usage.d.ts +7 -0
  86. package/dist/server/routes/cli-usage.d.ts.map +1 -0
  87. package/dist/server/routes/cli-usage.js +121 -0
  88. package/dist/server/routes/cli-usage.js.map +1 -0
  89. package/dist/server/routes/compat.d.ts +24 -0
  90. package/dist/server/routes/compat.d.ts.map +1 -0
  91. package/dist/server/routes/compat.js +651 -0
  92. package/dist/server/routes/compat.js.map +1 -0
  93. package/dist/server/routes/extract.d.ts +9 -0
  94. package/dist/server/routes/extract.d.ts.map +1 -0
  95. package/dist/server/routes/extract.js +121 -0
  96. package/dist/server/routes/extract.js.map +1 -0
  97. package/dist/server/routes/fetch.d.ts +7 -0
  98. package/dist/server/routes/fetch.d.ts.map +1 -0
  99. package/dist/server/routes/fetch.js +537 -0
  100. package/dist/server/routes/fetch.js.map +1 -0
  101. package/dist/server/routes/health.d.ts +8 -0
  102. package/dist/server/routes/health.d.ts.map +1 -0
  103. package/dist/server/routes/health.js +36 -0
  104. package/dist/server/routes/health.js.map +1 -0
  105. package/dist/server/routes/jobs.d.ts +8 -0
  106. package/dist/server/routes/jobs.d.ts.map +1 -0
  107. package/dist/server/routes/jobs.js +374 -0
  108. package/dist/server/routes/jobs.js.map +1 -0
  109. package/dist/server/routes/mcp.d.ts +16 -0
  110. package/dist/server/routes/mcp.d.ts.map +1 -0
  111. package/dist/server/routes/mcp.js +475 -0
  112. package/dist/server/routes/mcp.js.map +1 -0
  113. package/dist/server/routes/oauth.d.ts +10 -0
  114. package/dist/server/routes/oauth.d.ts.map +1 -0
  115. package/dist/server/routes/oauth.js +296 -0
  116. package/dist/server/routes/oauth.js.map +1 -0
  117. package/dist/server/routes/screenshot.d.ts +10 -0
  118. package/dist/server/routes/screenshot.d.ts.map +1 -0
  119. package/dist/server/routes/screenshot.js +217 -0
  120. package/dist/server/routes/screenshot.js.map +1 -0
  121. package/dist/server/routes/search.d.ts +7 -0
  122. package/dist/server/routes/search.d.ts.map +1 -0
  123. package/dist/server/routes/search.js +287 -0
  124. package/dist/server/routes/search.js.map +1 -0
  125. package/dist/server/routes/stats.d.ts +7 -0
  126. package/dist/server/routes/stats.d.ts.map +1 -0
  127. package/dist/server/routes/stats.js +65 -0
  128. package/dist/server/routes/stats.js.map +1 -0
  129. package/dist/server/routes/stripe.d.ts +9 -0
  130. package/dist/server/routes/stripe.d.ts.map +1 -0
  131. package/dist/server/routes/stripe.js +233 -0
  132. package/dist/server/routes/stripe.js.map +1 -0
  133. package/dist/server/routes/users.d.ts +9 -0
  134. package/dist/server/routes/users.d.ts.map +1 -0
  135. package/dist/server/routes/users.js +954 -0
  136. package/dist/server/routes/users.js.map +1 -0
  137. package/dist/server/routes/webhooks.d.ts +15 -0
  138. package/dist/server/routes/webhooks.d.ts.map +1 -0
  139. package/dist/server/routes/webhooks.js +73 -0
  140. package/dist/server/routes/webhooks.js.map +1 -0
  141. package/dist/server/sentry.d.ts +14 -0
  142. package/dist/server/sentry.d.ts.map +1 -0
  143. package/dist/server/sentry.js +39 -0
  144. package/dist/server/sentry.js.map +1 -0
  145. package/dist/types.d.ts +13 -0
  146. package/dist/types.d.ts.map +1 -1
  147. package/dist/types.js.map +1 -1
  148. package/package.json +3 -2
@@ -1,240 +1,297 @@
1
1
  /**
2
2
  * Content Density Pruner
3
3
  *
4
- * Scores HTML block elements by text quality and removes low-value blocks
5
- * (sidebars, footers, navigation, ads) that CSS selectors miss.
4
+ * Two-pass pruning to reduce HTML before markdown conversion:
6
5
  *
7
- * Inspired by Crawl4AI's fit_markdown approach — typical 40-60% token savings.
6
+ * Pass 1 — Semantic removal: strip elements whose tag or class/id clearly
7
+ * mark them as page chrome (nav, footer, sidebar, cookie banners, ads).
8
+ *
9
+ * Pass 2 — Density scoring: score remaining block elements by text density,
10
+ * link density, tag importance, and word count. Remove low-scorers.
11
+ *
12
+ * Inspired by Crawl4AI's PruningContentFilter — targets 40-60% token savings.
8
13
  */
9
14
  import * as cheerio from 'cheerio';
10
- /** Block-level elements we score */
11
- const BLOCK_ELEMENTS = new Set([
12
- 'div', 'section', 'article', 'aside', 'nav', 'footer', 'header',
13
- 'main', 'p', 'ul', 'ol', 'table', 'blockquote', 'figure', 'form', 'details',
15
+ // -----------------------------------------------------------------------
16
+ // Pass 1 — Semantic removal: tags and class/id patterns
17
+ // -----------------------------------------------------------------------
18
+ /** Tags that are almost always page chrome, not article content. */
19
+ const CHROME_TAGS = new Set([
20
+ 'nav', 'footer', 'aside', 'noscript',
14
21
  ]);
15
22
  /**
16
- * Elements that should NEVER be removed — they are content containers.
17
- * Scoring them would be wrong: if we remove <main>, we lose everything.
23
+ * Class/id patterns that indicate page chrome.
24
+ * Tested against lowercased class/id strings.
18
25
  */
19
- const PROTECTED_ELEMENTS = new Set(['main', 'article', 'body']);
26
+ const CHROME_PATTERNS = [
27
+ /\bsidebar\b/,
28
+ /\bcookie/,
29
+ /\bbanner\b/,
30
+ /\b(ad|ads|advert)\b/,
31
+ /\bpopup\b/,
32
+ /\bmodal\b/,
33
+ /\boverlay\b/,
34
+ /\bsocial/,
35
+ /\bshare\b/,
36
+ /\bbreadcrumb/,
37
+ /\bskip-?link/,
38
+ /\bfootnote/,
39
+ /\brelated-?(post|article)/,
40
+ /\bnewsletter/,
41
+ /\bsubscri/,
42
+ /\bcomment/,
43
+ /\b(sign-?up|sign-?in|log-?in)\b/,
44
+ /\btoc\b/,
45
+ /\btable-?of-?contents\b/,
46
+ /\bgdpr\b/,
47
+ /\bconsent\b/,
48
+ ];
20
49
  /**
21
- * Tag importance scores (-2 to +3).
22
- * These reflect semantic value of the element type.
50
+ * Tags we never remove (they likely wrap main content).
51
+ * We recurse into them but never strip the element itself.
23
52
  */
24
- const TAG_IMPORTANCE = {
25
- article: 3,
26
- main: 3,
27
- p: 2,
28
- h1: 2, h2: 2, h3: 2, h4: 2, h5: 2, h6: 2,
29
- blockquote: 2,
30
- pre: 2,
31
- code: 2,
32
- figure: 2,
33
- figcaption: 2,
34
- section: 1,
35
- td: 1,
36
- th: 1,
37
- li: 1,
38
- dd: 1,
39
- dt: 1,
40
- div: 0,
41
- span: 0,
42
- aside: -1,
43
- header: -1,
44
- form: -1,
45
- nav: -2,
46
- footer: -2,
47
- };
48
- /** Normalize tag importance (-2..+3) to 0..1 range */
49
- function normalizeTagScore(rawScore) {
50
- // Range is 5 units (-2 to +3), shift by +2 and divide
51
- return (rawScore + 2) / 5;
52
- }
53
- function getTagImportance(tagName) {
54
- return TAG_IMPORTANCE[tagName.toLowerCase()] ?? 0;
55
- }
56
- /** Word count bonus using log scale (0-1) */
57
- function wordCountBonus(text) {
58
- const words = text.trim().split(/\s+/).filter((w) => w.length > 0);
59
- if (words.length === 0)
60
- return 0;
61
- return Math.min(Math.log(words.length + 1) / Math.log(1000), 1.0);
62
- }
53
+ const PROTECTED_TAGS = new Set(['main', 'article', 'body']);
63
54
  /**
64
- * Position weight based on normalized position in document (0-1).
65
- * Middle 60% of the page (0.2–0.8) scores 1.0.
66
- * Top/bottom 20% scores linearly from 0 to 1.
55
+ * Tags we never remove during density scoring (Pass 2).
56
+ * Headings, paragraphs, and semantic content elements should survive
57
+ * even if they're small — they carry essential meaning.
67
58
  */
68
- function positionWeight(normalizedPos) {
69
- if (normalizedPos >= 0.2 && normalizedPos <= 0.8)
70
- return 1.0;
71
- if (normalizedPos < 0.2)
72
- return normalizedPos / 0.2;
73
- // normalizedPos > 0.8
74
- return (1.0 - normalizedPos) / 0.2;
75
- }
76
- /** Max HTML length for a "leaf" block — blocks larger than this are recursed into */
77
- const MAX_LEAF_BLOCK_HTML = 5000;
59
+ const DENSITY_SAFE_TAGS = new Set([
60
+ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
61
+ 'p', 'pre', 'code', 'blockquote', 'figcaption',
62
+ 'main', 'article', 'body',
63
+ ]);
78
64
  /**
79
- * Score a single element and return its BlockData.
65
+ * Class/id patterns that protect an element from removal.
80
66
  */
81
- function scoreElement($, el) {
67
+ const CONTENT_PATTERNS = [
68
+ /\barticle/,
69
+ /\bpost-?content/,
70
+ /\bentry-?content/,
71
+ /\bmain-?content/,
72
+ /\bstory/,
73
+ /\bblog/,
74
+ /\bpage-?content/,
75
+ /\bcontent-?area/,
76
+ ];
77
+ function isChromeBySemantic(el, $) {
82
78
  const tagName = el.tagName?.toLowerCase() ?? '';
83
- const $el = $(el);
84
- const outerHtml = $.html($el) ?? '';
85
- // Clone to compute visible text (strip scripts/styles)
86
- const clone = $el.clone();
87
- clone.find('script, style, noscript').remove();
88
- const visibleText = clone.text() ?? '';
89
- const visibleTextLen = visibleText.trim().length;
90
- const totalHtmlLen = Math.max(outerHtml.length, 1);
91
- // Text density: ratio of visible text to total HTML length
92
- const textDensity = Math.min(visibleTextLen / totalHtmlLen, 1.0);
93
- // Link density: ratio of link text to visible text
94
- let linkTextLen = 0;
95
- $el.find('a').each((_i, aEl) => {
96
- linkTextLen += ($(aEl).text() ?? '').trim().length;
97
- });
98
- const linkDensity = visibleTextLen > 0
99
- ? Math.min(linkTextLen / visibleTextLen, 1.0)
100
- : 0;
101
- return {
102
- element: el,
103
- tagName,
104
- htmlLength: outerHtml.length,
105
- visibleText,
106
- textDensity,
107
- linkDensity,
108
- normalizedTagScore: normalizeTagScore(getTagImportance(tagName)),
109
- wordBonus: wordCountBonus(visibleText),
110
- score: 0,
111
- };
79
+ if (CHROME_TAGS.has(tagName))
80
+ return true;
81
+ const cls = ($(el).attr('class') ?? '').toLowerCase();
82
+ const id = ($(el).attr('id') ?? '').toLowerCase();
83
+ const combined = cls + ' ' + id;
84
+ // Don't remove if it matches a content pattern
85
+ for (const p of CONTENT_PATTERNS) {
86
+ if (p.test(combined))
87
+ return false;
88
+ }
89
+ for (const p of CHROME_PATTERNS) {
90
+ if (p.test(combined))
91
+ return true;
92
+ }
93
+ // Role attribute
94
+ const role = ($(el).attr('role') ?? '').toLowerCase();
95
+ if (['navigation', 'banner', 'complementary', 'contentinfo', 'search'].includes(role)) {
96
+ return true;
97
+ }
98
+ return false;
99
+ }
100
+ // -----------------------------------------------------------------------
101
+ // Pass 2 — Density scoring
102
+ // -----------------------------------------------------------------------
103
+ /** Tag importance scores for density scoring (-2 to +3) */
104
+ const TAG_IMPORTANCE = {
105
+ article: 3, main: 3,
106
+ p: 2, h1: 2, h2: 2, h3: 2, h4: 2, h5: 2, h6: 2,
107
+ blockquote: 2, pre: 2, code: 2, figure: 2, figcaption: 2,
108
+ section: 1, td: 1, th: 1, li: 1, dd: 1, dt: 1,
109
+ div: 0, span: 0, table: 0, ul: 0, ol: 0, dl: 0,
110
+ aside: -1, header: -1, form: -1,
111
+ nav: -2, footer: -2,
112
+ };
113
+ function normalizeTagScore(rawScore) {
114
+ return (rawScore + 2) / 5; // -2..+3 → 0..1
112
115
  }
113
116
  /**
114
- * Recursively collect block elements for scoring.
117
+ * Collect scoreable blocks from a DOM tree.
115
118
  *
116
- * Key insight: if a block is very large (>MAX_LEAF_BLOCK_HTML chars), we recurse
117
- * into its children instead of treating it as one unit. This handles sites like HN
118
- * (table-based layout) and sites wrapped in a single <div>.
119
+ * Strategy: walk the tree top-down. For each element:
120
+ * - If it's a "leaf-ish" block (< threshold size), score it as one unit.
121
+ * - If it's large and a wrapper (div/section/table), recurse into children.
122
+ * - Protected elements are always recursed.
119
123
  *
120
- * Protected elements (main, article, body) are always recursed into.
124
+ * This finds the right granularity: not scoring a 200KB wrapper div,
125
+ * but scoring the divs/sections/p's nested 3-4 levels deep that carry
126
+ * actual content or chrome.
121
127
  */
122
- function collectBlocks($, parent, blocks, totalHtmlLength, depth = 0) {
128
+ function collectBlocks($, parent, blocks, maxLeafSize) {
123
129
  const children = 'children' in parent ? parent.children : [];
124
130
  for (const child of children) {
125
131
  if (child.type !== 'tag')
126
132
  continue;
127
133
  const el = child;
128
134
  const tagName = el.tagName?.toLowerCase() ?? '';
129
- if (BLOCK_ELEMENTS.has(tagName)) {
130
- const data = scoreElement($, el);
131
- // Recurse into large blocks, protected elements, and layout containers
132
- // to find the actual content sub-blocks
133
- const isLarge = data.htmlLength > MAX_LEAF_BLOCK_HTML;
134
- const isProtected = PROTECTED_ELEMENTS.has(tagName);
135
- const isLayoutContainer = tagName === 'div' || tagName === 'section' || tagName === 'table';
136
- if ((isLarge && isLayoutContainer) || isProtected) {
137
- // Recurse into children to find sub-blocks
138
- collectBlocks($, el, blocks, totalHtmlLength, depth + 1);
139
- }
140
- else {
141
- // Score this block as a leaf
142
- blocks.push(data);
143
- }
144
- }
145
- else if (tagName === 'tr' || tagName === 'td' || tagName === 'th' || tagName === 'tbody' || tagName === 'thead') {
146
- // Table layout elements — recurse through them to find block content
147
- collectBlocks($, el, blocks, totalHtmlLength, depth + 1);
135
+ // Skip non-content tags (script/style/link/meta)
136
+ if (tagName === 'script' || tagName === 'style' || tagName === 'link' || tagName === 'meta')
137
+ continue;
138
+ const $el = $(el);
139
+ const outerHtml = $.html($el) ?? '';
140
+ const htmlLen = outerHtml.length;
141
+ // Skip extremely tiny elements (bare tags like <br>)
142
+ if (htmlLen < 10)
143
+ continue;
144
+ const isProtected = PROTECTED_TAGS.has(tagName);
145
+ const isWrapper = ['div', 'section', 'table', 'tbody', 'thead', 'tr',
146
+ 'center', 'details', 'summary'].includes(tagName);
147
+ if (isProtected || (isWrapper && htmlLen > maxLeafSize)) {
148
+ // Too large or protected — recurse deeper
149
+ collectBlocks($, el, blocks, maxLeafSize);
148
150
  }
149
- else {
150
- // Non-block element — recurse to find nested blocks
151
- collectBlocks($, el, blocks, totalHtmlLength, depth + 1);
151
+ else if (htmlLen > 0) {
152
+ // Score this element
153
+ const clone = $el.clone();
154
+ clone.find('script, style, noscript, svg, path').remove();
155
+ const visibleText = clone.text() ?? '';
156
+ const visibleTextLen = visibleText.trim().length;
157
+ const textDensity = Math.min(visibleTextLen / Math.max(htmlLen, 1), 1.0);
158
+ let linkTextLen = 0;
159
+ $el.find('a').each((_i, a) => {
160
+ linkTextLen += ($(a).text() ?? '').trim().length;
161
+ });
162
+ const linkDensity = visibleTextLen > 0
163
+ ? Math.min(linkTextLen / visibleTextLen, 1.0)
164
+ : 0;
165
+ const rawTagScore = TAG_IMPORTANCE[tagName] ?? 0;
166
+ const normalizedTag = normalizeTagScore(rawTagScore);
167
+ const words = visibleText.trim().split(/\s+/).filter(w => w.length > 0);
168
+ const wordBonus = words.length > 0
169
+ ? Math.min(Math.log(words.length + 1) / Math.log(1000), 1.0)
170
+ : 0;
171
+ const score = (textDensity * 0.35 +
172
+ (1 - linkDensity) * 0.25 +
173
+ normalizedTag * 0.2 +
174
+ wordBonus * 0.1 +
175
+ 0.1 // baseline position score (removed position bias — not useful for deep nesting)
176
+ );
177
+ blocks.push({
178
+ element: el,
179
+ tagName,
180
+ htmlLength: htmlLen,
181
+ visibleText,
182
+ score,
183
+ });
152
184
  }
153
185
  }
154
186
  }
187
+ // -----------------------------------------------------------------------
188
+ // Main export
189
+ // -----------------------------------------------------------------------
155
190
  /**
156
- * Compute the max of an array of numbers.
157
- */
158
- function maxValue(values) {
159
- if (values.length === 0)
160
- return 0;
161
- return Math.max(...values);
162
- }
163
- /**
164
- * Prune low-value HTML blocks using content density scoring.
191
+ * Prune low-value HTML blocks using two-pass approach:
192
+ * 1. Semantic tag/class removal
193
+ * 2. Density scoring of remaining blocks
165
194
  *
166
195
  * @param html - Raw HTML to prune
167
196
  * @param options - Pruning configuration
168
197
  * @returns Pruned HTML with stats
169
198
  */
170
199
  export function pruneContent(html, options = {}) {
171
- const { threshold = 0.4, minWords = 3, dynamic = true, } = options;
200
+ const { threshold = 0.3, minWords = 3, dynamic = true, } = options;
172
201
  const originalLength = html.length;
173
202
  if (!html.trim()) {
174
203
  return { html, nodesRemoved: 0, reductionPercent: 0 };
175
204
  }
176
205
  const $ = cheerio.load(html);
177
- // Collect top-level block elements from the body
178
- const blocks = [];
179
- const bodyEl = $('body').get(0);
180
- if (bodyEl) {
181
- collectBlocks($, bodyEl, blocks, originalLength);
182
- }
183
- // If no blocks found (very sparse HTML), return as-is
184
- if (blocks.length === 0) {
185
- return { html, nodesRemoved: 0, reductionPercent: 0 };
206
+ let nodesRemoved = 0;
207
+ // =====================================================================
208
+ // Pass 1: Semantic removal
209
+ // =====================================================================
210
+ // Walk top-down; remove entire subtrees that are clearly chrome.
211
+ // We look at direct children of body, and one level deeper, to catch
212
+ // both <body> <nav> and <body> <div> <nav> patterns.
213
+ const toRemoveSemantic = [];
214
+ function walkForChrome(parent, depth) {
215
+ const children = 'children' in parent ? parent.children : [];
216
+ for (const child of children) {
217
+ if (child.type !== 'tag')
218
+ continue;
219
+ const el = child;
220
+ const tagName = el.tagName?.toLowerCase() ?? '';
221
+ if (tagName === 'script' || tagName === 'style')
222
+ continue;
223
+ if (PROTECTED_TAGS.has(tagName)) {
224
+ // Recurse into protected — there might be chrome inside <article>
225
+ walkForChrome(el, depth + 1);
226
+ continue;
227
+ }
228
+ if (isChromeBySemantic(el, $)) {
229
+ toRemoveSemantic.push(el);
230
+ continue; // don't recurse into something we'll remove
231
+ }
232
+ // Recurse up to a reasonable depth
233
+ if (depth < 6) {
234
+ walkForChrome(el, depth + 1);
235
+ }
236
+ }
186
237
  }
187
- // Assign position weights and compute composite scores
188
- const n = blocks.length;
189
- for (let i = 0; i < n; i++) {
190
- const block = blocks[i];
191
- const normalizedPos = n > 1 ? i / (n - 1) : 0.5;
192
- const posWeight = positionWeight(normalizedPos);
193
- block.score = (block.textDensity * 0.35 +
194
- (1 - block.linkDensity) * 0.25 +
195
- block.normalizedTagScore * 0.2 +
196
- block.wordBonus * 0.1 +
197
- posWeight * 0.1);
238
+ const body = $('body').get(0);
239
+ if (body) {
240
+ walkForChrome(body, 0);
198
241
  }
199
- // Determine effective threshold
200
- let effectiveThreshold = threshold;
201
- if (dynamic) {
202
- // Use the best-block score as the reference: remove blocks that score below
203
- // 40% of the highest-quality block. This handles the common bimodal case
204
- // (one great article block + several low-quality nav/sidebar blocks) much
205
- // better than median/mean approaches.
206
- const scores = blocks.map((b) => b.score);
207
- const best = maxValue(scores);
208
- effectiveThreshold = best * 0.4;
242
+ for (const el of toRemoveSemantic) {
243
+ $(el).remove();
244
+ nodesRemoved++;
209
245
  }
210
- // Safety floor: we must retain at least 30% of the original HTML
211
- const minRetainLength = Math.ceil(originalLength * 0.3);
212
- // Sort ascending by score so we remove worst blocks first
213
- const sortedAsc = [...blocks].sort((a, b) => a.score - b.score);
214
- const toRemove = new Set();
215
- let removedLength = 0;
216
- for (const block of sortedAsc) {
217
- // Never remove protected containers
218
- if (PROTECTED_ELEMENTS.has(block.tagName))
219
- continue;
220
- const words = block.visibleText.trim().split(/\s+/).filter((w) => w.length > 0);
221
- const isTinyBlock = words.length < minWords;
222
- const isLowScore = block.score < effectiveThreshold;
223
- // Keep blocks that pass both checks
224
- if (!isTinyBlock && !isLowScore)
225
- continue;
226
- // Always check safety floor before removing — even for empty blocks.
227
- // This prevents over-pruning when every block is low quality.
228
- const remainingLength = originalLength - (removedLength + block.htmlLength);
229
- if (remainingLength >= minRetainLength) {
230
- toRemove.add(block.element);
231
- removedLength += block.htmlLength;
246
+ // =====================================================================
247
+ // Pass 2: Density scoring (on the remaining HTML)
248
+ // =====================================================================
249
+ const postPass1Html = $.html();
250
+ const postPass1Length = postPass1Html.length;
251
+ // Run density scoring on remaining content
252
+ if (postPass1Length > 100 && body) {
253
+ const blocks = [];
254
+ // Max leaf size: ~5KB or 30% of remaining content (whichever is smaller)
255
+ // This ensures we find leaf blocks even in small documents.
256
+ const maxLeafSize = Math.min(5000, Math.ceil(postPass1Length * 0.3));
257
+ collectBlocks($, body, blocks, maxLeafSize);
258
+ if (blocks.length >= 2) {
259
+ const scores = blocks.map(b => b.score);
260
+ const bestScore = Math.max(...scores);
261
+ let effectiveThreshold = threshold;
262
+ if (dynamic) {
263
+ // Blocks scoring below 50% of the best block are candidates for removal
264
+ effectiveThreshold = bestScore * 0.5;
265
+ }
266
+ // Safety: retain at least 40% of post-pass1 content
267
+ const minRetainLength = Math.ceil(postPass1Length * 0.4);
268
+ // Sort ascending by score — remove worst first
269
+ const sorted = blocks
270
+ .map((b, i) => ({ b, i, score: b.score }))
271
+ .sort((a, b) => a.score - b.score);
272
+ const toRemoveDensity = new Set();
273
+ let removedLength = 0;
274
+ for (const { b } of sorted) {
275
+ if (PROTECTED_TAGS.has(b.tagName) || DENSITY_SAFE_TAGS.has(b.tagName))
276
+ continue;
277
+ const words = b.visibleText.trim().split(/\s+/).filter(w => w.length > 0);
278
+ const isTiny = words.length < minWords;
279
+ const isLow = b.score < effectiveThreshold;
280
+ if (!isTiny && !isLow)
281
+ continue;
282
+ // Check safety floor
283
+ const remaining = postPass1Length - (removedLength + b.htmlLength);
284
+ if (remaining < minRetainLength)
285
+ continue;
286
+ toRemoveDensity.add(b.element);
287
+ removedLength += b.htmlLength;
288
+ }
289
+ for (const el of toRemoveDensity) {
290
+ $(el).remove();
291
+ nodesRemoved++;
292
+ }
232
293
  }
233
294
  }
234
- // Remove selected elements from the DOM
235
- for (const el of toRemove) {
236
- $(el).remove();
237
- }
238
295
  const resultHtml = $.html() ?? html;
239
296
  const resultLength = resultHtml.length;
240
297
  const reductionPercent = originalLength > 0
@@ -242,7 +299,7 @@ export function pruneContent(html, options = {}) {
242
299
  : 0;
243
300
  return {
244
301
  html: resultHtml,
245
- nodesRemoved: toRemove.size,
302
+ nodesRemoved,
246
303
  reductionPercent,
247
304
  };
248
305
  }
@@ -1 +1 @@
1
- {"version":3,"file":"content-pruner.js","sourceRoot":"","sources":["../../src/core/content-pruner.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAqBnC,oCAAoC;AACpC,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC;IAC7B,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,QAAQ;IAC/D,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS;CAC5E,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC;AAEhE;;;GAGG;AACH,MAAM,cAAc,GAA2B;IAC7C,OAAO,EAAE,CAAC;IACV,IAAI,EAAE,CAAC;IACP,CAAC,EAAE,CAAC;IACJ,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;IACxC,UAAU,EAAE,CAAC;IACb,GAAG,EAAE,CAAC;IACN,IAAI,EAAE,CAAC;IACP,MAAM,EAAE,CAAC;IACT,UAAU,EAAE,CAAC;IACb,OAAO,EAAE,CAAC;IACV,EAAE,EAAE,CAAC;IACL,EAAE,EAAE,CAAC;IACL,EAAE,EAAE,CAAC;IACL,EAAE,EAAE,CAAC;IACL,EAAE,EAAE,CAAC;IACL,GAAG,EAAE,CAAC;IACN,IAAI,EAAE,CAAC;IACP,KAAK,EAAE,CAAC,CAAC;IACT,MAAM,EAAE,CAAC,CAAC;IACV,IAAI,EAAE,CAAC,CAAC;IACR,GAAG,EAAE,CAAC,CAAC;IACP,MAAM,EAAE,CAAC,CAAC;CACX,CAAC;AAEF,sDAAsD;AACtD,SAAS,iBAAiB,CAAC,QAAgB;IACzC,sDAAsD;IACtD,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;AAC5B,CAAC;AAED,SAAS,gBAAgB,CAAC,OAAe;IACvC,OAAO,cAAc,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,IAAI,CAAC,CAAC;AACpD,CAAC;AAED,6CAA6C;AAC7C,SAAS,cAAc,CAAC,IAAY;IAClC,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACnE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACjC,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,CAAC;AACpE,CAAC;AAED;;;;GAIG;AACH,SAAS,cAAc,CAAC,aAAqB;IAC3C,IAAI,aAAa,IAAI,GAAG,IAAI,aAAa,IAAI,GAAG;QAAE,OAAO,GAAG,CAAC;IAC7D,IAAI,aAAa,GAAG,GAAG;QAAE,OAAO,aAAa,GAAG,GAAG,CAAC;IACpD,sBAAsB;IACtB,OAAO,CAAC,GAAG,GAAG,aAAa,CAAC,GAAG,GAAG,CAAC;AACrC,CAAC;AAeD,qFAAqF;AACrF,MAAM,mBAAmB,GAAG,IAAI,CAAC;AAEjC;;GAE
G;AACH,SAAS,YAAY,CACnB,CAAqB,EACrB,EAAW;IAEX,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAChD,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;IAClB,MAAM,SAAS,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;IAEpC,uDAAuD;IACvD,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,EAAE,CAAC;IAC1B,KAAK,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC,MAAM,EAAE,CAAC;IAC/C,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;IACvC,MAAM,cAAc,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC;IACjD,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAEnD,2DAA2D;IAC3D,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,cAAc,GAAG,YAAY,EAAE,GAAG,CAAC,CAAC;IAEjE,mDAAmD;IACnD,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,GAAG,EAAE,EAAE;QAC7B,WAAW,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC;IACrD,CAAC,CAAC,CAAC;IACH,MAAM,WAAW,GAAG,cAAc,GAAG,CAAC;QACpC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,GAAG,cAAc,EAAE,GAAG,CAAC;QAC7C,CAAC,CAAC,CAAC,CAAC;IAEN,OAAO;QACL,OAAO,EAAE,EAAE;QACX,OAAO;QACP,UAAU,EAAE,SAAS,CAAC,MAAM;QAC5B,WAAW;QACX,WAAW;QACX,WAAW;QACX,kBAAkB,EAAE,iBAAiB,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;QAChE,SAAS,EAAE,cAAc,CAAC,WAAW,CAAC;QACtC,KAAK,EAAE,CAAC;KACT,CAAC;AACJ,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,aAAa,CACpB,CAAqB,EACrB,MAAe,EACf,MAAmB,EACnB,eAAuB,EACvB,QAAgB,CAAC;IAEjB,MAAM,QAAQ,GAAG,UAAU,IAAI,MAAM,CAAC,CAAC,CAAE,MAAM,CAAC,QAAsB,CAAC,CAAC,CAAC,EAAE,CAAC;IAC5E,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;QAC7B,IAAI,KAAK,CAAC,IAAI,KAAK,KAAK;YAAE,SAAS;QACnC,MAAM,EAAE,GAAG,KAAgB,CAAC;QAC5B,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;QAEhD,IAAI,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YAChC,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAEjC,uEAAuE;YACvE,wCAAwC;YACxC,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,GAAG,mBAAmB,CAAC;YACtD,MAAM,WAAW,GAAG,kBAAkB,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACpD,MAAM,iBAAiB,GAAG,OAAO,KAAK,KAAK,IAAI,OAAO,KAAK,SAAS,IAAI,OAAO,KAAK,OAAO,CAAC;YAE5F,IAAI,CAAC,OAAO,IAAI,iBAAiB,CAAC,IAAI,WAAW,EAAE,CAAC;gBAClD,2CAA2C;gBAC3C,aAA
a,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;YAC3D,CAAC;iBAAM,CAAC;gBACN,6BAA6B;gBAC7B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACpB,CAAC;QACH,CAAC;aAAM,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,OAAO,IAAI,OAAO,KAAK,OAAO,EAAE,CAAC;YAClH,qEAAqE;YACrE,aAAa,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;QAC3D,CAAC;aAAM,CAAC;YACN,oDAAoD;YACpD,aAAa,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,QAAQ,CAAC,MAAgB;IAChC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAClC,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,UAAwB,EAAE;IACnE,MAAM,EACJ,SAAS,GAAG,GAAG,EACf,QAAQ,GAAG,CAAC,EACZ,OAAO,GAAG,IAAI,GACf,GAAG,OAAO,CAAC;IAEZ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC;IAEnC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QACjB,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,EAAE,gBAAgB,EAAE,CAAC,EAAE,CAAC;IACxD,CAAC;IAED,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,iDAAiD;IACjD,MAAM,MAAM,GAAgB,EAAE,CAAC;IAC/B,MAAM,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAChC,IAAI,MAAM,EAAE,CAAC;QACX,aAAa,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC,CAAC;IACnD,CAAC;IAED,sDAAsD;IACtD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,EAAE,gBAAgB,EAAE,CAAC,EAAE,CAAC;IACxD,CAAC;IAED,uDAAuD;IACvD,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;QACzB,MAAM,aAAa,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;QAChD,MAAM,SAAS,GAAG,cAAc,CAAC,aAAa,CAAC,CAAC;QAEhD,KAAK,CAAC,KAAK,GAAG,CACZ,KAAK,CAAC,WAAW,GAAG,IAAI;YACxB,CAAC,CAAC,GAAG,KAAK,CAAC,WAAW,CAAC,GAAG,IAAI;YAC9B,KAAK,CAAC,kBAAkB,GAAG,GAAG;YAC9B,KAAK,CAAC,SAAS,GAAG,GAAG;YACrB,SAAS,GAAG,GAAG,CAChB,CAAC;IACJ,CAAC;IAED,gCAAgC;IAChC,IAAI,kBAAkB,GAAG,SAAS,CAAC
;IACnC,IAAI,OAAO,EAAE,CAAC;QACZ,4EAA4E;QAC5E,yEAAyE;QACzE,0EAA0E;QAC1E,sCAAsC;QACtC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAC1C,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC9B,kBAAkB,GAAG,IAAI,GAAG,GAAG,CAAC;IAClC,CAAC;IAED,iEAAiE;IACjE,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,GAAG,GAAG,CAAC,CAAC;IAExD,0DAA0D;IAC1D,MAAM,SAAS,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAEhE,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAW,CAAC;IACpC,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,KAAK,MAAM,KAAK,IAAI,SAAS,EAAE,CAAC;QAC9B,oCAAoC;QACpC,IAAI,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;YAAE,SAAS;QAEpD,MAAM,KAAK,GAAG,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAChF,MAAM,WAAW,GAAG,KAAK,CAAC,MAAM,GAAG,QAAQ,CAAC;QAC5C,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,GAAG,kBAAkB,CAAC;QAEpD,oCAAoC;QACpC,IAAI,CAAC,WAAW,IAAI,CAAC,UAAU;YAAE,SAAS;QAE1C,qEAAqE;QACrE,8DAA8D;QAC9D,MAAM,eAAe,GAAG,cAAc,GAAG,CAAC,aAAa,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC;QAC5E,IAAI,eAAe,IAAI,eAAe,EAAE,CAAC;YACvC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC5B,aAAa,IAAI,KAAK,CAAC,UAAU,CAAC;QACpC,CAAC;IACH,CAAC;IAED,wCAAwC;IACxC,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,CAAC,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC;IACjB,CAAC;IAED,MAAM,UAAU,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,IAAI,CAAC;IACpC,MAAM,YAAY,GAAG,UAAU,CAAC,MAAM,CAAC;IACvC,MAAM,gBAAgB,GAAG,cAAc,GAAG,CAAC;QACzC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,cAAc,GAAG,YAAY,CAAC,GAAG,cAAc,CAAC,GAAG,GAAG,CAAC,CAAC;QACnF,CAAC,CAAC,CAAC,CAAC;IAEN,OAAO;QACL,IAAI,EAAE,UAAU;QAChB,YAAY,EAAE,QAAQ,CAAC,IAAI;QAC3B,gBAAgB;KACjB,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"content-pruner.js","sourceRoot":"","sources":["../../src/core/content-pruner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAqBnC,0EAA0E;AAC1E,wDAAwD;AACxD,0EAA0E;AAE1E,oEAAoE;AACpE,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC;IAC1B,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU;CACrC,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,eAAe,GAAG;IACtB,aAAa;IACb,UAAU;IACV,YAAY;IACZ,qBAAqB;IACrB,WAAW;IACX,WAAW;IACX,aAAa;IACb,UAAU;IACV,WAAW;IACX,cAAc;IACd,cAAc;IACd,YAAY;IACZ,2BAA2B;IAC3B,cAAc;IACd,WAAW;IACX,WAAW;IACX,iCAAiC;IACjC,SAAS;IACT,yBAAyB;IACzB,UAAU;IACV,aAAa;CACd,CAAC;AAEF;;;GAGG;AACH,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC;AAE5D;;;;GAIG;AACH,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC;IAChC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAClC,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,YAAY;IAC9C,MAAM,EAAE,SAAS,EAAE,MAAM;CAC1B,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,gBAAgB,GAAG;IACvB,WAAW;IACX,iBAAiB;IACjB,kBAAkB;IAClB,iBAAiB;IACjB,SAAS;IACT,QAAQ;IACR,iBAAiB;IACjB,iBAAiB;CAClB,CAAC;AAEF,SAAS,kBAAkB,CAAC,EAAW,EAAE,CAAqB;IAC5D,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAChD,IAAI,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IAE1C,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IACtD,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IAClD,MAAM,QAAQ,GAAG,GAAG,GAAG,GAAG,GAAG,EAAE,CAAC;IAEhC,+CAA+C;IAC/C,KAAK,MAAM,CAAC,IAAI,gBAAgB,EAAE,CAAC;QACjC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;YAAE,OAAO,KAAK,CAAC;IACrC,CAAC;IAED,KAAK,MAAM,CAAC,IAAI,eAAe,EAAE,CAAC;QAChC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;YAAE,OAAO,IAAI,CAAC;IACpC,CAAC;IAED,iBAAiB;IACjB,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IACtD,IAAI,CAAC,YAAY,EAAE,QAAQ,EAAE,eAAe,EAAE,aAAa,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACtF,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,0EAA0E;AAC1
E,2BAA2B;AAC3B,0EAA0E;AAE1E,2DAA2D;AAC3D,MAAM,cAAc,GAA2B;IAC7C,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC;IACnB,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;IAC9C,UAAU,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC;IACxD,OAAO,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;IAC7C,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;IAC9C,KAAK,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/B,GAAG,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;CACpB,CAAC;AAEF,SAAS,iBAAiB,CAAC,QAAgB;IACzC,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,gBAAgB;AAC7C,CAAC;AAUD;;;;;;;;;;;GAWG;AACH,SAAS,aAAa,CACpB,CAAqB,EACrB,MAAe,EACf,MAAqB,EACrB,WAAmB;IAEnB,MAAM,QAAQ,GAAG,UAAU,IAAI,MAAM,CAAC,CAAC,CAAE,MAAM,CAAC,QAAsB,CAAC,CAAC,CAAC,EAAE,CAAC;IAE5E,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;QAC7B,IAAI,KAAK,CAAC,IAAI,KAAK,KAAK;YAAE,SAAS;QACnC,MAAM,EAAE,GAAG,KAAgB,CAAC;QAC5B,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;QAEhD,oBAAoB;QACpB,IAAI,OAAO,KAAK,QAAQ,IAAI,OAAO,KAAK,OAAO,IAAI,OAAO,KAAK,MAAM,IAAI,OAAO,KAAK,MAAM;YAAE,SAAS;QAEtG,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;QAClB,MAAM,SAAS,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC;QAEjC,qDAAqD;QACrD,IAAI,OAAO,GAAG,EAAE;YAAE,SAAS;QAE3B,MAAM,WAAW,GAAG,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAChD,MAAM,SAAS,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI;YACjD,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAErE,IAAI,WAAW,IAAI,CAAC,SAAS,IAAI,OAAO,GAAG,WAAW,CAAC,EAAE,CAAC;YACxD,0CAA0C;YAC1C,aAAa,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,WAAW,CAAC,CAAC;QAC5C,CAAC;aAAM,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;YACvB,qBAAqB;YACrB,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,EAAE,CAAC;YAC1B,KAAK,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC,MAAM,EAAE,CAAC;YAC
1D,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;YACvC,MAAM,cAAc,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC;YAEjD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YAEzE,IAAI,WAAW,GAAG,CAAC,CAAC;YACpB,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE;gBAC3B,WAAW,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC;YACnD,CAAC,CAAC,CAAC;YACH,MAAM,WAAW,GAAG,cAAc,GAAG,CAAC;gBACpC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,GAAG,cAAc,EAAE,GAAG,CAAC;gBAC7C,CAAC,CAAC,CAAC,CAAC;YAEN,MAAM,WAAW,GAAG,cAAc,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YACjD,MAAM,aAAa,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC;YAErD,MAAM,KAAK,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YACxE,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC;gBAChC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC;gBAC5D,CAAC,CAAC,CAAC,CAAC;YAEN,MAAM,KAAK,GAAG,CACZ,WAAW,GAAG,IAAI;gBAClB,CAAC,CAAC,GAAG,WAAW,CAAC,GAAG,IAAI;gBACxB,aAAa,GAAG,GAAG;gBACnB,SAAS,GAAG,GAAG;gBACf,GAAG,CAAC,gFAAgF;aACrF,CAAC;YAEF,MAAM,CAAC,IAAI,CAAC;gBACV,OAAO,EAAE,EAAE;gBACX,OAAO;gBACP,UAAU,EAAE,OAAO;gBACnB,WAAW;gBACX,KAAK;aACN,CAAC,CAAC;QACL,CAAC;IACH,CAAC;AACH,CAAC;AAED,0EAA0E;AAC1E,cAAc;AACd,0EAA0E;AAE1E;;;;;;;;GAQG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,UAAwB,EAAE;IACnE,MAAM,EACJ,SAAS,GAAG,GAAG,EACf,QAAQ,GAAG,CAAC,EACZ,OAAO,GAAG,IAAI,GACf,GAAG,OAAO,CAAC;IAEZ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC;IACnC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QACjB,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,EAAE,gBAAgB,EAAE,CAAC,EAAE,CAAC;IACxD,CAAC;IAED,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,wEAAwE;IACxE,2BAA2B;IAC3B,wEAAwE;IACxE,iEAAiE;IACjE,qEAAqE;IACrE,qDAAqD;IACrD,MAAM,gBAAgB,GAAc,EAAE,CAAC;IAEvC,SAAS,aAAa,CAAC,MAAe,EAAE,KAAa;QACnD,MAAM,QAAQ,GAAG,UAAU,IAAI,MAAM,CAAC,CAAC,CAAE,MAAM,CAAC,QAAsB,C
AAC,CAAC,CAAC,EAAE,CAAC;QAC5E,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;YAC7B,IAAI,KAAK,CAAC,IAAI,KAAK,KAAK;gBAAE,SAAS;YACnC,MAAM,EAAE,GAAG,KAAgB,CAAC;YAC5B,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;YAChD,IAAI,OAAO,KAAK,QAAQ,IAAI,OAAO,KAAK,OAAO;gBAAE,SAAS;YAE1D,IAAI,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;gBAChC,kEAAkE;gBAClE,aAAa,CAAC,EAAE,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;gBAC7B,SAAS;YACX,CAAC;YAED,IAAI,kBAAkB,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC;gBAC9B,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAC1B,SAAS,CAAC,4CAA4C;YACxD,CAAC;YAED,mCAAmC;YACnC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACd,aAAa,CAAC,EAAE,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;YAC/B,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC9B,IAAI,IAAI,EAAE,CAAC;QACT,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IACzB,CAAC;IAED,KAAK,MAAM,EAAE,IAAI,gBAAgB,EAAE,CAAC;QAClC,CAAC,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC;QACf,YAAY,EAAE,CAAC;IACjB,CAAC;IAED,wEAAwE;IACxE,kDAAkD;IAClD,wEAAwE;IACxE,MAAM,aAAa,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAC/B,MAAM,eAAe,GAAG,aAAa,CAAC,MAAM,CAAC;IAE7C,2CAA2C;IAC3C,IAAI,eAAe,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;QAClC,MAAM,MAAM,GAAkB,EAAE,CAAC;QACjC,yEAAyE;QACzE,4DAA4D;QAC5D,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,eAAe,GAAG,GAAG,CAAC,CAAC,CAAC;QACrE,aAAa,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,CAAC;QAE5C,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACvB,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACxC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;YAEtC,IAAI,kBAAkB,GAAG,SAAS,CAAC;YACnC,IAAI,OAAO,EAAE,CAAC;gBACZ,wEAAwE;gBACxE,kBAAkB,GAAG,SAAS,GAAG,GAAG,CAAC;YACvC,CAAC;YAED,oDAAoD;YACpD,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,eAAe,GAAG,GAAG,CAAC,CAAC;YAEzD,+CAA+C;YAC/C,MAAM,MAAM,GAAG,MAAM;iBAClB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;iBACzC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;YAErC,MAAM,e
AAe,GAAG,IAAI,GAAG,EAAW,CAAC;YAC3C,IAAI,aAAa,GAAG,CAAC,CAAC;YAEtB,KAAK,MAAM,EAAE,CAAC,EAAE,IAAI,MAAM,EAAE,CAAC;gBAC3B,IAAI,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC;oBAAE,SAAS;gBAEhF,MAAM,KAAK,GAAG,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAC1E,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,GAAG,QAAQ,CAAC;gBACvC,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,GAAG,kBAAkB,CAAC;gBAE3C,IAAI,CAAC,MAAM,IAAI,CAAC,KAAK;oBAAE,SAAS;gBAEhC,qBAAqB;gBACrB,MAAM,SAAS,GAAG,eAAe,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC;gBACnE,IAAI,SAAS,GAAG,eAAe;oBAAE,SAAS;gBAE1C,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBAC/B,aAAa,IAAI,CAAC,CAAC,UAAU,CAAC;YAChC,CAAC;YAED,KAAK,MAAM,EAAE,IAAI,eAAe,EAAE,CAAC;gBACjC,CAAC,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC;gBACf,YAAY,EAAE,CAAC;YACjB,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,IAAI,CAAC;IACpC,MAAM,YAAY,GAAG,UAAU,CAAC,MAAM,CAAC;IACvC,MAAM,gBAAgB,GAAG,cAAc,GAAG,CAAC;QACzC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,cAAc,GAAG,YAAY,CAAC,GAAG,cAAc,CAAC,GAAG,GAAG,CAAC,CAAC;QACnF,CAAC,CAAC,CAAC,CAAC;IAEN,OAAO;QACL,IAAI,EAAE,UAAU;QAChB,YAAY;QACZ,gBAAgB;KACjB,CAAC;AACJ,CAAC"}
@@ -0,0 +1,67 @@
1
+ /**
2
+ * WebPeel Deep Research Agent
3
+ *
4
+ * Autonomously searches the web, fetches top sources, filters content with
5
+ * BM25, optionally follows promising links, and synthesizes a comprehensive
6
+ * report using an LLM.
7
+ *
8
+ * Design principle: orchestrate existing modules (peel, bm25-filter,
9
+ * llm-extract) — don't reinvent anything.
10
+ */
11
+ export interface ResearchOptions {
12
+ /** Research question or topic */
13
+ query: string;
14
+ /** Maximum number of sources to consult. Default: 5 */
15
+ maxSources?: number;
16
+ /** Maximum depth of link-following. Default: 1 (just search results; 2+ follows links) */
17
+ maxDepth?: number;
18
+ /** LLM API key for synthesis */
19
+ apiKey?: string;
20
+ /** LLM model for synthesis. Default: gpt-4o-mini */
21
+ model?: string;
22
+ /** LLM base URL. Default: https://api.openai.com/v1 */
23
+ baseUrl?: string;
24
+ /** Maximum total time in ms. Default: 60000 (1 minute) */
25
+ timeout?: number;
26
+ /** Output format: 'report' (markdown synthesis) or 'sources' (raw extracted data). Default: 'report' */
27
+ outputFormat?: 'report' | 'sources';
28
+ /** Optional callback for progress updates */
29
+ onProgress?: (step: ResearchStep) => void;
30
+ }
31
+ export interface ResearchStep {
32
+ phase: 'searching' | 'fetching' | 'extracting' | 'following' | 'synthesizing';
33
+ message: string;
34
+ sourcesFound?: number;
35
+ sourcesFetched?: number;
36
+ }
37
+ export interface ResearchSource {
38
+ url: string;
39
+ title: string;
40
+ /** Key findings from this source */
41
+ findings: string;
42
+ /** Relevance score (0-1) */
43
+ relevance: number;
44
+ }
45
+ export interface ResearchResult {
46
+ /** Synthesized research report (markdown) */
47
+ report: string;
48
+ /** Sources consulted */
49
+ sources: ResearchSource[];
50
+ /** Total sources found vs consulted */
51
+ totalSourcesFound: number;
52
+ sourcesConsulted: number;
53
+ /** Time taken in ms */
54
+ elapsed: number;
55
+ /** Tokens used for synthesis */
56
+ tokensUsed?: {
57
+ input: number;
58
+ output: number;
59
+ };
60
+ /** Estimated cost in USD */
61
+ cost?: number;
62
+ }
63
+ /**
64
+ * Conduct autonomous multi-step web research on a topic.
65
+ */
66
+ export declare function research(options: ResearchOptions): Promise<ResearchResult>;
67
+ //# sourceMappingURL=research.d.ts.map