@aria-cli/tools 1.0.9 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. package/package.json +9 -5
  2. package/src/__tests__/web-fetch-download.test.ts +0 -433
  3. package/src/__tests__/web-tools.test.ts +0 -619
  4. package/src/ask-user-interaction.ts +0 -33
  5. package/src/cache/web-cache.ts +0 -110
  6. package/src/definitions/arion.ts +0 -118
  7. package/src/definitions/browser/browser.ts +0 -502
  8. package/src/definitions/browser/index.ts +0 -5
  9. package/src/definitions/browser/pw-downloads.ts +0 -142
  10. package/src/definitions/browser/pw-interactions.ts +0 -282
  11. package/src/definitions/browser/pw-responses.ts +0 -98
  12. package/src/definitions/browser/pw-session.ts +0 -405
  13. package/src/definitions/browser/pw-shared.ts +0 -85
  14. package/src/definitions/browser/pw-snapshot.ts +0 -383
  15. package/src/definitions/browser/pw-state.ts +0 -101
  16. package/src/definitions/browser/types.ts +0 -203
  17. package/src/definitions/code-intelligence.ts +0 -526
  18. package/src/definitions/core.ts +0 -118
  19. package/src/definitions/delegation.ts +0 -567
  20. package/src/definitions/deploy.ts +0 -73
  21. package/src/definitions/filesystem.ts +0 -217
  22. package/src/definitions/frg.ts +0 -67
  23. package/src/definitions/index.ts +0 -28
  24. package/src/definitions/memory.ts +0 -150
  25. package/src/definitions/messaging.ts +0 -734
  26. package/src/definitions/meta.ts +0 -392
  27. package/src/definitions/network.ts +0 -179
  28. package/src/definitions/outlook.ts +0 -318
  29. package/src/definitions/patch/apply-patch.ts +0 -235
  30. package/src/definitions/patch/fuzzy-match.ts +0 -217
  31. package/src/definitions/patch/index.ts +0 -1
  32. package/src/definitions/patch/patch-parser.ts +0 -297
  33. package/src/definitions/patch/sandbox-paths.ts +0 -129
  34. package/src/definitions/process/index.ts +0 -5
  35. package/src/definitions/process/process-registry.ts +0 -303
  36. package/src/definitions/process/process.ts +0 -456
  37. package/src/definitions/process/pty-keys.ts +0 -298
  38. package/src/definitions/process/session-slug.ts +0 -147
  39. package/src/definitions/quip.ts +0 -225
  40. package/src/definitions/search.ts +0 -67
  41. package/src/definitions/session-history.ts +0 -79
  42. package/src/definitions/shell.ts +0 -202
  43. package/src/definitions/slack.ts +0 -211
  44. package/src/definitions/web.ts +0 -119
  45. package/src/executors/apply-patch.ts +0 -1035
  46. package/src/executors/arion.ts +0 -199
  47. package/src/executors/code-intelligence.ts +0 -1179
  48. package/src/executors/deploy.ts +0 -1066
  49. package/src/executors/filesystem.ts +0 -1428
  50. package/src/executors/frg-freshness.ts +0 -743
  51. package/src/executors/frg.ts +0 -394
  52. package/src/executors/index.ts +0 -280
  53. package/src/executors/learning-meta.ts +0 -1367
  54. package/src/executors/lsp-client.ts +0 -355
  55. package/src/executors/memory.ts +0 -978
  56. package/src/executors/meta.ts +0 -293
  57. package/src/executors/process-registry.ts +0 -570
  58. package/src/executors/pty-session-store.ts +0 -43
  59. package/src/executors/pty.ts +0 -342
  60. package/src/executors/restart.ts +0 -133
  61. package/src/executors/search-freshness.ts +0 -249
  62. package/src/executors/search-types.ts +0 -98
  63. package/src/executors/search.ts +0 -89
  64. package/src/executors/self-diagnose.ts +0 -552
  65. package/src/executors/session-history.ts +0 -435
  66. package/src/executors/shell-safety.ts +0 -519
  67. package/src/executors/shell.ts +0 -1243
  68. package/src/executors/utils.ts +0 -40
  69. package/src/executors/web.ts +0 -786
  70. package/src/extraction/content-extraction.ts +0 -281
  71. package/src/extraction/index.ts +0 -5
  72. package/src/headless-control-contract.ts +0 -1149
  73. package/src/index.ts +0 -788
  74. package/src/local-control-http-auth.ts +0 -2
  75. package/src/mcp/client.ts +0 -218
  76. package/src/mcp/connection.ts +0 -568
  77. package/src/mcp/index.ts +0 -11
  78. package/src/mcp/jsonrpc.ts +0 -195
  79. package/src/mcp/types.ts +0 -199
  80. package/src/network-control-adapter.ts +0 -88
  81. package/src/network-runtime/address-types.ts +0 -218
  82. package/src/network-runtime/db-owner-fencing.ts +0 -91
  83. package/src/network-runtime/delivery-receipts.ts +0 -372
  84. package/src/network-runtime/direct-endpoint-authority.ts +0 -35
  85. package/src/network-runtime/index.ts +0 -316
  86. package/src/network-runtime/local-control-contract.ts +0 -784
  87. package/src/network-runtime/node-store-contract.ts +0 -46
  88. package/src/network-runtime/pair-route-contract.ts +0 -97
  89. package/src/network-runtime/peer-capabilities.ts +0 -48
  90. package/src/network-runtime/peer-principal-ref.ts +0 -20
  91. package/src/network-runtime/peer-state-machine.ts +0 -160
  92. package/src/network-runtime/protocol-schemas.ts +0 -265
  93. package/src/network-runtime/runtime-bootstrap-contract.ts +0 -83
  94. package/src/outlook/desktop-session.ts +0 -409
  95. package/src/policy.ts +0 -171
  96. package/src/providers/brave.ts +0 -80
  97. package/src/providers/duckduckgo.ts +0 -199
  98. package/src/providers/exa.ts +0 -85
  99. package/src/providers/firecrawl.ts +0 -77
  100. package/src/providers/index.ts +0 -8
  101. package/src/providers/jina.ts +0 -70
  102. package/src/providers/router.ts +0 -121
  103. package/src/providers/search-provider.ts +0 -74
  104. package/src/providers/tavily.ts +0 -74
  105. package/src/quip/desktop-session.ts +0 -435
  106. package/src/registry/index.ts +0 -1
  107. package/src/registry/registry.ts +0 -905
  108. package/src/runtime-socket-local-control-client.ts +0 -632
  109. package/src/security/dns-normalization.ts +0 -34
  110. package/src/security/dns-pinning.ts +0 -138
  111. package/src/security/external-content.ts +0 -129
  112. package/src/security/ssrf.ts +0 -207
  113. package/src/slack/desktop-session.ts +0 -493
  114. package/src/tool-factory.ts +0 -91
  115. package/src/types.ts +0 -1341
  116. package/src/utils/retry.ts +0 -163
  117. package/src/utils/safe-parse-json.ts +0 -176
  118. package/src/utils/url.ts +0 -20
  119. package/tests/benchmarks/registry.bench.ts +0 -57
  120. package/tests/cache/web-cache.test.ts +0 -147
  121. package/tests/critical-integration.test.ts +0 -1465
  122. package/tests/definitions/apply-patch.test.ts +0 -586
  123. package/tests/definitions/browser.test.ts +0 -495
  124. package/tests/definitions/delegation-pause-resume.test.ts +0 -758
  125. package/tests/definitions/execution.test.ts +0 -671
  126. package/tests/definitions/messaging-inbox-scope.test.ts +0 -229
  127. package/tests/definitions/messaging.test.ts +0 -1468
  128. package/tests/definitions/outlook.test.ts +0 -30
  129. package/tests/definitions/process.test.ts +0 -469
  130. package/tests/definitions/slack.test.ts +0 -28
  131. package/tests/definitions/tool-inventory.test.ts +0 -218
  132. package/tests/e2e/delegation-quest-orchestration.e2e.test.ts +0 -433
  133. package/tests/e2e/memory-tool-discovery-contract.e2e.test.ts +0 -81
  134. package/tests/executors/apply-patch.test.ts +0 -538
  135. package/tests/executors/arion.test.ts +0 -309
  136. package/tests/executors/conversation-primitives.test.ts +0 -250
  137. package/tests/executors/deploy.test.ts +0 -746
  138. package/tests/executors/filesystem-tools.test.ts +0 -357
  139. package/tests/executors/filesystem.test.ts +0 -959
  140. package/tests/executors/frg-freshness.test.ts +0 -136
  141. package/tests/executors/frg-merge.test.ts +0 -70
  142. package/tests/executors/frg-session-content.test.ts +0 -40
  143. package/tests/executors/frg.test.ts +0 -56
  144. package/tests/executors/memory-bugfixes.test.ts +0 -257
  145. package/tests/executors/memory-real-memoria.integration.test.ts +0 -316
  146. package/tests/executors/memory.test.ts +0 -853
  147. package/tests/executors/meta-tools.test.ts +0 -411
  148. package/tests/executors/meta.test.ts +0 -683
  149. package/tests/executors/path-containment.test.ts +0 -51
  150. package/tests/executors/process-registry.test.ts +0 -505
  151. package/tests/executors/pty.test.ts +0 -664
  152. package/tests/executors/quest-security.test.ts +0 -249
  153. package/tests/executors/read-file-media.test.ts +0 -230
  154. package/tests/executors/recall-knowledge-schema.test.ts +0 -209
  155. package/tests/executors/recall-tags.test.ts +0 -278
  156. package/tests/executors/remember-null-safety.contract.test.ts +0 -41
  157. package/tests/executors/restart.test.ts +0 -67
  158. package/tests/executors/search-unified.test.ts +0 -381
  159. package/tests/executors/session-history.test.ts +0 -340
  160. package/tests/executors/session-transcript.test.ts +0 -561
  161. package/tests/executors/shell-abort.test.ts +0 -416
  162. package/tests/executors/shell-env-blocklist.test.ts +0 -648
  163. package/tests/executors/shell-env-process.test.ts +0 -245
  164. package/tests/executors/shell-process-registry.test.ts +0 -334
  165. package/tests/executors/shell-tools.test.ts +0 -393
  166. package/tests/executors/shell.test.ts +0 -690
  167. package/tests/executors/web-abort-vs-timeout.test.ts +0 -213
  168. package/tests/executors/web-integration.test.ts +0 -633
  169. package/tests/executors/web-symlink.test.ts +0 -18
  170. package/tests/executors/web.test.ts +0 -1400
  171. package/tests/executors/write-stdin.test.ts +0 -145
  172. package/tests/extraction/content-extraction.test.ts +0 -153
  173. package/tests/guards/tools-default-test-lane.integration.test.ts +0 -21
  174. package/tests/guards/tools-package-test-commands.e2e.test.ts +0 -43
  175. package/tests/guards/tools-test-lane-manifest.contract.test.ts +0 -76
  176. package/tests/guards/tools-vitest-workspace-alias.contract.test.ts +0 -63
  177. package/tests/helpers/async-waits.ts +0 -53
  178. package/tests/integration/headless-control-contract.integration.test.ts +0 -153
  179. package/tests/integration/memory-tool-schema-parity.integration.test.ts +0 -67
  180. package/tests/integration/meta-tools-round-trip.integration.test.ts +0 -506
  181. package/tests/integration/quest-round-trip.test.ts +0 -303
  182. package/tests/integration/registry-executor-flow.test.ts +0 -85
  183. package/tests/integration.test.ts +0 -177
  184. package/tests/loading-tier.test.ts +0 -126
  185. package/tests/mcp/client-reconnect.test.ts +0 -267
  186. package/tests/mcp/connection.test.ts +0 -846
  187. package/tests/mcp/injectable-logger.test.ts +0 -83
  188. package/tests/mcp/jsonrpc.test.ts +0 -109
  189. package/tests/mcp/lifecycle.test.ts +0 -879
  190. package/tests/network-runtime/address-types.contract.test.ts +0 -143
  191. package/tests/network-runtime/continuity-bind-schema.contract.test.ts +0 -203
  192. package/tests/network-runtime/local-control-contract.test.ts +0 -869
  193. package/tests/network-runtime/local-control-invite-token.contract.test.ts +0 -146
  194. package/tests/network-runtime/node-store-contract.test.ts +0 -11
  195. package/tests/network-runtime/pair-protocol-nodeid.contract.test.ts +0 -15
  196. package/tests/network-runtime/peer-state-machine.contract.test.ts +0 -148
  197. package/tests/network-runtime/protocol-schemas.contract.test.ts +0 -512
  198. package/tests/network-runtime/relay-pending-nodeid.contract.test.ts +0 -62
  199. package/tests/network-runtime/runtime-bootstrap-contract.test.ts +0 -227
  200. package/tests/network-runtime/runtime-socket-local-control-client.test.ts +0 -621
  201. package/tests/network-runtime/wait-for-message-script.test.ts +0 -288
  202. package/tests/parallel.test.ts +0 -71
  203. package/tests/policy.test.ts +0 -184
  204. package/tests/print-default-test-lane.ts +0 -14
  205. package/tests/print-test-lane-manifest.ts +0 -22
  206. package/tests/providers/brave.test.ts +0 -159
  207. package/tests/providers/duckduckgo.test.ts +0 -207
  208. package/tests/providers/exa.test.ts +0 -175
  209. package/tests/providers/firecrawl.test.ts +0 -168
  210. package/tests/providers/jina.test.ts +0 -144
  211. package/tests/providers/router.test.ts +0 -328
  212. package/tests/providers/tavily.test.ts +0 -165
  213. package/tests/registry/discovery.test.ts +0 -154
  214. package/tests/registry/injectable-logger.test.ts +0 -230
  215. package/tests/registry/input-validation.test.ts +0 -361
  216. package/tests/registry/interface-completeness.test.ts +0 -85
  217. package/tests/registry/mcp-integration.test.ts +0 -103
  218. package/tests/registry/mcp-read-only-hint.test.ts +0 -60
  219. package/tests/registry/memoria-discovery.test.ts +0 -390
  220. package/tests/registry/nested-validation.test.ts +0 -283
  221. package/tests/registry/pseudo-tool-filtering.test.ts +0 -258
  222. package/tests/registry/registration-lifecycle.test.ts +0 -133
  223. package/tests/registry-validation.test.ts +0 -424
  224. package/tests/registry.test.ts +0 -460
  225. package/tests/security/dns-pinning.test.ts +0 -162
  226. package/tests/security/external-content.test.ts +0 -144
  227. package/tests/security/ssrf.test.ts +0 -118
  228. package/tests/shell-safety-integration.test.ts +0 -32
  229. package/tests/shell-safety.test.ts +0 -365
  230. package/tests/slack/desktop-session.test.ts +0 -50
  231. package/tests/test-lane-manifest.ts +0 -440
  232. package/tests/test-utils.ts +0 -27
  233. package/tests/tool-factory.test.ts +0 -188
  234. package/tests/utils/retry.test.ts +0 -231
  235. package/tests/utils/url.test.ts +0 -63
  236. package/tsconfig.cjs.json +0 -24
  237. package/tsconfig.json +0 -12
  238. package/vitest.config.ts +0 -55
  239. package/vitest.e2e.config.ts +0 -24
  240. package/vitest.integration.config.ts +0 -24
  241. package/vitest.native.config.ts +0 -24
@@ -1,281 +0,0 @@
1
- /**
2
- * Content Extraction — HTML to Markdown conversion with article detection
3
- *
4
- * Three-tier fallback strategy:
5
- * 1. Readability.js (Mozilla) + Turndown for article-like content
6
- * 2. Turndown raw HTML if Readability fails
7
- * 3. Regex-based strip if both fail
8
- */
9
-
10
- import { JSDOM } from "jsdom";
11
- import { Readability } from "@mozilla/readability";
12
- import TurndownService from "turndown";
13
-
/** Upper bound, in characters, applied to every piece of extracted content (50K). */
const MAX_CONTENT_LENGTH = 50_000;

/** Time budget, in milliseconds, given to the timeout wrapper around the Readability parse. */
const PARSE_TIMEOUT_MS = 10_000;

/** Media types (lower-case, parameters removed) that are sent through HTML extraction. */
const HTML_CONTENT_TYPES: string[] = [
  "text/html",
  "text/xhtml+xml",
  "application/xhtml+xml",
];
22
-
// Counting-semaphore state that limits how many JSDOM extractions run at once
// (JSDOM memory protection). The limit is read from ARIA_MAX_CONCURRENT_EXTRACTIONS
// and falls back to 3 when the variable is unset, not a number, or not positive.
const configuredConcurrentExtractions = Number.parseInt(
  process.env.ARIA_MAX_CONCURRENT_EXTRACTIONS ?? "3",
  10,
);
const MAX_CONCURRENT_EXTRACTIONS =
  !Number.isFinite(configuredConcurrentExtractions) || configuredConcurrentExtractions <= 0
    ? 3
    : configuredConcurrentExtractions;
/** Number of extraction slots currently held. */
let activeExtractions = 0;
/** FIFO list of wake-up callbacks for callers waiting on a free slot. */
const extractionQueue: (() => void)[] = [];
34
-
/**
 * Claims one extraction slot.
 *
 * Resolves immediately while fewer than MAX_CONCURRENT_EXTRACTIONS slots are
 * held. Otherwise the caller is parked in `extractionQueue` and resolves when
 * `releaseExtractionSlot` invokes its callback, which also counts the slot as
 * taken at that moment.
 */
async function acquireExtractionSlot(): Promise<void> {
  if (activeExtractions >= MAX_CONCURRENT_EXTRACTIONS) {
    await new Promise<void>((wake) => {
      extractionQueue.push(() => {
        activeExtractions++;
        wake();
      });
    });
    return;
  }
  activeExtractions++;
}
47
-
/**
 * Gives back one extraction slot and, if anyone is waiting, wakes the
 * longest-waiting caller (whose callback re-claims the slot).
 */
function releaseExtractionSlot(): void {
  activeExtractions -= 1;
  extractionQueue.shift()?.();
}
53
-
/**
 * Shape returned by every extraction entry point in this module.
 */
export interface ExtractedContent {
  /** Title of the page; empty string when none was found. */
  title: string;
  /** Extracted body, as Markdown or stripped text depending on the extraction tier. */
  content: string;
  /** True only when the content came from a successful Readability article parse. */
  isArticle: boolean;
}
65
-
/**
 * Decides whether a Content-Type header value names an HTML media type, i.e.
 * whether the body should go through Readability/Turndown extraction.
 *
 * Parameters after the first ";" (such as charset) are ignored and the
 * comparison is case-insensitive. A missing or empty header counts as HTML.
 *
 * @param contentType - Raw Content-Type header value, if any
 * @returns true when the media type is listed in HTML_CONTENT_TYPES or the header is absent
 */
export function isHtmlContentType(contentType: string | undefined | null): boolean {
  if (contentType === undefined || contentType === null || contentType === "") {
    return true; // No Content-Type: default to HTML extraction
  }
  const semicolonAt = contentType.indexOf(";");
  const mediaType = semicolonAt === -1 ? contentType : contentType.slice(0, semicolonAt);
  return HTML_CONTENT_TYPES.includes(mediaType.trim().toLowerCase());
}
76
-
/**
 * Extracts content from a response body, dispatching on its Content-Type.
 *
 * HTML (or an absent Content-Type) is handed to `extractContent`. Any other
 * media type is returned as raw text cut to MAX_CONTENT_LENGTH characters; an
 * empty body yields the placeholder `[Non-HTML content: <media type>]`, with
 * "unknown" standing in when no media type can be read from the header.
 *
 * @param body - The response body text
 * @param url - The source URL
 * @param contentType - The Content-Type header value (optional)
 * @returns Extracted content; `title` is empty and `isArticle` false for non-HTML bodies
 */
export async function extractFromResponse(
  body: string,
  url: string,
  contentType?: string | null,
): Promise<ExtractedContent> {
  if (isHtmlContentType(contentType)) {
    return extractContent(body, url);
  }

  // Non-HTML content: pass the text through, truncated to the limit.
  const rawBody = body.slice(0, MAX_CONTENT_LENGTH);
  if (rawBody) {
    return { title: "", content: rawBody, isArticle: false };
  }

  const mediaType = contentType?.split(";")[0]?.trim();
  return {
    title: "",
    content: `[Non-HTML content: ${mediaType || "unknown"}]`,
    isArticle: false,
  };
}
105
-
/**
 * Races `promise` against a timer.
 *
 * Settles with the promise's own outcome when that arrives first; otherwise
 * rejects with an Error reading "<label> timed out after <ms>ms". The timer is
 * cleared once the race is decided. The underlying work is not cancelled.
 *
 * @param promise - The operation to wait for
 * @param ms - Time budget in milliseconds
 * @param label - Name used in the timeout error message
 */
async function withTimeout<T>(promise: Promise<T>, ms: number, label: string): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`${label} timed out after ${ms}ms`));
    }, ms);
  });
  try {
    return await Promise.race([promise, deadline]);
  } finally {
    clearTimeout(timer);
  }
}
127
-
/**
 * Reads the page title straight from raw HTML, without building a DOM.
 *
 * Used on the path that skips JSDOM. Character references (`&amp;`, `&#39;`,
 * `&#x27;`, ...) are decoded so the result matches the decoded text that the
 * DOM path gets from the <title> element's `textContent`. Decoding is a single
 * pass, so `&amp;lt;` becomes `&lt;` and is not decoded twice. Unknown named
 * references and out-of-range numeric ones are left untouched.
 *
 * @param html - Raw HTML document text
 * @returns The title with whitespace runs collapsed and trimmed, or "" when there is no <title>
 */
function extractOversizedHtmlTitle(html: string): string {
  // `\b` keeps look-alike tags such as <titlebar> from matching.
  const rawTitle = html.match(/<title\b[^>]*>([\s\S]*?)<\/title\s*>/i)?.[1];
  if (rawTitle === undefined) return "";

  // A Map (not an object literal) so names like "constructor" cannot hit prototype members.
  const namedReferences = new Map<string, string>([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", "\u00a0"],
  ]);

  const decoded = rawTitle.replace(
    /&(?:#(\d+)|#x([0-9a-f]+)|([a-z]+));/gi,
    (reference: string, decimal?: string, hex?: string, name?: string): string => {
      if (name !== undefined) {
        return namedReferences.get(name.toLowerCase()) ?? reference;
      }
      const codePoint =
        decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex ?? "", 16);
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      return codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate
        ? String.fromCodePoint(codePoint)
        : reference;
    },
  );

  // Collapse after decoding so a decoded non-breaking space is normalised too.
  return decoded.replace(/\s+/g, " ").trim();
}
132
-
/**
 * Extracts content from HTML and converts it to Markdown.
 *
 * Strategy:
 * 1. Readability.js + Turndown, attempted only when the page has semantic
 *    article markers (<article>, role="article"/"main", itemtype containing "Article")
 * 2. Turndown on the raw <body> HTML
 * 3. Regex-based tag stripping
 *
 * Inputs longer than twice MAX_CONTENT_LENGTH skip DOM parsing and go straight
 * to the regex strip, since the result would be truncated anyway. The same
 * regex strip is the fallback when DOM setup itself throws (for example when
 * JSDOM rejects `url`), so a usable page is not turned into an empty result.
 *
 * DOM work runs under the module's extraction semaphore; the slot is released
 * and the JSDOM window closed on every exit path.
 *
 * @param html - The HTML content to extract from
 * @param url - The source URL, passed to JSDOM as the document URL
 * @returns Extracted content, with `content` capped at MAX_CONTENT_LENGTH characters
 */
export async function extractContent(html: string, url: string): Promise<ExtractedContent> {
  // Nothing to extract from empty or whitespace-only input.
  if (!html || html.trim().length === 0) {
    return { title: "", content: "", isArticle: false };
  }

  // DOM-free extraction: used for oversized pages and as the last-resort fallback.
  const regexOnlyResult = (): ExtractedContent => ({
    title: extractOversizedHtmlTitle(html),
    content: stripHtmlTags(html).slice(0, MAX_CONTENT_LENGTH),
    isArticle: false,
  });

  // Markdown post-processing shared by Tier 1 and Tier 2.
  const finalizeMarkdown = (markdown: string): string =>
    normalizeTrailingSlashes(markdown).slice(0, MAX_CONTENT_LENGTH);

  // Oversized pages will be truncated anyway, so skip the expensive DOM work.
  if (html.length > MAX_CONTENT_LENGTH * 2) {
    return regexOnlyResult();
  }

  await acquireExtractionSlot();
  try {
    const dom = new JSDOM(html, { url });
    try {
      const document = dom.window.document;

      const title = document.querySelector("title")?.textContent?.trim() || "";

      const hasArticleMarkers = !!document.querySelector(
        "article, [role='article'], [role='main'], [itemtype*='Article']",
      );

      const turndown = new TurndownService({
        headingStyle: "atx",
        codeBlockStyle: "fenced",
      });
      // Drop noise elements before conversion.
      turndown.remove(["script", "style", "meta", "link", "noscript"]);

      // Tier 1: Readability article extraction (article-like pages only).
      if (hasArticleMarkers) {
        try {
          // Readability gets a clone, so `document` stays intact for Tier 2.
          const reader = new Readability(document.cloneNode(true) as Document);
          // NOTE(review): reader.parse() is evaluated before withTimeout() is
          // called, so the timer is armed only after parsing has returned and
          // cannot interrupt a slow synchronous parse. Bounding it for real
          // would need a worker; confirm whether that is wanted.
          const article = await withTimeout(
            Promise.resolve(reader.parse()),
            PARSE_TIMEOUT_MS,
            "Readability.parse()",
          );

          if (article && article.content) {
            return {
              title: article.title || title,
              content: finalizeMarkdown(turndown.turndown(article.content)),
              isArticle: true,
            };
          }
        } catch {
          // Readability failed or timed out: fall through to Tier 2.
        }
      }

      // Tier 2: Turndown on the raw body HTML (non-article pages or Tier 1 failure).
      try {
        const bodyHtml = document.body?.innerHTML || "";
        if (bodyHtml) {
          return {
            title,
            content: finalizeMarkdown(turndown.turndown(bodyHtml)),
            isArticle: false,
          };
        }
      } catch {
        // Turndown failed: fall through to Tier 3.
      }

      // Tier 3: regex-based fallback, keeping the DOM-derived title.
      return {
        title,
        content: stripHtmlTags(html).slice(0, MAX_CONTENT_LENGTH),
        isArticle: false,
      };
    } finally {
      dom.window.close();
    }
  } catch {
    // DOM setup or teardown failed (e.g. JSDOM rejected `url`). The raw HTML is
    // still usable, so fall back to the regex strip instead of returning nothing.
    return regexOnlyResult();
  } finally {
    releaseExtractionSlot();
  }
}
246
-
/**
 * Removes the trailing slash that JSDOM normalization adds to bare-origin URLs
 * in Markdown links (`https://example.com` becomes `https://example.com/`),
 * which breaks exact URL matches.
 *
 * Only link targets that are an origin and nothing else are rewritten. A
 * trailing slash that belongs to a path, a query string (`?next=/`) or a
 * fragment (`#/`, common with hash routers) is part of the address and is kept.
 * Targets that do not parse as absolute URLs are left unchanged.
 *
 * @param markdown - Markdown text produced by Turndown
 * @returns The Markdown with bare-origin link targets stripped of their trailing slash
 */
function normalizeTrailingSlashes(markdown: string): string {
  return markdown.replace(/\]\(([^)]+?)\/\)/g, (match: string, url: string) => {
    try {
      // `url` is the target minus its final "/"; re-append it to inspect the real URL.
      const parsed = new URL(url + "/");
      const isBareOrigin = parsed.pathname === "/" && parsed.search === "" && parsed.hash === "";
      if (isBareOrigin) {
        return `](${url})`;
      }
    } catch {
      // Not an absolute URL (relative link, anchor, ...): leave as-is.
    }
    return match;
  });
}
265
-
/**
 * Strips HTML down to plain text using regexes (last-resort fallback; no DOM).
 *
 * Order matters:
 * 1. <script>/<style> elements are removed together with their content. End
 *    tags are matched leniently (`</script >`, `</style\n>`, `</script x>`),
 *    so such a block does not leak its code into the text.
 * 2. HTML comments are replaced as a whole, so a ">" inside a comment cannot
 *    end the match early and leak the remainder of the comment.
 * 3. Every remaining tag becomes a space; whitespace runs are then collapsed.
 *
 * Character references are not decoded here.
 *
 * @param html - Raw HTML text
 * @returns Text content with tags removed and whitespace normalised
 */
function stripHtmlTags(html: string): string {
  return (
    html
      // Script and style elements, including their content
      .replace(/<script\b[\s\S]*?<\/script\b[^>]*>/gi, "")
      .replace(/<style\b[\s\S]*?<\/style\b[^>]*>/gi, "")
      // Whole comments, before the generic tag pattern can cut them at an inner ">"
      .replace(/<!--[\s\S]*?-->/g, " ")
      // All other tags
      .replace(/<[^>]+>/g, " ")
      // Collapse whitespace
      .replace(/\s+/g, " ")
      .trim()
  );
}
@@ -1,5 +0,0 @@
1
- /**
2
- * Content extraction utilities
3
- */
4
-
5
- export { extractContent, type ExtractedContent } from "./content-extraction.js";