@aria-cli/tools 1.0.11 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. package/dist-cjs/index.js +400 -436
  2. package/dist-cjs/network-runtime/index.js +8 -173
  3. package/package.json +6 -6
  4. package/dist-cjs/ask-user-interaction.js +0 -28
  5. package/dist-cjs/ask-user-interaction.js.map +0 -1
  6. package/dist-cjs/cache/web-cache.js +0 -71
  7. package/dist-cjs/cache/web-cache.js.map +0 -1
  8. package/dist-cjs/definitions/arion.js +0 -108
  9. package/dist-cjs/definitions/arion.js.map +0 -1
  10. package/dist-cjs/definitions/browser/browser.js +0 -422
  11. package/dist-cjs/definitions/browser/browser.js.map +0 -1
  12. package/dist-cjs/definitions/browser/index.js +0 -9
  13. package/dist-cjs/definitions/browser/index.js.map +0 -1
  14. package/dist-cjs/definitions/browser/pw-downloads.js +0 -118
  15. package/dist-cjs/definitions/browser/pw-downloads.js.map +0 -1
  16. package/dist-cjs/definitions/browser/pw-interactions.js +0 -214
  17. package/dist-cjs/definitions/browser/pw-interactions.js.map +0 -1
  18. package/dist-cjs/definitions/browser/pw-responses.js +0 -85
  19. package/dist-cjs/definitions/browser/pw-responses.js.map +0 -1
  20. package/dist-cjs/definitions/browser/pw-session.js +0 -327
  21. package/dist-cjs/definitions/browser/pw-session.js.map +0 -1
  22. package/dist-cjs/definitions/browser/pw-shared.js +0 -73
  23. package/dist-cjs/definitions/browser/pw-shared.js.map +0 -1
  24. package/dist-cjs/definitions/browser/pw-snapshot.js +0 -308
  25. package/dist-cjs/definitions/browser/pw-snapshot.js.map +0 -1
  26. package/dist-cjs/definitions/browser/pw-state.js +0 -71
  27. package/dist-cjs/definitions/browser/pw-state.js.map +0 -1
  28. package/dist-cjs/definitions/browser/types.js +0 -6
  29. package/dist-cjs/definitions/browser/types.js.map +0 -1
  30. package/dist-cjs/definitions/code-intelligence.js +0 -474
  31. package/dist-cjs/definitions/code-intelligence.js.map +0 -1
  32. package/dist-cjs/definitions/core.js +0 -134
  33. package/dist-cjs/definitions/core.js.map +0 -1
  34. package/dist-cjs/definitions/delegation.js +0 -516
  35. package/dist-cjs/definitions/delegation.js.map +0 -1
  36. package/dist-cjs/definitions/deploy.js +0 -69
  37. package/dist-cjs/definitions/deploy.js.map +0 -1
  38. package/dist-cjs/definitions/filesystem.js +0 -200
  39. package/dist-cjs/definitions/filesystem.js.map +0 -1
  40. package/dist-cjs/definitions/frg.js +0 -67
  41. package/dist-cjs/definitions/frg.js.map +0 -1
  42. package/dist-cjs/definitions/index.js +0 -44
  43. package/dist-cjs/definitions/index.js.map +0 -1
  44. package/dist-cjs/definitions/memory.js +0 -127
  45. package/dist-cjs/definitions/memory.js.map +0 -1
  46. package/dist-cjs/definitions/messaging.js +0 -632
  47. package/dist-cjs/definitions/messaging.js.map +0 -1
  48. package/dist-cjs/definitions/meta.js +0 -353
  49. package/dist-cjs/definitions/meta.js.map +0 -1
  50. package/dist-cjs/definitions/network.js +0 -163
  51. package/dist-cjs/definitions/network.js.map +0 -1
  52. package/dist-cjs/definitions/outlook.js +0 -281
  53. package/dist-cjs/definitions/outlook.js.map +0 -1
  54. package/dist-cjs/definitions/patch/apply-patch.js +0 -192
  55. package/dist-cjs/definitions/patch/apply-patch.js.map +0 -1
  56. package/dist-cjs/definitions/patch/fuzzy-match.js +0 -173
  57. package/dist-cjs/definitions/patch/fuzzy-match.js.map +0 -1
  58. package/dist-cjs/definitions/patch/index.js +0 -6
  59. package/dist-cjs/definitions/patch/index.js.map +0 -1
  60. package/dist-cjs/definitions/patch/patch-parser.js +0 -216
  61. package/dist-cjs/definitions/patch/patch-parser.js.map +0 -1
  62. package/dist-cjs/definitions/patch/sandbox-paths.js +0 -114
  63. package/dist-cjs/definitions/patch/sandbox-paths.js.map +0 -1
  64. package/dist-cjs/definitions/process/index.js +0 -9
  65. package/dist-cjs/definitions/process/index.js.map +0 -1
  66. package/dist-cjs/definitions/process/process-registry.js +0 -232
  67. package/dist-cjs/definitions/process/process-registry.js.map +0 -1
  68. package/dist-cjs/definitions/process/process.js +0 -390
  69. package/dist-cjs/definitions/process/process.js.map +0 -1
  70. package/dist-cjs/definitions/process/pty-keys.js +0 -260
  71. package/dist-cjs/definitions/process/pty-keys.js.map +0 -1
  72. package/dist-cjs/definitions/process/session-slug.js +0 -146
  73. package/dist-cjs/definitions/process/session-slug.js.map +0 -1
  74. package/dist-cjs/definitions/quip.js +0 -199
  75. package/dist-cjs/definitions/quip.js.map +0 -1
  76. package/dist-cjs/definitions/search.js +0 -64
  77. package/dist-cjs/definitions/search.js.map +0 -1
  78. package/dist-cjs/definitions/session-history.js +0 -73
  79. package/dist-cjs/definitions/session-history.js.map +0 -1
  80. package/dist-cjs/definitions/shell.js +0 -185
  81. package/dist-cjs/definitions/shell.js.map +0 -1
  82. package/dist-cjs/definitions/slack.js +0 -184
  83. package/dist-cjs/definitions/slack.js.map +0 -1
  84. package/dist-cjs/definitions/web.js +0 -113
  85. package/dist-cjs/definitions/web.js.map +0 -1
  86. package/dist-cjs/executors/apply-patch.js +0 -939
  87. package/dist-cjs/executors/apply-patch.js.map +0 -1
  88. package/dist-cjs/executors/arion.js +0 -126
  89. package/dist-cjs/executors/arion.js.map +0 -1
  90. package/dist-cjs/executors/code-intelligence.js +0 -926
  91. package/dist-cjs/executors/code-intelligence.js.map +0 -1
  92. package/dist-cjs/executors/deploy.js +0 -870
  93. package/dist-cjs/executors/deploy.js.map +0 -1
  94. package/dist-cjs/executors/filesystem.js +0 -1168
  95. package/dist-cjs/executors/filesystem.js.map +0 -1
  96. package/dist-cjs/executors/frg-freshness.js +0 -628
  97. package/dist-cjs/executors/frg-freshness.js.map +0 -1
  98. package/dist-cjs/executors/frg.js +0 -335
  99. package/dist-cjs/executors/frg.js.map +0 -1
  100. package/dist-cjs/executors/index.js +0 -144
  101. package/dist-cjs/executors/index.js.map +0 -1
  102. package/dist-cjs/executors/learning-meta.js +0 -1166
  103. package/dist-cjs/executors/learning-meta.js.map +0 -1
  104. package/dist-cjs/executors/lsp-client.js +0 -311
  105. package/dist-cjs/executors/lsp-client.js.map +0 -1
  106. package/dist-cjs/executors/memory.js +0 -797
  107. package/dist-cjs/executors/memory.js.map +0 -1
  108. package/dist-cjs/executors/meta.js +0 -227
  109. package/dist-cjs/executors/meta.js.map +0 -1
  110. package/dist-cjs/executors/process-registry.js +0 -470
  111. package/dist-cjs/executors/process-registry.js.map +0 -1
  112. package/dist-cjs/executors/pty-session-store.js +0 -35
  113. package/dist-cjs/executors/pty-session-store.js.map +0 -1
  114. package/dist-cjs/executors/pty.js +0 -313
  115. package/dist-cjs/executors/pty.js.map +0 -1
  116. package/dist-cjs/executors/restart.js +0 -156
  117. package/dist-cjs/executors/restart.js.map +0 -1
  118. package/dist-cjs/executors/search-freshness.js +0 -235
  119. package/dist-cjs/executors/search-freshness.js.map +0 -1
  120. package/dist-cjs/executors/search-types.js +0 -57
  121. package/dist-cjs/executors/search-types.js.map +0 -1
  122. package/dist-cjs/executors/search.js +0 -103
  123. package/dist-cjs/executors/search.js.map +0 -1
  124. package/dist-cjs/executors/self-diagnose.js +0 -435
  125. package/dist-cjs/executors/self-diagnose.js.map +0 -1
  126. package/dist-cjs/executors/session-history.js +0 -321
  127. package/dist-cjs/executors/session-history.js.map +0 -1
  128. package/dist-cjs/executors/shell-safety.js +0 -479
  129. package/dist-cjs/executors/shell-safety.js.map +0 -1
  130. package/dist-cjs/executors/shell.js +0 -1002
  131. package/dist-cjs/executors/shell.js.map +0 -1
  132. package/dist-cjs/executors/utils.js +0 -74
  133. package/dist-cjs/executors/utils.js.map +0 -1
  134. package/dist-cjs/executors/web.js +0 -548
  135. package/dist-cjs/executors/web.js.map +0 -1
  136. package/dist-cjs/extraction/content-extraction.js +0 -244
  137. package/dist-cjs/extraction/content-extraction.js.map +0 -1
  138. package/dist-cjs/extraction/index.js +0 -9
  139. package/dist-cjs/extraction/index.js.map +0 -1
  140. package/dist-cjs/headless-control-contract.js +0 -973
  141. package/dist-cjs/headless-control-contract.js.map +0 -1
  142. package/dist-cjs/index.js.map +0 -1
  143. package/dist-cjs/local-control-http-auth.js +0 -6
  144. package/dist-cjs/local-control-http-auth.js.map +0 -1
  145. package/dist-cjs/mcp/client.js +0 -186
  146. package/dist-cjs/mcp/client.js.map +0 -1
  147. package/dist-cjs/mcp/connection.js +0 -485
  148. package/dist-cjs/mcp/connection.js.map +0 -1
  149. package/dist-cjs/mcp/index.js +0 -31
  150. package/dist-cjs/mcp/index.js.map +0 -1
  151. package/dist-cjs/mcp/jsonrpc.js +0 -149
  152. package/dist-cjs/mcp/jsonrpc.js.map +0 -1
  153. package/dist-cjs/mcp/types.js +0 -9
  154. package/dist-cjs/mcp/types.js.map +0 -1
  155. package/dist-cjs/network-control-adapter.js +0 -78
  156. package/dist-cjs/network-control-adapter.js.map +0 -1
  157. package/dist-cjs/network-runtime/address-types.js +0 -169
  158. package/dist-cjs/network-runtime/address-types.js.map +0 -1
  159. package/dist-cjs/network-runtime/db-owner-fencing.js +0 -77
  160. package/dist-cjs/network-runtime/db-owner-fencing.js.map +0 -1
  161. package/dist-cjs/network-runtime/delivery-receipts.js +0 -277
  162. package/dist-cjs/network-runtime/delivery-receipts.js.map +0 -1
  163. package/dist-cjs/network-runtime/direct-endpoint-authority.js +0 -30
  164. package/dist-cjs/network-runtime/direct-endpoint-authority.js.map +0 -1
  165. package/dist-cjs/network-runtime/index.js.map +0 -1
  166. package/dist-cjs/network-runtime/local-control-contract.js +0 -634
  167. package/dist-cjs/network-runtime/local-control-contract.js.map +0 -1
  168. package/dist-cjs/network-runtime/node-store-contract.js +0 -39
  169. package/dist-cjs/network-runtime/node-store-contract.js.map +0 -1
  170. package/dist-cjs/network-runtime/pair-route-contract.js +0 -81
  171. package/dist-cjs/network-runtime/pair-route-contract.js.map +0 -1
  172. package/dist-cjs/network-runtime/peer-capabilities.js +0 -38
  173. package/dist-cjs/network-runtime/peer-capabilities.js.map +0 -1
  174. package/dist-cjs/network-runtime/peer-principal-ref.js +0 -16
  175. package/dist-cjs/network-runtime/peer-principal-ref.js.map +0 -1
  176. package/dist-cjs/network-runtime/peer-state-machine.js +0 -130
  177. package/dist-cjs/network-runtime/peer-state-machine.js.map +0 -1
  178. package/dist-cjs/network-runtime/protocol-schemas.js +0 -213
  179. package/dist-cjs/network-runtime/protocol-schemas.js.map +0 -1
  180. package/dist-cjs/network-runtime/runtime-bootstrap-contract.js +0 -64
  181. package/dist-cjs/network-runtime/runtime-bootstrap-contract.js.map +0 -1
  182. package/dist-cjs/outlook/desktop-session.js +0 -319
  183. package/dist-cjs/outlook/desktop-session.js.map +0 -1
  184. package/dist-cjs/policy.js +0 -156
  185. package/dist-cjs/policy.js.map +0 -1
  186. package/dist-cjs/providers/brave.js +0 -67
  187. package/dist-cjs/providers/brave.js.map +0 -1
  188. package/dist-cjs/providers/duckduckgo.js +0 -181
  189. package/dist-cjs/providers/duckduckgo.js.map +0 -1
  190. package/dist-cjs/providers/exa.js +0 -68
  191. package/dist-cjs/providers/exa.js.map +0 -1
  192. package/dist-cjs/providers/firecrawl.js +0 -60
  193. package/dist-cjs/providers/firecrawl.js.map +0 -1
  194. package/dist-cjs/providers/index.js +0 -18
  195. package/dist-cjs/providers/index.js.map +0 -1
  196. package/dist-cjs/providers/jina.js +0 -54
  197. package/dist-cjs/providers/jina.js.map +0 -1
  198. package/dist-cjs/providers/router.js +0 -101
  199. package/dist-cjs/providers/router.js.map +0 -1
  200. package/dist-cjs/providers/search-provider.js +0 -37
  201. package/dist-cjs/providers/search-provider.js.map +0 -1
  202. package/dist-cjs/providers/tavily.js +0 -59
  203. package/dist-cjs/providers/tavily.js.map +0 -1
  204. package/dist-cjs/quip/desktop-session.js +0 -354
  205. package/dist-cjs/quip/desktop-session.js.map +0 -1
  206. package/dist-cjs/registry/index.js +0 -7
  207. package/dist-cjs/registry/index.js.map +0 -1
  208. package/dist-cjs/registry/registry.js +0 -762
  209. package/dist-cjs/registry/registry.js.map +0 -1
  210. package/dist-cjs/runtime-socket-local-control-client.js +0 -368
  211. package/dist-cjs/runtime-socket-local-control-client.js.map +0 -1
  212. package/dist-cjs/security/dns-normalization.js +0 -23
  213. package/dist-cjs/security/dns-normalization.js.map +0 -1
  214. package/dist-cjs/security/dns-pinning.js +0 -161
  215. package/dist-cjs/security/dns-pinning.js.map +0 -1
  216. package/dist-cjs/security/external-content.js +0 -96
  217. package/dist-cjs/security/external-content.js.map +0 -1
  218. package/dist-cjs/security/ssrf.js +0 -222
  219. package/dist-cjs/security/ssrf.js.map +0 -1
  220. package/dist-cjs/slack/desktop-session.js +0 -367
  221. package/dist-cjs/slack/desktop-session.js.map +0 -1
  222. package/dist-cjs/tool-factory.js +0 -51
  223. package/dist-cjs/tool-factory.js.map +0 -1
  224. package/dist-cjs/types.js +0 -9
  225. package/dist-cjs/types.js.map +0 -1
  226. package/dist-cjs/utils/retry.js +0 -170
  227. package/dist-cjs/utils/retry.js.map +0 -1
  228. package/dist-cjs/utils/safe-parse-json.js +0 -165
  229. package/dist-cjs/utils/safe-parse-json.js.map +0 -1
  230. package/dist-cjs/utils/url.js +0 -24
  231. package/dist-cjs/utils/url.js.map +0 -1
@@ -1,244 +0,0 @@
1
- "use strict";
2
- /**
3
- * Content Extraction — HTML to Markdown conversion with article detection
4
- *
5
- * Three-tier fallback strategy:
6
- * 1. Readability.js (Mozilla) + Turndown for article-like content
7
- * 2. Turndown raw HTML if Readability fails
8
- * 3. Regex-based strip if both fail
9
- */
10
- var __importDefault = (this && this.__importDefault) || function (mod) {
11
- return (mod && mod.__esModule) ? mod : { "default": mod };
12
- };
13
- Object.defineProperty(exports, "__esModule", { value: true });
14
- exports.isHtmlContentType = isHtmlContentType;
15
- exports.extractFromResponse = extractFromResponse;
16
- exports.extractContent = extractContent;
17
- const jsdom_1 = require("jsdom");
18
- const readability_1 = require("@mozilla/readability");
19
- const turndown_1 = __importDefault(require("turndown"));
20
- /** Maximum content length (50K chars) */
21
- const MAX_CONTENT_LENGTH = 50_000;
22
- /** Timeout for Readability parse in milliseconds */
23
- const PARSE_TIMEOUT_MS = 10_000;
24
- /** Content types that should be parsed as HTML */
25
- const HTML_CONTENT_TYPES = ["text/html", "text/xhtml+xml", "application/xhtml+xml"];
26
- // Simple concurrency semaphore for JSDOM memory protection
27
- const configuredConcurrentExtractions = Number.parseInt(process.env.ARIA_MAX_CONCURRENT_EXTRACTIONS ?? "3", 10);
28
- const MAX_CONCURRENT_EXTRACTIONS = Number.isFinite(configuredConcurrentExtractions) && configuredConcurrentExtractions > 0
29
- ? configuredConcurrentExtractions
30
- : 3;
31
- let activeExtractions = 0;
32
- const extractionQueue = [];
33
- async function acquireExtractionSlot() {
34
- if (activeExtractions < MAX_CONCURRENT_EXTRACTIONS) {
35
- activeExtractions++;
36
- return;
37
- }
38
- return new Promise((resolve) => {
39
- extractionQueue.push(() => {
40
- activeExtractions++;
41
- resolve();
42
- });
43
- });
44
- }
45
- function releaseExtractionSlot() {
46
- activeExtractions--;
47
- const next = extractionQueue.shift();
48
- if (next)
49
- next();
50
- }
51
- /**
52
- * Checks whether a Content-Type header value represents HTML content
53
- * that should be processed through Readability/Turndown extraction.
54
- */
55
- function isHtmlContentType(contentType) {
56
- if (!contentType)
57
- return true; // Default to HTML extraction if no Content-Type
58
- const [mime] = contentType.toLowerCase().split(";");
59
- const lower = (mime ?? "").trim();
60
- return HTML_CONTENT_TYPES.includes(lower);
61
- }
62
- /**
63
- * Extracts content from a response body, checking Content-Type first.
64
- * Non-HTML content types (JSON, PDF, images, etc.) are returned as-is
65
- * or with a descriptive message rather than being fed through Readability.
66
- *
67
- * @param body - The response body text
68
- * @param url - The source URL
69
- * @param contentType - The Content-Type header value (optional)
70
- * @returns Extracted content
71
- */
72
- async function extractFromResponse(body, url, contentType) {
73
- if (!isHtmlContentType(contentType)) {
74
- // Non-HTML content: return raw text truncated to limit
75
- const [rawMimeType] = contentType?.split(";") ?? [];
76
- const mimeType = (rawMimeType ?? "unknown").trim() || "unknown";
77
- const truncated = body.slice(0, MAX_CONTENT_LENGTH);
78
- return {
79
- title: "",
80
- content: truncated || `[Non-HTML content: ${mimeType}]`,
81
- isArticle: false,
82
- };
83
- }
84
- return extractContent(body, url);
85
- }
86
- /**
87
- * Wraps a synchronous/async operation with a timeout.
88
- * Returns the result if completed within the timeout, or rejects.
89
- */
90
- function withTimeout(promise, ms, label) {
91
- return new Promise((resolve, reject) => {
92
- const timer = setTimeout(() => {
93
- reject(new Error(`${label} timed out after ${ms}ms`));
94
- }, ms);
95
- promise.then((val) => {
96
- clearTimeout(timer);
97
- resolve(val);
98
- }, (err) => {
99
- clearTimeout(timer);
100
- reject(err);
101
- });
102
- });
103
- }
104
- function extractOversizedHtmlTitle(html) {
105
- const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
106
- return match?.[1]?.replace(/\s+/g, " ").trim() ?? "";
107
- }
108
- /**
109
- * Extracts content from HTML and converts to Markdown.
110
- *
111
- * Strategy:
112
- * 1. Try Readability.js (article detection) + Turndown
113
- * 2. Fall back to Turndown on raw HTML
114
- * 3. Fall back to regex-based tag stripping
115
- *
116
- * @param html - The HTML content to extract from
117
- * @param url - The source URL (for link resolution)
118
- * @returns Extracted content with title and article detection
119
- */
120
- async function extractContent(html, url) {
121
- // Handle empty input
122
- if (!html || html.trim().length === 0) {
123
- return { title: "", content: "", isArticle: false };
124
- }
125
- // Oversized pages will be truncated anyway, so skip expensive DOM/readability
126
- // work and strip directly from the raw response body.
127
- if (html.length > MAX_CONTENT_LENGTH * 2) {
128
- return {
129
- title: extractOversizedHtmlTitle(html),
130
- content: stripHtmlTags(html).slice(0, MAX_CONTENT_LENGTH),
131
- isArticle: false,
132
- };
133
- }
134
- await acquireExtractionSlot();
135
- try {
136
- // Parse HTML with JSDOM
137
- const dom = new jsdom_1.JSDOM(html, { url });
138
- try {
139
- const document = dom.window.document;
140
- // Extract title
141
- const title = document.querySelector("title")?.textContent?.trim() || "";
142
- // Detect semantic article markers before Readability mutates the DOM
143
- const hasArticleMarkers = !!document.querySelector("article, [role='article'], [role='main'], [itemtype*='Article']");
144
- // Initialize Turndown for Markdown conversion
145
- const turndown = new turndown_1.default({
146
- headingStyle: "atx",
147
- codeBlockStyle: "fenced",
148
- });
149
- // Remove noise elements before conversion
150
- turndown.remove(["script", "style", "meta", "link", "noscript"]);
151
- // Tier 1: Try Readability.js for article extraction (only for article-like pages)
152
- if (hasArticleMarkers) {
153
- try {
154
- const reader = new readability_1.Readability(document.cloneNode(true));
155
- // Wrap Readability.parse() with a timeout to prevent hung parsing on malformed HTML
156
- const article = await withTimeout(Promise.resolve(reader.parse()), PARSE_TIMEOUT_MS, "Readability.parse()");
157
- if (article && article.content) {
158
- // Readability succeeded — convert to Markdown, normalize URLs
159
- let markdown = turndown.turndown(article.content);
160
- markdown = normalizeTrailingSlashes(markdown);
161
- const truncated = markdown.slice(0, MAX_CONTENT_LENGTH);
162
- return {
163
- title: article.title || title,
164
- content: truncated,
165
- isArticle: true,
166
- };
167
- }
168
- }
169
- catch {
170
- // Readability failed or timed out, fall through to Tier 2
171
- }
172
- }
173
- // Tier 2: Turndown on raw body HTML (non-article pages or Readability failure)
174
- try {
175
- const bodyHtml = document.body?.innerHTML || "";
176
- if (bodyHtml) {
177
- let markdown = turndown.turndown(bodyHtml);
178
- markdown = normalizeTrailingSlashes(markdown);
179
- const truncated = markdown.slice(0, MAX_CONTENT_LENGTH);
180
- return {
181
- title,
182
- content: truncated,
183
- isArticle: false,
184
- };
185
- }
186
- }
187
- catch {
188
- // Turndown failed, fall through to Tier 3
189
- }
190
- // Tier 3: Regex-based fallback (strip tags, extract text)
191
- const textContent = stripHtmlTags(html);
192
- const truncated = textContent.slice(0, MAX_CONTENT_LENGTH);
193
- return {
194
- title,
195
- content: truncated,
196
- isArticle: false,
197
- };
198
- }
199
- finally {
200
- dom.window.close();
201
- }
202
- }
203
- catch {
204
- // Catastrophic failure — return empty result
205
- return { title: "", content: "", isArticle: false };
206
- }
207
- finally {
208
- releaseExtractionSlot();
209
- }
210
- }
211
- /**
212
- * Remove trailing slashes from URLs in markdown links that were added by JSDOM normalization.
213
- * JSDOM normalizes `https://example.com` → `https://example.com/`, breaking exact URL matches.
214
- */
215
- function normalizeTrailingSlashes(markdown) {
216
- return markdown.replace(/\]\(([^)]+?)\/\)/g, (match, url) => {
217
- // Only strip trailing slash from URLs that are just a domain (no path)
218
- try {
219
- const parsed = new URL(url + "/");
220
- if (parsed.pathname === "/") {
221
- return `](${url})`;
222
- }
223
- }
224
- catch {
225
- // Not a valid URL, leave as-is
226
- }
227
- return match;
228
- });
229
- }
230
- /**
231
- * Strips HTML tags using regex (last resort fallback)
232
- */
233
- function stripHtmlTags(html) {
234
- return (html
235
- // Remove script and style tags and their content
236
- .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, "")
237
- .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, "")
238
- // Remove all other HTML tags
239
- .replace(/<[^>]+>/g, " ")
240
- // Collapse multiple whitespace
241
- .replace(/\s+/g, " ")
242
- .trim());
243
- }
244
- //# sourceMappingURL=content-extraction.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"content-extraction.js","sourceRoot":"","sources":["../../src/extraction/content-extraction.ts"],"names":[],"mappings":";AAAA;;;;;;;GAOG;;;;;AA8DH,8CAKC;AAYD,kDAiBC;AAyCD,wCAoGC;AA3OD,iCAA8B;AAC9B,sDAAmD;AACnD,wDAAuC;AAEvC,yCAAyC;AACzC,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAElC,oDAAoD;AACpD,MAAM,gBAAgB,GAAG,MAAM,CAAC;AAEhC,kDAAkD;AAClD,MAAM,kBAAkB,GAAG,CAAC,WAAW,EAAE,gBAAgB,EAAE,uBAAuB,CAAC,CAAC;AAEpF,2DAA2D;AAC3D,MAAM,+BAA+B,GAAG,MAAM,CAAC,QAAQ,CACrD,OAAO,CAAC,GAAG,CAAC,+BAA+B,IAAI,GAAG,EAClD,EAAE,CACH,CAAC;AACF,MAAM,0BAA0B,GAC9B,MAAM,CAAC,QAAQ,CAAC,+BAA+B,CAAC,IAAI,+BAA+B,GAAG,CAAC;IACrF,CAAC,CAAC,+BAA+B;IACjC,CAAC,CAAC,CAAC,CAAC;AACR,IAAI,iBAAiB,GAAG,CAAC,CAAC;AAC1B,MAAM,eAAe,GAAsB,EAAE,CAAC;AAE9C,KAAK,UAAU,qBAAqB;IAClC,IAAI,iBAAiB,GAAG,0BAA0B,EAAE,CAAC;QACnD,iBAAiB,EAAE,CAAC;QACpB,OAAO;IACT,CAAC;IACD,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;QACnC,eAAe,CAAC,IAAI,CAAC,GAAG,EAAE;YACxB,iBAAiB,EAAE,CAAC;YACpB,OAAO,EAAE,CAAC;QACZ,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,qBAAqB;IAC5B,iBAAiB,EAAE,CAAC;IACpB,MAAM,IAAI,GAAG,eAAe,CAAC,KAAK,EAAE,CAAC;IACrC,IAAI,IAAI;QAAE,IAAI,EAAE,CAAC;AACnB,CAAC;AAcD;;;GAGG;AACH,SAAgB,iBAAiB,CAAC,WAAsC;IACtE,IAAI,CAAC,WAAW;QAAE,OAAO,IAAI,CAAC,CAAC,gDAAgD;IAC/E,MAAM,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACpD,MAAM,KAAK,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAClC,OAAO,kBAAkB,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AAC5C,CAAC;AAED;;;;;;;;;GASG;AACI,KAAK,UAAU,mBAAmB,CACvC,IAAY,EACZ,GAAW,EACX,WAA2B;IAE3B,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,EAAE,CAAC;QACpC,uDAAuD;QACvD,MAAM,CAAC,WAAW,CAAC,GAAG,WAAW,EAAE,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QACpD,MAAM,QAAQ,GAAG,CAAC,WAAW,IAAI,SAAS,CAAC,CAAC,IAAI,EAAE,IAAI,SAAS,CAAC;QAChE,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC,CAAC;QACpD,OAAO;YACL,KAAK,EAAE,EAAE;YACT,OAAO,EAAE,SAAS,IAAI,sBAAsB,QAAQ,GAAG;YACvD,SAAS,EAAE,KAAK;SACjB,CAAC;IACJ,CAAC;IACD,OAAO,cAAc,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;AACnC,CAAC;AAED;;;GAGG;AACH,SAAS,WAAW,CAAI,OAAmB,EAAE,EAAU,EAAE,KAAa;IACpE,OAAO,IAAI,OAAO,CAAI,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACxC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;YAC5B,MAAM,CAAC,IAAI,KAAK,CAAC,GAAG,KAAK,oBAAoB,EAAE,IAAI,CAAC,CAAC,CAAC;QACxD,CAAC,EAAE,EAAE,CAAC,CAAC;QACP,OAAO,CAAC,IAAI,CACV,CAAC,GAAG,EAAE,EAAE;YACN,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,OAAO,CAAC,GAAG,CAAC,CAAC;QACf,CAAC,EACD,CAAC,GAAG,EAAE,EAAE;YACN,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,MAAM,CAAC,GAAG,CAAC,CAAC;QACd,CAAC,CACF,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,yBAAyB,CAAC,IAAY;IAC7C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;IAC7D,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;AACvD,CAAC;AAED;;;;;;;;;;;GAWG;AACI,KAAK,UAAU,cAAc,CAAC,IAAY,EAAE,GAAW;IAC5D,qBAAqB;IACrB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;IACtD,CAAC;IAED,8EAA8E;IAC9E,sDAAsD;IACtD,IAAI,IAAI,CAAC,MAAM,GAAG,kBAAkB,GAAG,CAAC,EAAE,CAAC;QACzC,OAAO;YACL,KAAK,EAAE,yBAAyB,CAAC,IAAI,CAAC;YACtC,OAAO,EAAE,aAAa,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC;YACzD,SAAS,EAAE,KAAK;SACjB,CAAC;IACJ,CAAC;IAED,MAAM,qBAAqB,EAAE,CAAC;IAC9B,IAAI,CAAC;QACH,wBAAwB;QACxB,MAAM,GAAG,GAAG,IAAI,aAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QACrC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC;YAErC,gBAAgB;YAChB,MAAM,KAAK,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAEzE,qEAAqE;YACrE,MAAM,iBAAiB,GAAG,CAAC,CAAC,QAAQ,CAAC,aAAa,CAChD,iEAAiE,CAClE,CAAC;YAEF,8CAA8C;YAC9C,MAAM,QAAQ,GAAG,IAAI,kBAAe,CAAC;gBACnC,YAAY,EAAE,KAAK;gBACnB,cAAc,EAAE,QAAQ;aACzB,CAAC,CAAC;YACH,0CAA0C;YAC1C,QAAQ,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC;YAEjE,kFAAkF;YAClF,IAAI,iBAAiB,EAAE,CAAC;gBACtB,IAAI,CAAC;oBACH,MAAM,MAAM,GAAG,IAAI,yBAAW,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAa,CAAC,CAAC;oBACrE,oFAAoF;oBACpF,MAAM,OAAO,GAAG,MAAM,WAAW,CAC/B,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,EAC/B,gBAAgB,EAChB,qBAAqB,CACtB,CAAC;oBAEF,IAAI,OAAO,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;wBAC/B,8DAA8D;wBAC9D,IAAI,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;wBAClD,QAAQ,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;wBAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC,CAAC;wBACxD,OAAO;4BACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,KAAK;4BAC7B,OAAO,EAAE,SAAS;4BAClB,SAAS,EAAE,IAAI;yBAChB,CAAC;oBACJ,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,0DAA0D;gBAC5D,CAAC;YACH,CAAC;YAED,+EAA+E;YAC/E,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,SAAS,IAAI,EAAE,CAAC;gBAChD,IAAI,QAAQ,EAAE,CAAC;oBACb,IAAI,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;oBAC3C,QAAQ,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;oBAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC,CAAC;oBACxD,OAAO;wBACL,KAAK;wBACL,OAAO,EAAE,SAAS;wBAClB,SAAS,EAAE,KAAK;qBACjB,CAAC;gBACJ,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,0CAA0C;YAC5C,CAAC;YAED,0DAA0D;YAC1D,MAAM,WAAW,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;YACxC,MAAM,SAAS,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC,CAAC;YAC3D,OAAO;gBACL,KAAK;gBACL,OAAO,EAAE,SAAS;gBAClB,SAAS,EAAE,KAAK;aACjB,CAAC;QACJ,CAAC;gBAAS,CAAC;YACT,GAAG,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;QACrB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,6CAA6C;QAC7C,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;IACtD,CAAC;YAAS,CAAC;QACT,qBAAqB,EAAE,CAAC;IAC1B,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,wBAAwB,CAAC,QAAgB;IAChD,OAAO,QAAQ,CAAC,OAAO,CAAC,mBAAmB,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;QAC1D,uEAAuE;QACvE,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;YAClC,IAAI,MAAM,CAAC,QAAQ,KAAK,GAAG,EAAE,CAAC;gBAC5B,OAAO,KAAK,GAAG,GAAG,CAAC;YACrB,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,+BAA+B;QACjC,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,IAAY;IACjC,OAAO,CACL,IAAI;QACF,iDAAiD;SAChD,OAAO,CAAC,qDAAqD,EAAE,EAAE,CAAC;SAClE,OAAO,CAAC,kDAAkD,EAAE,EAAE,CAAC;QAChE,6BAA6B;SAC5B,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;QACzB,+BAA+B;SAC9B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CACV,CAAC;AACJ,CAAC"}
@@ -1,9 +0,0 @@
1
- "use strict";
2
- /**
3
- * Content extraction utilities
4
- */
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.extractContent = void 0;
7
- var content_extraction_js_1 = require("./content-extraction.js");
8
- Object.defineProperty(exports, "extractContent", { enumerable: true, get: function () { return content_extraction_js_1.extractContent; } });
9
- //# sourceMappingURL=index.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/extraction/index.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,iEAAgF;AAAvE,uHAAA,cAAc,OAAA"}