@staticn0va/wigolo 0.6.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. package/README.md +48 -48
  2. package/SKILL.md +22 -22
  3. package/assets/skills/wigolo/rules/cache-first.md +1 -1
  4. package/assets/skills/wigolo/rules/synthesis.md +1 -1
  5. package/assets/skills/wigolo-fetch/SKILL.md +1 -1
  6. package/assets/skills/wigolo-find-similar/SKILL.md +2 -2
  7. package/assets/skills/wigolo-search/SKILL.md +3 -3
  8. package/dist/cache/store.d.ts +9 -1
  9. package/dist/cache/store.d.ts.map +1 -1
  10. package/dist/cache/store.js +30 -4
  11. package/dist/cache/store.js.map +1 -1
  12. package/dist/cli/doctor.d.ts +3 -3
  13. package/dist/cli/doctor.d.ts.map +1 -1
  14. package/dist/cli/doctor.js +67 -13
  15. package/dist/cli/doctor.js.map +1 -1
  16. package/dist/cli/health.js +1 -1
  17. package/dist/cli/health.js.map +1 -1
  18. package/dist/cli/status.js +1 -1
  19. package/dist/cli/status.js.map +1 -1
  20. package/dist/cli/tui/hooks/useInstall.js +2 -2
  21. package/dist/cli/tui/hooks/useInstall.js.map +1 -1
  22. package/dist/cli/tui/hooks/useVerify.js +3 -3
  23. package/dist/cli/tui/hooks/useVerify.js.map +1 -1
  24. package/dist/cli/tui/status-format.d.ts +1 -1
  25. package/dist/cli/tui/status-format.d.ts.map +1 -1
  26. package/dist/cli/tui/status-format.js +5 -5
  27. package/dist/cli/tui/status-format.js.map +1 -1
  28. package/dist/cli/tui/status-python.d.ts +1 -1
  29. package/dist/cli/tui/status-python.d.ts.map +1 -1
  30. package/dist/cli/tui/status-python.js +17 -1
  31. package/dist/cli/tui/status-python.js.map +1 -1
  32. package/dist/cli/tui/verify-suggestions.d.ts +1 -1
  33. package/dist/cli/tui/verify-suggestions.d.ts.map +1 -1
  34. package/dist/cli/tui/verify-suggestions.js +5 -5
  35. package/dist/cli/tui/verify-suggestions.js.map +1 -1
  36. package/dist/cli/tui/verify.d.ts +2 -2
  37. package/dist/cli/tui/verify.d.ts.map +1 -1
  38. package/dist/cli/tui/verify.js +34 -8
  39. package/dist/cli/tui/verify.js.map +1 -1
  40. package/dist/cli/uninstall.js +2 -2
  41. package/dist/cli/uninstall.js.map +1 -1
  42. package/dist/cli/warmup.d.ts.map +1 -1
  43. package/dist/cli/warmup.js +29 -25
  44. package/dist/cli/warmup.js.map +1 -1
  45. package/dist/config.d.ts +6 -1
  46. package/dist/config.d.ts.map +1 -1
  47. package/dist/config.js +15 -2
  48. package/dist/config.js.map +1 -1
  49. package/dist/crawl/dedup.d.ts +1 -0
  50. package/dist/crawl/dedup.d.ts.map +1 -1
  51. package/dist/crawl/dedup.js +47 -1
  52. package/dist/crawl/dedup.js.map +1 -1
  53. package/dist/extraction/boilerplate.d.ts +15 -0
  54. package/dist/extraction/boilerplate.d.ts.map +1 -0
  55. package/dist/extraction/boilerplate.js +49 -0
  56. package/dist/extraction/boilerplate.js.map +1 -0
  57. package/dist/extraction/defuddle.d.ts.map +1 -1
  58. package/dist/extraction/defuddle.js +7 -3
  59. package/dist/extraction/defuddle.js.map +1 -1
  60. package/dist/extraction/jsonld.js +1 -1
  61. package/dist/extraction/jsonld.js.map +1 -1
  62. package/dist/extraction/lang-hints.d.ts +2 -0
  63. package/dist/extraction/lang-hints.d.ts.map +1 -0
  64. package/dist/extraction/lang-hints.js +28 -0
  65. package/dist/extraction/lang-hints.js.map +1 -0
  66. package/dist/extraction/llm/anthropic.d.ts +3 -0
  67. package/dist/extraction/llm/anthropic.d.ts.map +1 -0
  68. package/dist/extraction/llm/anthropic.js +33 -0
  69. package/dist/extraction/llm/anthropic.js.map +1 -0
  70. package/dist/extraction/llm/cache.d.ts +5 -0
  71. package/dist/extraction/llm/cache.d.ts.map +1 -0
  72. package/dist/extraction/llm/cache.js +35 -0
  73. package/dist/extraction/llm/cache.js.map +1 -0
  74. package/dist/extraction/llm/gemini.d.ts +3 -0
  75. package/dist/extraction/llm/gemini.d.ts.map +1 -0
  76. package/dist/extraction/llm/gemini.js +35 -0
  77. package/dist/extraction/llm/gemini.js.map +1 -0
  78. package/dist/extraction/llm/groq.d.ts +3 -0
  79. package/dist/extraction/llm/groq.d.ts.map +1 -0
  80. package/dist/extraction/llm/groq.js +63 -0
  81. package/dist/extraction/llm/groq.js.map +1 -0
  82. package/dist/extraction/llm/hash.d.ts +3 -0
  83. package/dist/extraction/llm/hash.d.ts.map +1 -0
  84. package/dist/extraction/llm/hash.js +22 -0
  85. package/dist/extraction/llm/hash.js.map +1 -0
  86. package/dist/extraction/llm/openai.d.ts +3 -0
  87. package/dist/extraction/llm/openai.d.ts.map +1 -0
  88. package/dist/extraction/llm/openai.js +38 -0
  89. package/dist/extraction/llm/openai.js.map +1 -0
  90. package/dist/extraction/llm/select.d.ts +5 -0
  91. package/dist/extraction/llm/select.d.ts.map +1 -0
  92. package/dist/extraction/llm/select.js +27 -0
  93. package/dist/extraction/llm/select.js.map +1 -0
  94. package/dist/extraction/llm/types.d.ts +24 -0
  95. package/dist/extraction/llm/types.d.ts.map +1 -0
  96. package/dist/extraction/llm/types.js +2 -0
  97. package/dist/extraction/llm/types.js.map +1 -0
  98. package/dist/extraction/llm/validate.d.ts +6 -0
  99. package/dist/extraction/llm/validate.d.ts.map +1 -0
  100. package/dist/extraction/llm/validate.js +63 -0
  101. package/dist/extraction/llm/validate.js.map +1 -0
  102. package/dist/extraction/llm-fallback.d.ts +17 -0
  103. package/dist/extraction/llm-fallback.d.ts.map +1 -0
  104. package/dist/extraction/llm-fallback.js +129 -0
  105. package/dist/extraction/llm-fallback.js.map +1 -0
  106. package/dist/extraction/markdown.d.ts +9 -0
  107. package/dist/extraction/markdown.d.ts.map +1 -1
  108. package/dist/extraction/markdown.js +52 -3
  109. package/dist/extraction/markdown.js.map +1 -1
  110. package/dist/extraction/pipeline.d.ts.map +1 -1
  111. package/dist/extraction/pipeline.js +17 -5
  112. package/dist/extraction/pipeline.js.map +1 -1
  113. package/dist/extraction/readability.d.ts.map +1 -1
  114. package/dist/extraction/readability.js +2 -3
  115. package/dist/extraction/readability.js.map +1 -1
  116. package/dist/extraction/schema.d.ts +12 -0
  117. package/dist/extraction/schema.d.ts.map +1 -1
  118. package/dist/extraction/schema.js +81 -11
  119. package/dist/extraction/schema.js.map +1 -1
  120. package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
  121. package/dist/extraction/site-extractors/docs-generic.js +2 -3
  122. package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
  123. package/dist/extraction/site-extractors/github.d.ts.map +1 -1
  124. package/dist/extraction/site-extractors/github.js +4 -5
  125. package/dist/extraction/site-extractors/github.js.map +1 -1
  126. package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
  127. package/dist/extraction/site-extractors/mdn.js +2 -3
  128. package/dist/extraction/site-extractors/mdn.js.map +1 -1
  129. package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
  130. package/dist/extraction/site-extractors/stackoverflow.js +3 -4
  131. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
  132. package/dist/extraction/structured-data.d.ts +4 -0
  133. package/dist/extraction/structured-data.d.ts.map +1 -0
  134. package/dist/extraction/structured-data.js +203 -0
  135. package/dist/extraction/structured-data.js.map +1 -0
  136. package/dist/fetch/router.d.ts +2 -1
  137. package/dist/fetch/router.d.ts.map +1 -1
  138. package/dist/fetch/router.js +19 -1
  139. package/dist/fetch/router.js.map +1 -1
  140. package/dist/instructions.d.ts +8 -8
  141. package/dist/instructions.d.ts.map +1 -1
  142. package/dist/instructions.js +48 -41
  143. package/dist/instructions.js.map +1 -1
  144. package/dist/logger.d.ts +1 -1
  145. package/dist/logger.d.ts.map +1 -1
  146. package/dist/research/brief.js +1 -1
  147. package/dist/research/brief.js.map +1 -1
  148. package/dist/search/evidence.d.ts +25 -0
  149. package/dist/search/evidence.d.ts.map +1 -0
  150. package/dist/search/evidence.js +260 -0
  151. package/dist/search/evidence.js.map +1 -0
  152. package/dist/search/highlights.d.ts +11 -2
  153. package/dist/search/highlights.d.ts.map +1 -1
  154. package/dist/search/highlights.js +131 -48
  155. package/dist/search/highlights.js.map +1 -1
  156. package/dist/search/multi-query.d.ts +1 -0
  157. package/dist/search/multi-query.d.ts.map +1 -1
  158. package/dist/search/multi-query.js +13 -0
  159. package/dist/search/multi-query.js.map +1 -1
  160. package/dist/search/rerank.d.ts +3 -2
  161. package/dist/search/rerank.d.ts.map +1 -1
  162. package/dist/search/rerank.js +16 -44
  163. package/dist/search/rerank.js.map +1 -1
  164. package/dist/search/reranker/download.d.ts +9 -0
  165. package/dist/search/reranker/download.d.ts.map +1 -0
  166. package/dist/search/reranker/download.js +77 -0
  167. package/dist/search/reranker/download.js.map +1 -0
  168. package/dist/search/reranker/models.d.ts +14 -0
  169. package/dist/search/reranker/models.d.ts.map +1 -0
  170. package/dist/search/reranker/models.js +37 -0
  171. package/dist/search/reranker/models.js.map +1 -0
  172. package/dist/search/reranker/onnx.d.ts +13 -0
  173. package/dist/search/reranker/onnx.d.ts.map +1 -0
  174. package/dist/search/reranker/onnx.js +70 -0
  175. package/dist/search/reranker/onnx.js.map +1 -0
  176. package/dist/search/reranker/recency-boost.d.ts +3 -0
  177. package/dist/search/reranker/recency-boost.d.ts.map +1 -0
  178. package/dist/search/reranker/recency-boost.js +12 -0
  179. package/dist/search/reranker/recency-boost.js.map +1 -0
  180. package/dist/search/reranker/recency.d.ts +3 -0
  181. package/dist/search/reranker/recency.d.ts.map +1 -0
  182. package/dist/search/reranker/recency.js +26 -0
  183. package/dist/search/reranker/recency.js.map +1 -0
  184. package/dist/search/reranker/tokenizer.d.ts +30 -0
  185. package/dist/search/reranker/tokenizer.d.ts.map +1 -0
  186. package/dist/search/reranker/tokenizer.js +49 -0
  187. package/dist/search/reranker/tokenizer.js.map +1 -0
  188. package/dist/search/tokens.d.ts +3 -0
  189. package/dist/search/tokens.d.ts.map +1 -0
  190. package/dist/search/tokens.js +38 -0
  191. package/dist/search/tokens.js.map +1 -0
  192. package/dist/search/truncate.d.ts +4 -0
  193. package/dist/search/truncate.d.ts.map +1 -1
  194. package/dist/search/truncate.js +13 -0
  195. package/dist/search/truncate.js.map +1 -1
  196. package/dist/server/backend-status.js +2 -2
  197. package/dist/server/backend-status.js.map +1 -1
  198. package/dist/server/tool-schemas.d.ts +503 -0
  199. package/dist/server/tool-schemas.d.ts.map +1 -0
  200. package/dist/server/tool-schemas.js +425 -0
  201. package/dist/server/tool-schemas.js.map +1 -0
  202. package/dist/server.d.ts.map +1 -1
  203. package/dist/server.js +14 -339
  204. package/dist/server.js.map +1 -1
  205. package/dist/tools/agent.d.ts.map +1 -1
  206. package/dist/tools/agent.js +36 -0
  207. package/dist/tools/agent.js.map +1 -1
  208. package/dist/tools/crawl.d.ts.map +1 -1
  209. package/dist/tools/crawl.js +37 -2
  210. package/dist/tools/crawl.js.map +1 -1
  211. package/dist/tools/extract.d.ts.map +1 -1
  212. package/dist/tools/extract.js +19 -3
  213. package/dist/tools/extract.js.map +1 -1
  214. package/dist/tools/fetch.d.ts.map +1 -1
  215. package/dist/tools/fetch.js +44 -7
  216. package/dist/tools/fetch.js.map +1 -1
  217. package/dist/tools/find-similar.d.ts.map +1 -1
  218. package/dist/tools/find-similar.js +32 -1
  219. package/dist/tools/find-similar.js.map +1 -1
  220. package/dist/tools/research.d.ts.map +1 -1
  221. package/dist/tools/research.js +34 -1
  222. package/dist/tools/research.js.map +1 -1
  223. package/dist/tools/search.d.ts.map +1 -1
  224. package/dist/tools/search.js +98 -54
  225. package/dist/tools/search.js.map +1 -1
  226. package/dist/types.d.ts +65 -1
  227. package/dist/types.d.ts.map +1 -1
  228. package/dist/types.js +1 -1
  229. package/dist/types.js.map +1 -1
  230. package/dist/util/mode.d.ts +4 -0
  231. package/dist/util/mode.d.ts.map +1 -0
  232. package/dist/util/mode.js +13 -0
  233. package/dist/util/mode.js.map +1 -0
  234. package/package.json +10 -4
  235. package/dist/search/flashrank.d.ts +0 -12
  236. package/dist/search/flashrank.d.ts.map +0 -1
  237. package/dist/search/flashrank.js +0 -64
  238. package/dist/search/flashrank.js.map +0 -1
@@ -1,5 +1,21 @@
1
1
  import TurndownService from 'turndown';
2
- function buildTurndown() {
2
+ import { detectCodeLanguage } from './lang-hints.js';
3
+ function longestBacktickRun(s) {
4
+ let max = 0;
5
+ let cur = 0;
6
+ for (let i = 0; i < s.length; i++) {
7
+ if (s.charCodeAt(i) === 96) {
8
+ cur++;
9
+ if (cur > max)
10
+ max = cur;
11
+ }
12
+ else {
13
+ cur = 0;
14
+ }
15
+ }
16
+ return max;
17
+ }
18
+ export function buildTurndown() {
3
19
  const td = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
4
20
  // Remove script and style tags entirely
5
21
  td.remove(['script', 'style']);
@@ -35,6 +51,20 @@ function buildTurndown() {
35
51
  return content;
36
52
  },
37
53
  });
54
+ td.addRule('codeBlockLang', {
55
+ filter(node) {
56
+ return node.nodeName === 'PRE' && node.querySelector('code') !== null;
57
+ },
58
+ replacement(_content, node) {
59
+ const pre = node;
60
+ const code = pre.querySelector('code');
61
+ const cls = code?.getAttribute('class') ?? pre.getAttribute('class') ?? '';
62
+ const lang = detectCodeLanguage(cls);
63
+ const body = code?.textContent ?? pre.textContent ?? '';
64
+ const fence = '`'.repeat(Math.max(3, longestBacktickRun(body) + 1));
65
+ return `\n\n${fence}${lang ?? ''}\n${body.replace(/\n+$/, '')}\n${fence}\n\n`;
66
+ },
67
+ });
38
68
  return td;
39
69
  }
40
70
  const turndown = buildTurndown();
@@ -43,7 +73,7 @@ export function htmlToMarkdown(html) {
43
73
  return '';
44
74
  return turndown.turndown(html);
45
75
  }
46
- function parseHeadings(lines) {
76
+ export function parseHeadings(lines) {
47
77
  const headings = [];
48
78
  for (let i = 0; i < lines.length; i++) {
49
79
  const match = lines[i].match(/^(#{1,6})\s+(.+)/);
@@ -53,6 +83,17 @@ function parseHeadings(lines) {
53
83
  }
54
84
  return headings;
55
85
  }
86
+ // Prefix-sum array of char offsets: offsets[i] is the index in
87
+ // `lines.join('\n')` at which lines[i] begins.
88
+ export function lineStartCharOffsets(lines) {
89
+ const offsets = new Array(lines.length);
90
+ let acc = 0;
91
+ for (let i = 0; i < lines.length; i++) {
92
+ offsets[i] = acc;
93
+ acc += lines[i].length + 1; // +1 for the '\n' separator
94
+ }
95
+ return offsets;
96
+ }
56
97
  function extractFromHeading(lines, headings, headingIdx) {
57
98
  const heading = headings[headingIdx];
58
99
  const start = heading.lineIndex;
@@ -151,8 +192,16 @@ export function resolveRelativeUrls(markdown, baseUrl) {
151
192
  const trimmed = path.trim();
152
193
  if (!trimmed)
153
194
  return path;
154
- if (/^(?:https?:|mailto:|tel:|javascript:|data:|#)/i.test(trimmed))
195
+ if (/^(?:https?:|mailto:|tel:|javascript:|data:)/i.test(trimmed))
155
196
  return path;
197
+ if (trimmed.startsWith('#')) {
198
+ try {
199
+ return new URL(trimmed, baseUrl).href;
200
+ }
201
+ catch {
202
+ return path;
203
+ }
204
+ }
156
205
  if (trimmed.startsWith('//')) {
157
206
  try {
158
207
  const base = new URL(baseUrl);
@@ -1 +1 @@
1
- {"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,SAAS,aAAa;IACpB,MAAM,EAAE,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;IAElF,wCAAwC;IACxC,EAAE,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IAE/B,iDAAiD;IACjD,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE;QAClB,MAAM,EAAE,OAAO;QACf,WAAW,CAAC,QAAQ,EAAE,IAAI;YACxB,MAAM,EAAE,GAAG,IAAe,CAAC;YAC3B,MAAM,IAAI,GAAc,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAEjC,MAAM,SAAS,GAAG,CAAC,GAAY,EAAU,EAAE;gBACzC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACzD,OAAO,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YACnG,CAAC,CAAC;YAEF,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,WAAW,GAAG,SAAS,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;YAChE,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrE,MAAM,SAAS,GAAG,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YAEzE,IAAI,WAAW,EAAE,CAAC;gBAChB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC/B,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC5E,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;YAC5C,CAAC;YAED,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;YACjF,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;QAC5C,CAAC;KACF,CAAC,CAAC;IAEH,qFAAqF;IACrF,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE;QACtB,MAAM,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;QACrD,WAAW,CAAC,OAAO;YACjB,OAAO,OAAO,CAAC;QACjB,CAAC;KACF,CAAC,CAAC;IAEH,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;AAEjC,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAQD,SAAS,aAAa,CAAC,KAAe;IACpC,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACjD,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAe,EAAE,QAAmB,EAAE,UAAkB;IAClF,MAAM,OAAO,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC;IAEhC,0EAA0E;IAC1E,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,KAAK,IAAI,CAAC,GAAG,UAAU,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtD,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACvC,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5B,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,QAAgB,EAChB,OAAe,EACf,YAAY,GAAG,CAAC;IAEhB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAExE,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IAEnD,8BAA8B;IAC9B,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,KAAK,CAAC,CAAC;IAE/E,yDAAyD;IACzD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC;QAClE,MAAM,EAAE,CAAC,EAAE,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;QACzC,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IAC5E,CAAC;IAED,4EAA4E;IAC5E,MAAM,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAEzF,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,IAAI,gBAAgB,CAAC,MAAM,EAAE,CAAC;QAC7E,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC/C,CAAC;IAED,MAAM,EAAE,CAAC,EAAE,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAC7C,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,QAAgB;IACpD,MAAM,YAAY,GAAG,yBAAyB,CAAC;IAC/C,MAAM,WAAW,GAAG,8BAA8B,CAAC;IAEnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,IAAI,KAA6B,CAAC;IAElC,uBAAuB;IACvB,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACtD,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,4BAA4B;IAC5B,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,sBAAsB,GAAG;IAC7B,QAAQ;IACR,MAAM;IACN,MAAM;IACN,OAAO;IACP,QAAQ;IACR,UAAU;IACV,OAAO;IACP,QAAQ;IACR,OAAO;IACP,SAAS;CACV,CAAC;AAEF,yEAAyE;AACzE,0EAA0E;AAC1E,wEAAwE;AACxE,MAAM,UAAU,sBAAsB,CAAC,QAAgB;IACrD,IAAI,CAAC,QAAQ;QAAE,OAAO,QAAQ,CAAC;IAC/B,OAAO,QAAQ,CAAC,OAAO,CAAC,2BAA2B,EAAE,CAAC,KAAK,EAAE,GAAW,EAAE,GAAW,EAAE,EAAE;QACvF,MAAM,UAAU,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QAEnC,iDAAiD;QACjD,IAAI,QAAQ,CAAC,UAAU,CAAC,wBAAwB,CAAC;YAAE,OAAO,EAAE,CAAC;QAE7D,oEAAoE;QACpE,IAAI,QAAQ,CAAC,UAAU,CAAC,oBAAoB,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,EAAE,CAAC;QAE7E,+CAA+C;QAC/C,KAAK,MAAM,MAAM,IAAI,sBAAsB,EAAE,CAAC;YAC5C,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAE,OAAO,EAAE,CAAC;QAC3C,CAAC;QAED,sCAAsC;QACtC,IAAI,CAAC,UAAU;YAAE,OAAO,EAAE,CAAC;QAE3B,OAAO,KAAK,CAAC;IACf,CAAC,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,8EAA8E;AAC9E,MAAM,UAAU,mBAAmB,CAAC,QAAgB,EAAE,OAAe;IACnE,IAAI,CAAC,QAAQ,IAAI,CAAC,OAAO;QAAE,OAAO,QAAQ,CAAC;IAE3C,MAAM,OAAO,GAAG,CAAC,IAAY,EAAU,EAAE;QACvC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAC1B,IAAI,gDAAgD,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,IAAI,CAAC;QAChF,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC9B,OAAO,GAAG,IAAI,CAAC,QAAQ,GAAG,OAAO,EAAE,CAAC;YACtC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QACD,IAAI,CAAC;YACH,OAAO,IAAI,GAAG,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;QACxC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC,CAAC;IAEF,0EAA0E;IAC1E,IAAI,MAAM,GAAG,QAAQ,CAAC,OAAO,CAC3B,8CAA8C,EAC9C,CAAC,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,EAAE,CAC7D,CAAC;IAEF,MAAM,GAAG,MAAM,CAAC,OAAO,CACrB,qDAAqD,EACrD,CAAC,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,EAAE,CACxE,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC"}
1
+ {"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AAErD,SAAS,kBAAkB,CAAC,CAAS;IACnC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,IAAI,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC;YAC3B,GAAG,EAAE,CAAC;YACN,IAAI,GAAG,GAAG,GAAG;gBAAE,GAAG,GAAG,GAAG,CAAC;QAC3B,CAAC;aAAM,CAAC;YACN,GAAG,GAAG,CAAC,CAAC;QACV,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,aAAa;IAC3B,MAAM,EAAE,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;IAElF,wCAAwC;IACxC,EAAE,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IAE/B,iDAAiD;IACjD,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE;QAClB,MAAM,EAAE,OAAO;QACf,WAAW,CAAC,QAAQ,EAAE,IAAI;YACxB,MAAM,EAAE,GAAG,IAAe,CAAC;YAC3B,MAAM,IAAI,GAAc,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAEjC,MAAM,SAAS,GAAG,CAAC,GAAY,EAAU,EAAE;gBACzC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACzD,OAAO,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YACnG,CAAC,CAAC;YAEF,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,WAAW,GAAG,SAAS,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;YAChE,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrE,MAAM,SAAS,GAAG,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YAEzE,IAAI,WAAW,EAAE,CAAC;gBAChB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC/B,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC5E,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;YAC5C,CAAC;YAED,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;YACjF,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;QAC5C,CAAC;KACF,CAAC,CAAC;IAEH,qFAAqF;IACrF,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE;QACtB,MAAM,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;QACrD,WAAW,CAAC,OAAO;YACjB,OAAO,OAAO,CAAC;QACjB,CAAC;KACF,CAAC,CAAC;IAEH,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE;QAC1B,MAAM,CAAC,IAAI;YACT,OAAO,IAAI,CAAC,QAAQ,KAAK,KAAK,IAAK,IAAgB,CAAC,aAAa,CAAC,MAAM,CAAC,KAAK,IAAI,CAAC;QACrF,CAAC;QACD,WAAW,CAAC,QAAQ,EAAE,IAAI;YACxB,MAAM,GAAG,GAAG,IAAe,CAAC;YAC5B,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YACvC,MAAM,GAAG,GAAG,IAAI,EAAE,YAAY,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YAC3E,MAAM,IAAI,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAC;YACrC,MAAM,IAAI,GAAG,IAAI,EAAE,WAAW,IAAI,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC;YACxD,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,kBAAkB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACpE,OAAO,OAAO,KAAK,GAAG,IAAI,IAAI,EAAE,KAAK,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,KAAK,KAAK,MAAM,CAAC;QAChF,CAAC;KACF,CAAC,CAAC;IAEH,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;AAEjC,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAQD,MAAM,UAAU,aAAa,CAAC,KAAe;IAC3C,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACjD,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,+DAA+D;AAC/D,+CAA+C;AAC/C,MAAM,UAAU,oBAAoB,CAAC,KAAe;IAClD,MAAM,OAAO,GAAG,IAAI,KAAK,CAAS,KAAK,CAAC,MAAM,CAAC,CAAC;IAChD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,OAAO,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;QACjB,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,4BAA4B;IAC1D,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAe,EAAE,QAAmB,EAAE,UAAkB;IAClF,MAAM,OAAO,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC;IAEhC,0EAA0E;IAC1E,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,KAAK,IAAI,CAAC,GAAG,UAAU,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtD,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACvC,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5B,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,QAAgB,EAChB,OAAe,EACf,YAAY,GAAG,CAAC;IAEhB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAExE,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IAEnD,8BAA8B;IAC9B,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,KAAK,CAAC,CAAC;IAE/E,yDAAyD;IACzD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC;QAClE,MAAM,EAAE,CAAC,EAAE,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;QACzC,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IAC5E,CAAC;IAED,4EAA4E;IAC5E,MAAM,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAEzF,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,IAAI,gBAAgB,CAAC,MAAM,EAAE,CAAC;QAC7E,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC/C,CAAC;IAED,MAAM,EAAE,CAAC,EAAE,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAC7C,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,QAAgB;IACpD,MAAM,YAAY,GAAG,yBAAyB,CAAC;IAC/C,MAAM,WAAW,GAAG,8BAA8B,CAAC;IAEnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,IAAI,KAA6B,CAAC;IAElC,uBAAuB;IACvB,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACtD,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,4BAA4B;IAC5B,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,sBAAsB,GAAG;IAC7B,QAAQ;IACR,MAAM;IACN,MAAM;IACN,OAAO;IACP,QAAQ;IACR,UAAU;IACV,OAAO;IACP,QAAQ;IACR,OAAO;IACP,SAAS;CACV,CAAC;AAEF,yEAAyE;AACzE,0EAA0E;AAC1E,wEAAwE;AACxE,MAAM,UAAU,sBAAsB,CAAC,QAAgB;IACrD,IAAI,CAAC,QAAQ;QAAE,OAAO,QAAQ,CAAC;IAC/B,OAAO,QAAQ,CAAC,OAAO,CAAC,2BAA2B,EAAE,CAAC,KAAK,EAAE,GAAW,EAAE,GAAW,EAAE,EAAE;QACvF,MAAM,UAAU,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QAEnC,iDAAiD;QACjD,IAAI,QAAQ,CAAC,UAAU,CAAC,wBAAwB,CAAC;YAAE,OAAO,EAAE,CAAC;QAE7D,oEAAoE;QACpE,IAAI,QAAQ,CAAC,UAAU,CAAC,oBAAoB,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,EAAE,CAAC;QAE7E,+CAA+C;QAC/C,KAAK,MAAM,MAAM,IAAI,sBAAsB,EAAE,CAAC;YAC5C,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAE,OAAO,EAAE,CAAC;QAC3C,CAAC;QAED,sCAAsC;QACtC,IAAI,CAAC,UAAU;YAAE,OAAO,EAAE,CAAC;QAE3B,OAAO,KAAK,CAAC;IACf,CAAC,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,8EAA8E;AAC9E,MAAM,UAAU,mBAAmB,CAAC,QAAgB,EAAE,OAAe;IACnE,IAAI,CAAC,QAAQ,IAAI,CAAC,OAAO;QAAE,OAAO,QAAQ,CAAC;IAE3C,MAAM,OAAO,GAAG,CAAC,IAAY,EAAU,EAAE;QACvC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAC1B,IAAI,8CAA8C,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,IAAI,CAAC;QAC9E,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC5B,IAAI,CAAC;gBACH,OAAO,IAAI,GAAG,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YACxC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QACD,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC9B,OAAO,GAAG,IAAI,CAAC,QAAQ,GAAG,OAAO,EAAE,CAAC;YACtC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QACD,IAAI,CAAC;YACH,OAAO,IAAI,GAAG,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;QACxC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC,CAAC;IAEF,0EAA0E;IAC1E,IAAI,MAAM,GAAG,QAAQ,CAAC,OAAO,CAC3B,8CAA8C,EAC9C,CAAC,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,EAAE,CAC7D,CAAC;IAEF,MAAM,GAAG,MAAM,CAAC,OAAO,CACrB,qDAAqD,EACrD,CAAC,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,EAAE,CACxE,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAU/D,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AASD,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,SAAS,GAAG,IAAI,CAE5D;AAED,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,gBAAgB,CAAC,CAmE3B"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAaA,OAAO,KAAK,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAU/D,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AASD,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,SAAS,GAAG,IAAI,CAE5D;AAED,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,gBAAgB,CAAC,CA4E3B"}
@@ -1,8 +1,10 @@
1
+ import { parseHTML } from 'linkedom';
1
2
  import { defuddleExtract } from './defuddle.js';
2
3
  import { readabilityExtract } from './readability.js';
3
4
  import { trafilaturaExtract, isTrafilaturaAvailable } from './trafilatura.js';
4
5
  import { htmlToMarkdown, extractSection, extractLinksAndImages, filterDecorativeImages, resolveRelativeUrls, } from './markdown.js';
5
6
  import { extractMetadata } from './extract.js';
7
+ import { stripBoilerplateDom, stripBoilerplateMarkdown } from './boilerplate.js';
6
8
  import { githubExtractor } from './site-extractors/github.js';
7
9
  import { stackoverflowExtractor } from './site-extractors/stackoverflow.js';
8
10
  import { mdnExtractor } from './site-extractors/mdn.js';
@@ -43,21 +45,30 @@ export async function extractContent(html, url, options = {}) {
43
45
  };
44
46
  return applyPostProcessing(result, url, html, options);
45
47
  }
48
+ let cleanedHtml = html;
49
+ try {
50
+ const { document } = parseHTML(html);
51
+ stripBoilerplateDom(document);
52
+ cleanedHtml = document.toString();
53
+ }
54
+ catch (err) {
55
+ log.warn('boilerplate DOM pre-pass failed', { url, error: String(err) });
56
+ }
46
57
  const siteExtractor = siteExtractors.find((e) => e.canHandle(url, html));
47
58
  if (siteExtractor) {
48
- const extracted = siteExtractor.extract(html, url);
59
+ const extracted = siteExtractor.extract(cleanedHtml, url);
49
60
  if (extracted) {
50
61
  result = extracted;
51
62
  return applyPostProcessing(result, url, html, options);
52
63
  }
53
64
  }
54
- result = await defuddleExtract(html, url);
65
+ result = await defuddleExtract(cleanedHtml, url);
55
66
  if (!result) {
56
67
  const config = getConfig();
57
68
  if (config.trafilatura !== 'never') {
58
69
  const trafAvailable = await isTrafilaturaAvailable();
59
70
  if (trafAvailable) {
60
- result = await trafilaturaExtract(html, url);
71
+ result = await trafilaturaExtract(cleanedHtml, url);
61
72
  if (result) {
62
73
  log.info('Trafilatura extraction succeeded', { url, chars: result.markdown.length });
63
74
  return applyPostProcessing(result, url, html, options);
@@ -66,10 +77,10 @@ export async function extractContent(html, url, options = {}) {
66
77
  }
67
78
  }
68
79
  if (!result) {
69
- result = readabilityExtract(html, url);
80
+ result = readabilityExtract(cleanedHtml, url);
70
81
  }
71
82
  if (!result) {
72
- const markdown = htmlToMarkdown(html);
83
+ const markdown = htmlToMarkdown(cleanedHtml);
73
84
  result = {
74
85
  title: '',
75
86
  markdown,
@@ -105,6 +116,7 @@ function applyPostProcessing(result, url, html, options) {
105
116
  let markdown = result.markdown;
106
117
  // Resolve relative links/images before slicing so downstream consumers get absolute URLs.
107
118
  markdown = resolveRelativeUrls(markdown, url);
119
+ markdown = stripBoilerplateMarkdown(markdown);
108
120
  markdown = filterDecorativeImages(markdown);
109
121
  if (options.section) {
110
122
  const { content } = extractSection(markdown, options.section, options.sectionIndex ?? 0);
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,MAAM,kBAAkB,CAAC;AAC9E,OAAO,EACL,cAAc,EACd,cAAc,EACd,qBAAqB,EACrB,sBAAsB,EACtB,mBAAmB,GACpB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAE/C,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAC5E,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEzC,MAAM,GAAG,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC;AAUpC,MAAM,cAAc,GAAgB;IAClC,eAAe;IACf,sBAAsB;IACtB,YAAY;IACZ,oBAAoB;CACrB,CAAC;AAEF,MAAM,UAAU,iBAAiB,CAAC,SAAoB;IACpD,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,IAAY,EACZ,GAAW,EACX,UAA6B,EAAE;IAE/B,IAAI,MAAM,GAA4B,IAAI,CAAC;IAE3C,IAAI,OAAO,CAAC,WAAW,KAAK,iBAAiB,EAAE,CAAC;QAC9C,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,CAAC,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC;gBACrD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;gBACjD,OAAO,GAAG,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAC9B,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,GAAG,CAAC,IAAI,CAAC,kBAAkB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC5D,CAAC;QACH,CAAC;QACD,MAAM,GAAG;YACP,KAAK,EAAE,EAAE;YACT,QAAQ,EAAE,OAAO;YACjB,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;QACF,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IACzD,CAAC;IAED,MAAM,aAAa,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;IACzE,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,SAAS,GAAG,aAAa,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACnD,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,GAAG,SAAS,CAAC;YACnB,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;IAED,MAAM,GAAG,MAAM,eAAe,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAE1C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,IAAI,MAAM,CAAC,WAAW,KAAK,OAAO,EAAE,CAAC;YACnC,MAAM,aAAa,GAAG,MAAM,sBAAsB,EAAE,CAAC;YACrD,IAAI,aAAa,EAAE,CAAC;gBAClB,MAAM,GAAG,MAAM,kBAAkB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;gBAC7C,IAAI,MAAM,EAAE,CAAC;oBACX,GAAG,CAAC,IAAI,CAAC,kCAAkC,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;oBACrF,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;gBACzD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,GAAG,kBAAkB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,GAAG;YACP,KAAK,EAAE,EAAE;YACT,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;IACJ,CAAC;IAED,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;AACzD,CAAC;AAED,SAAS,aAAa,CACpB,IAAkC,EAClC,IAAY;IAEZ,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,OAAO;YACL,GAAG,IAAI;YACP,oFAAoF;YACpF,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW;YACjD,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM;YAClC,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI;YAC5B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;YACxC,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;YACrC,aAAa,EAAE,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,aAAa;YACvD,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;SACzC,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB,CAC1B,MAAwB,EACxB,GAAW,EACX,IAAY,EACZ,OAA0B;IAE1B,IAAI,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IAE/B,0FAA0F;IAC1F,QAAQ,GAAG,mBAAmB,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC9C,QAAQ,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IAE5C,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,EAAE,OAAO,EAAE,GAAG,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,EAAE,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC;QACzF,QAAQ,GAAG,OAAO,CAAC;IACrB,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAC1D,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IAEtD,IAAI,OAAO,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;QAC3D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACjD,CAAC;IAED,OAAO,EAAE,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;AAC1D,CAAC"}
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,MAAM,kBAAkB,CAAC;AAC9E,OAAO,EACL,cAAc,EACd,cAAc,EACd,qBAAqB,EACrB,sBAAsB,EACtB,mBAAmB,GACpB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/C,OAAO,EAAE,mBAAmB,EAAE,wBAAwB,EAAE,MAAM,kBAAkB,CAAC;AAEjF,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAC5E,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEzC,MAAM,GAAG,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC;AAUpC,MAAM,cAAc,GAAgB;IAClC,eAAe;IACf,sBAAsB;IACtB,YAAY;IACZ,oBAAoB;CACrB,CAAC;AAEF,MAAM,UAAU,iBAAiB,CAAC,SAAoB;IACpD,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,IAAY,EACZ,GAAW,EACX,UAA6B,EAAE;IAE/B,IAAI,MAAM,GAA4B,IAAI,CAAC;IAE3C,IAAI,OAAO,CAAC,WAAW,KAAK,iBAAiB,EAAE,CAAC;QAC9C,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,CAAC,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC;gBACrD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;gBACjD,OAAO,GAAG,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAC9B,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,GAAG,CAAC,IAAI,CAAC,kBAAkB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC5D,CAAC;QACH,CAAC;QACD,MAAM,GAAG;YACP,KAAK,EAAE,EAAE;YACT,QAAQ,EAAE,OAAO;YACjB,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;QACF,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IACzD,CAAC;IAED,IAAI,WAAW,GAAG,IAAI,CAAC;IACvB,IAAI,CAAC;QACH,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAC9B,WAAW,GAAG,QAAQ,CAAC,QAAQ,EAAE,CAAC;IACpC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,IAAI,CAAC,iCAAiC,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAC3E,CAAC;IAED,MAAM,aAAa,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;IACzE,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,SAAS,GAAG,aAAa,CAAC,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;QAC1D,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,GAAG,SAAS,CAAC;YACnB,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;IAED,MAAM,GAAG,MAAM,eAAe,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;IAEjD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,IAAI,MAAM,CAAC,WAAW,KAAK,OAAO,EAAE,CAAC;YACnC,MAAM,aAAa,GAAG,MAAM,sBAAsB,EAAE,CAAC;YACrD,IAAI,aAAa,EAAE,CAAC;gBAClB,MAAM,GAAG,MAAM,kBAAkB,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;gBACpD,IAAI,MAAM,EAAE,CAAC;oBACX,GAAG,CAAC,IAAI,CAAC,kCAAkC,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;oBACrF,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;gBACzD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,GAAG,kBAAkB,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;IAChD,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,QAAQ,GAAG,cAAc,CAAC,WAAW,CAAC,CAAC;QAC7C,MAAM,GAAG;YACP,KAAK,EAAE,EAAE;YACT,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;IACJ,CAAC;IAED,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;AACzD,CAAC;AAED,SAAS,aAAa,CACpB,IAAkC,EAClC,IAAY;IAEZ,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,OAAO;YACL,GAAG,IAAI;YACP,oFAAoF;YACpF,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW;YACjD,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM;YAClC,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI;YAC5B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;YACxC,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;YACrC,aAAa,EAAE,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,aAAa;YACvD,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;SACzC,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB,CAC1B,MAAwB,EACxB,GAAW,EACX,IAAY,EACZ,OAA0B;IAE1B,IAAI,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IAE/B,0FAA0F;IAC1F,QAAQ,GAAG,mBAAmB,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC9C,QAAQ,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;IAC9C,QAAQ,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IAE5C,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,EAAE,OAAO,EAAE,GAAG,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,EAAE,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC;QACzF,QAAQ,GAAG,OAAO,CAAC;IACrB,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAC1D,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IAEtD,IAAI,OAAO,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;QAC3D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACjD,CAAC;IAED,OAAO,EAAE,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;AAC1D,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"readability.d.ts","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIpD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CA0BtF"}
1
+ {"version":3,"file":"readability.d.ts","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIpD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CAyBtF"}
@@ -1,6 +1,6 @@
1
1
  import { Readability } from '@mozilla/readability';
2
2
  import { parseHTML } from 'linkedom';
3
- import TurndownService from 'turndown';
3
+ import { htmlToMarkdown } from './markdown.js';
4
4
  const MIN_CONTENT_THRESHOLD = 100;
5
5
  export function readabilityExtract(html, _url) {
6
6
  try {
@@ -9,8 +9,7 @@ export function readabilityExtract(html, _url) {
9
9
  const article = reader.parse();
10
10
  if (!article || !article.content)
11
11
  return null;
12
- const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
13
- const markdown = turndown.turndown(article.content);
12
+ const markdown = htmlToMarkdown(article.content);
14
13
  if (markdown.length < MIN_CONTENT_THRESHOLD)
15
14
  return null;
16
15
  return {
@@ -1 +1 @@
1
- {"version":3,"file":"readability.js","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC,MAAM,UAAU,kBAAkB,CAAC,IAAY,EAAE,IAAY;IAC3D,IAAI,CAAC;QACH,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAe,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAC/B,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE9C,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;QACxF,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAEpD,IAAI,QAAQ,CAAC,MAAM,GAAG,qBAAqB;YAAE,OAAO,IAAI,CAAC;QAEzD,OAAO;YACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;YAC1B,QAAQ;YACR,QAAQ,EAAE;gBACR,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;gBACnC,QAAQ,EAAE,OAAO,CAAC,IAAI,IAAI,SAAS;aACpC;YACD,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,aAAa;SACzB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"readability.js","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAG/C,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC,MAAM,UAAU,kBAAkB,CAAC,IAAY,EAAE,IAAY;IAC3D,IAAI,CAAC;QACH,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAe,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAC/B,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE9C,MAAM,QAAQ,GAAG,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAEjD,IAAI,QAAQ,CAAC,MAAM,GAAG,qBAAqB;YAAE,OAAO,IAAI,CAAC;QAEzD,OAAO;YACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;YAC1B,QAAQ;YACR,QAAQ,EAAE;gBACR,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;gBACnC,QAAQ,EAAE,OAAO,CAAC,IAAI,IAAI,SAAS;aACpC;YACD,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,aAAa;SACzB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -1,7 +1,19 @@
1
+ import { type LLMFallbackBudget } from './llm-fallback.js';
2
+ import type { SchemaExtractionResult } from '../types.js';
1
3
  export interface JsonSchema {
2
4
  type?: string;
3
5
  properties?: Record<string, JsonSchema>;
4
6
  items?: JsonSchema;
7
+ required?: string[];
8
+ }
9
+ export interface SchemaExtractionOpts {
10
+ signal?: AbortSignal;
11
+ budget?: LLMFallbackBudget;
5
12
  }
6
13
  export declare function extractWithSchema(html: string, schema: JsonSchema): Record<string, unknown>;
14
+ export declare function extractWithSchemaDetailed(html: string, schema: JsonSchema): SchemaExtractionResult;
15
+ export interface SchemaExtractionAsyncResult extends SchemaExtractionResult {
16
+ warnings: string[];
17
+ }
18
+ export declare function extractWithSchemaDetailedAsync(html: string, schema: JsonSchema, opts?: SchemaExtractionOpts): Promise<SchemaExtractionAsyncResult>;
7
19
  //# sourceMappingURL=schema.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../src/extraction/schema.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,UAAU;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IACxC,KAAK,CAAC,EAAE,UAAU,CAAC;CACpB;AAED,wBAAgB,iBAAiB,CAC/B,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,UAAU,GACjB,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAmBzB"}
1
+ {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../src/extraction/schema.ts"],"names":[],"mappings":"AAEA,OAAO,EAAkB,KAAK,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAC3E,OAAO,KAAK,EAEV,sBAAsB,EAEvB,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,UAAU;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IACxC,KAAK,CAAC,EAAE,UAAU,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,oBAAoB;IACnC,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,MAAM,CAAC,EAAE,iBAAiB,CAAC;CAC5B;AAQD,wBAAgB,iBAAiB,CAC/B,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,UAAU,GACjB,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAEzB;AAED,wBAAgB,yBAAyB,CACvC,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,UAAU,GACjB,sBAAsB,CAsCxB;AAED,MAAM,WAAW,2BAA4B,SAAQ,sBAAsB;IACzE,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,wBAAsB,8BAA8B,CAClD,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,UAAU,EAClB,IAAI,GAAE,oBAAyB,GAC9B,OAAO,CAAC,2BAA2B,CAAC,CA+BtC"}
@@ -1,22 +1,93 @@
1
1
  import { parseHTML } from 'linkedom';
2
- import { extractJsonLd, matchJsonLdToSchema } from './jsonld.js';
2
+ import { extractStructuredData } from './structured-data.js';
3
+ import { extractWithLLM } from './llm-fallback.js';
4
+ const PROVENANCE_PRIORITY = [
5
+ 'json-ld',
6
+ 'microdata',
7
+ 'rdfa',
8
+ ];
3
9
  export function extractWithSchema(html, schema) {
10
+ return extractWithSchemaDetailed(html, schema).values;
11
+ }
12
+ export function extractWithSchemaDetailed(html, schema) {
13
+ const values = {};
14
+ const provenance = {};
4
15
  if (!html || !schema.properties)
5
- return {};
6
- const jsonLdBlocks = extractJsonLd(html);
7
- const jsonLdResult = matchJsonLdToSchema(jsonLdBlocks, schema);
16
+ return { values, provenance };
17
+ const blocks = extractStructuredData(html);
18
+ for (const source of PROVENANCE_PRIORITY) {
19
+ for (const block of blocks) {
20
+ if (block.provenance !== source)
21
+ continue;
22
+ for (const fieldName of Object.keys(schema.properties)) {
23
+ if (values[fieldName] !== undefined)
24
+ continue;
25
+ const v = pickField(block.fields, fieldName);
26
+ if (v !== undefined) {
27
+ values[fieldName] = v;
28
+ provenance[fieldName] = source;
29
+ }
30
+ }
31
+ }
32
+ }
33
+ const allCovered = Object.keys(schema.properties).every((k) => values[k] !== undefined);
34
+ if (allCovered)
35
+ return { values, provenance };
36
+ // Heuristic fallback only for fields still missing
8
37
  const { document: doc } = parseHTML(html);
9
- const heuristicResult = {};
10
38
  for (const [fieldName, fieldSchema] of Object.entries(schema.properties)) {
11
- if (jsonLdResult[fieldName] !== undefined)
39
+ if (values[fieldName] !== undefined)
12
40
  continue;
13
- const value = findFieldValue(doc, fieldName, fieldSchema);
14
- if (value !== undefined) {
15
- heuristicResult[fieldName] = value;
41
+ const v = findFieldValue(doc, fieldName, fieldSchema);
42
+ if (v !== undefined) {
43
+ values[fieldName] = v;
44
+ provenance[fieldName] = 'heuristic';
16
45
  }
17
46
  }
18
- return { ...jsonLdResult, ...heuristicResult };
47
+ return { values, provenance };
48
+ }
49
+ export async function extractWithSchemaDetailedAsync(html, schema, opts = {}) {
50
+ const det = extractWithSchemaDetailed(html, schema);
51
+ const warnings = [];
52
+ if (!schema.required || schema.required.length === 0) {
53
+ return { ...det, warnings };
54
+ }
55
+ const missing = schema.required.filter((k) => det.values[k] === undefined);
56
+ if (missing.length === 0) {
57
+ return { ...det, warnings };
58
+ }
59
+ const llm = await extractWithLLM({
60
+ html,
61
+ jsonSchema: schema,
62
+ partial: det.values,
63
+ missing,
64
+ signal: opts.signal,
65
+ budget: opts.budget,
66
+ });
67
+ const values = { ...det.values };
68
+ const provenance = { ...det.provenance };
69
+ for (const key of missing) {
70
+ if (llm.values[key] !== undefined && values[key] === undefined) {
71
+ values[key] = llm.values[key];
72
+ provenance[key] = 'llm';
73
+ }
74
+ }
75
+ return { values, provenance, warnings: llm.warnings };
76
+ }
77
+ function pickField(fields, name) {
78
+ if (fields[name] !== undefined)
79
+ return fields[name];
80
+ // Shallow nested — e.g. JSON-LD Product.offers.price
81
+ for (const v of Object.values(fields)) {
82
+ if (v && typeof v === 'object' && !Array.isArray(v)) {
83
+ const nested = v[name];
84
+ if (nested !== undefined)
85
+ return nested;
86
+ }
87
+ }
88
+ return undefined;
19
89
  }
90
+ // ---------- heuristic helpers (preserved from prior schema.ts) ----------
20
91
  function findFieldValue(doc, fieldName, schema) {
21
92
  const normalizedName = fieldName.toLowerCase().replace(/_/g, '-');
22
93
  const compactName = fieldName.replace(/_/g, '').toLowerCase();
@@ -37,7 +108,6 @@ function findSingleValue(doc, variants) {
37
108
  if (text)
38
109
  return text;
39
110
  }
40
- // Substring match is intentional — heuristic best-effort for partial class names
41
111
  const byClass = doc.querySelector(`[class*="${name}"]`);
42
112
  if (byClass) {
43
113
  const text = byClass.textContent?.trim();
@@ -1 +1 @@
1
- {"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/extraction/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AAQjE,MAAM,UAAU,iBAAiB,CAC/B,IAAY,EACZ,MAAkB;IAElB,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU;QAAE,OAAO,EAAE,CAAC;IAE3C,MAAM,YAAY,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;IACzC,MAAM,YAAY,GAAG,mBAAmB,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IAE/D,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,eAAe,GAA4B,EAAE,CAAC;IAEpD,KAAK,MAAM,CAAC,SAAS,EAAE,WAAW,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;QACzE,IAAI,YAAY,CAAC,SAAS,CAAC,KAAK,SAAS;YAAE,SAAS;QAEpD,MAAM,KAAK,GAAG,cAAc,CAAC,GAAG,EAAE,SAAS,EAAE,WAAW,CAAC,CAAC;QAC1D,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACxB,eAAe,CAAC,SAAS,CAAC,GAAG,KAAK,CAAC;QACrC,CAAC;IACH,CAAC;IAED,OAAO,EAAE,GAAG,YAAY,EAAE,GAAG,eAAe,EAAE,CAAC;AACjD,CAAC;AAED,SAAS,cAAc,CACrB,GAAa,EACb,SAAiB,EACjB,MAAkB;IAElB,MAAM,cAAc,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAClE,MAAM,WAAW,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IAC9D,MAAM,QAAQ,GAAG,CAAC,SAAS,EAAE,cAAc,EAAE,WAAW,CAAC,CAAC;IAE1D,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;QAC5B,OAAO,eAAe,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IACxC,CAAC;IAED,OAAO,eAAe,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;AACxC,CAAC;AAED,SAAS,SAAS,CAAC,KAAa;IAC9B,OAAO,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;AAC5C,CAAC;AAED,SAAS,eAAe,CAAC,GAAa,EAAE,QAAkB;IACxD,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,UAAU,GAAG,GAAG,CAAC,aAAa,CAAC,cAAc,IAAI,IAAI,CAAC,CAAC;QAC7D,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,UAAU,CAAC,YAAY,CAAC,SAAS,CAAC,IAAI,UAAU,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;YAClF,IAAI,IAAI;gBAAE,OAAO,IAAI,CAAC;QACxB,CAAC;QAED,iFAAiF;QACjF,MAAM,OAAO,GAAG,GAAG,CAAC,aAAa,CAAC,YAAY,IAAI,IAAI,CAAC,CAAC;QACxD,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;YACzC,IAAI,IAAI;gBAAE,OAAO,IAAI,CAAC;QACxB,CAAC;QAED,MAAM,WAAW,GAAG,GAAG,CAAC,gBAAgB,CAAC,cAAc,CAAC,CAAC;QACzD,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;YAC7B,MAAM,KAAK,GAAG,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,EAAE,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;YACtF,IAAI,KAAK,KAAK,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;gBACjC,MAAM,IAAI,GAAG,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;gBACpC,IAAI,IAAI;oBAAE,OAAO,IAAI,CAAC;YACxB,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,CAAC,IAAI,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACtD,IAAI,IAAI,EAAE,CAAC;YACT,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;YACtC,IAAI,IAAI;gBAAE,OAAO,IAAI,CAAC;QACxB,CAAC;QAED,MAAM,MAAM,GAAG,GAAG,CAAC,aAAa,CAAC,SAAS,IAAI,GAAG,CAAC,CAAC;QACnD,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,MAAM,CAAC,YAAY,CAAC,QAAQ,IAAI,EAAE,CAAC,IAAI,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;QACxF,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,eAAe,CAAC,GAAa,EAAE,QAAkB;IACxD,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,SAAS,GAAG,GAAG,CAAC,aAAa,CAAC,YAAY,IAAI,IAAI,CAAC,CAAC;QAC1D,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,KAAK,GAAG,SAAS,CAAC,gBAAgB,CAAC,qBAAqB,CAAC,CAAC;YAChE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YACtF,CAAC;QACH,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACxC,MAAM,QAAQ,GAAG,GAAG,CAAC,gBAAgB,CAAC,YAAY,QAAQ,IAAI,CAAC,CAAC;QAChE,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACzF,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
1
+ {"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/extraction/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAC7D,OAAO,EAAE,cAAc,EAA0B,MAAM,mBAAmB,CAAC;AAmB3E,MAAM,mBAAmB,GAAyC;IAChE,SAAS;IACT,WAAW;IACX,MAAM;CACP,CAAC;AAEF,MAAM,UAAU,iBAAiB,CAC/B,IAAY,EACZ,MAAkB;IAElB,OAAO,yBAAyB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC;AACxD,CAAC;AAED,MAAM,UAAU,yBAAyB,CACvC,IAAY,EACZ,MAAkB;IAElB,MAAM,MAAM,GAA4B,EAAE,CAAC;IAC3C,MAAM,UAAU,GAAoC,EAAE,CAAC;IACvD,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU;QAAE,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;IAE/D,MAAM,MAAM,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAE3C,KAAK,MAAM,MAAM,IAAI,mBAAmB,EAAE,CAAC;QACzC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,IAAI,KAAK,CAAC,UAAU,KAAK,MAAM;gBAAE,SAAS;YAC1C,KAAK,MAAM,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;gBACvD,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,SAAS;oBAAE,SAAS;gBAC9C,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;gBAC7C,IAAI,CAAC,KAAK,SAAS,EAAE,CAAC;oBACpB,MAAM,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;oBACtB,UAAU,CAAC,SAAS,CAAC,GAAG,MAAM,CAAC;gBACjC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,KAAK,CACrD,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,SAAS,CAC/B,CAAC;IACF,IAAI,UAAU;QAAE,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;IAE9C,mDAAmD;IACnD,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,KAAK,MAAM,CAAC,SAAS,EAAE,WAAW,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;QACzE,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,SAAS;YAAE,SAAS;QAC9C,MAAM,CAAC,GAAG,cAAc,CAAC,GAAG,EAAE,SAAS,EAAE,WAAW,CAAC,CAAC;QACtD,IAAI,CAAC,KAAK,SAAS,EAAE,CAAC;YACpB,MAAM,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YACtB,UAAU,CAAC,SAAS,CAAC,GAAG,WAAW,CAAC;QACtC,CAAC;IACH,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;AAChC,CAAC;AAMD,MAAM,CAAC,KAAK,UAAU,8BAA8B,CAClD,IAAY,EACZ,MAAkB,EAClB,OAA6B,EAAE;IAE/B,MAAM,GAAG,GAAG,yBAAyB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACpD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrD,OAAO,EAAE,GAAG,GAAG,EAAE,QAAQ,EAAE,CAAC;IAC9B,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC;IAC3E,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,EAAE,GAAG,GAAG,EAAE,QAAQ,EAAE,CAAC;IAC9B,CAAC;IAED,MAAM,GAAG,GAAG,MAAM,cAAc,CAAC;QAC/B,IAAI;QACJ,UAAU,EAAE,MAA4C;QACxD,OAAO,EAAE,GAAG,CAAC,MAAM;QACnB,OAAO;QACP,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,MAAM,EAAE,IAAI,CAAC,MAAM;KACpB,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,EAAE,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;IACjC,MAAM,UAAU,GAAoC,EAAE,GAAG,GAAG,CAAC,UAAU,EAAE,CAAC;IAC1E,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,IAAI,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,SAAS,IAAI,MAAM,CAAC,GAAG,CAAC,KAAK,SAAS,EAAE,CAAC;YAC/D,MAAM,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC9B,UAAU,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QAC1B,CAAC;IACH,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC;AACxD,CAAC;AAED,SAAS,SAAS,CAAC,MAA+B,EAAE,IAAY;IAC9D,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,SAAS;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC;IACpD,qDAAqD;IACrD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;QACtC,IAAI,CAAC,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;YACpD,MAAM,MAAM,GAAI,CAA6B,CAAC,IAAI,CAAC,CAAC;YACpD,IAAI,MAAM,KAAK,SAAS;gBAAE,OAAO,MAAM,CAAC;QAC1C,CAAC;IACH,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,2EAA2E;AAE3E,SAAS,cAAc,CACrB,GAAa,EACb,SAAiB,EACjB,MAAkB;IAElB,MAAM,cAAc,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAClE,MAAM,WAAW,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IAC9D,MAAM,QAAQ,GAAG,CAAC,SAAS,EAAE,cAAc,EAAE,WAAW,CAAC,CAAC;IAE1D,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;QAC5B,OAAO,eAAe,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IACxC,CAAC;IAED,OAAO,eAAe,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;AACxC,CAAC;AAED,SAAS,SAAS,CAAC,KAAa;IAC9B,OAAO,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;AAC5C,CAAC;AAED,SAAS,eAAe,CAAC,GAAa,EAAE,QAAkB;IACxD,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,UAAU,GAAG,GAAG,CAAC,aAAa,CAAC,cAAc,IAAI,IAAI,CAAC,CAAC;QAC7D,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,UAAU,CAAC,YAAY,CAAC,SAAS,CAAC,IAAI,UAAU,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;YAClF,IAAI,IAAI;gBAAE,OAAO,IAAI,CAAC;QACxB,CAAC;QAED,MAAM,OAAO,GAAG,GAAG,CAAC,aAAa,CAAC,YAAY,IAAI,IAAI,CAAC,CAAC;QACxD,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;YACzC,IAAI,IAAI;gBAAE,OAAO,IAAI,CAAC;QACxB,CAAC;QAED,MAAM,WAAW,GAAG,GAAG,CAAC,gBAAgB,CAAC,cAAc,CAAC,CAAC;QACzD,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;YAC7B,MAAM,KAAK,GAAG,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,EAAE,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;YACtF,IAAI,KAAK,KAAK,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;gBACjC,MAAM,IAAI,GAAG,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;gBACpC,IAAI,IAAI;oBAAE,OAAO,IAAI,CAAC;YACxB,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,CAAC,IAAI,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACtD,IAAI,IAAI,EAAE,CAAC;YACT,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;YACtC,IAAI,IAAI;gBAAE,OAAO,IAAI,CAAC;QACxB,CAAC;QAED,MAAM,MAAM,GAAG,GAAG,CAAC,aAAa,CAAC,SAAS,IAAI,GAAG,CAAC,CAAC;QACnD,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,MAAM,CAAC,YAAY,CAAC,QAAQ,IAAI,EAAE,CAAC,IAAI,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;QACxF,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,eAAe,CAAC,GAAa,EAAE,QAAkB;IACxD,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,SAAS,GAAG,GAAG,CAAC,aAAa,CAAC,YAAY,IAAI,IAAI,CAAC,CAAC;QAC1D,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,KAAK,GAAG,SAAS,CAAC,gBAAgB,CAAC,qBAAqB,CAAC,CAAC;YAChE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YACtF,CAAC;QACH,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACxC,MAAM,QAAQ,GAAG,GAAG,CAAC,gBAAgB,CAAC,YAAY,QAAQ,IAAI,CAAC,CAAC;QAChE,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACzF,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"docs-generic.d.ts","sourceRoot":"","sources":["../../../src/extraction/site-extractors/docs-generic.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAoB,MAAM,gBAAgB,CAAC;AAqFlE,eAAO,MAAM,oBAAoB,EAAE,SAqClC,CAAC"}
1
+ {"version":3,"file":"docs-generic.d.ts","sourceRoot":"","sources":["../../../src/extraction/site-extractors/docs-generic.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAoB,MAAM,gBAAgB,CAAC;AAmFlE,eAAO,MAAM,oBAAoB,EAAE,SAqClC,CAAC"}
@@ -1,6 +1,5 @@
1
1
  import { parseHTML } from 'linkedom';
2
- import TurndownService from 'turndown';
3
- const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
2
+ import { htmlToMarkdown } from '../markdown.js';
4
3
  const STRIP_SELECTORS = [
5
4
  'nav',
6
5
  '.docs-sidebar',
@@ -54,7 +53,7 @@ function buildResult(document, contentEl) {
54
53
  : rawTitle;
55
54
  if (!title)
56
55
  return null;
57
- const markdown = turndown.turndown(contentEl.innerHTML).trim();
56
+ const markdown = htmlToMarkdown(contentEl.innerHTML).trim();
58
57
  if (!markdown)
59
58
  return null;
60
59
  return {
@@ -1 +1 @@
1
- {"version":3,"file":"docs-generic.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/docs-generic.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAIxF,MAAM,eAAe,GAAG;IACtB,KAAK;IACL,eAAe;IACf,UAAU;IACV,cAAc;IACd,oBAAoB;IACpB,iBAAiB;IACjB,iBAAiB;IACjB,QAAQ;IACR,QAAQ;CACT,CAAC;AAEF,SAAS,eAAe,CAAC,IAAY;IACnC,IAAI,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC;QAC3E,OAAO,YAAY,CAAC;IACtB,CAAC;IACD,IAAI,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QAChC,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,IAAI,IAAI,CAAC,QAAQ,CAAC,kBAAkB,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,kBAAkB,CAAC;QACtE,CAAC,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,EAAE,CAAC;QAClE,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QAC/B,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,oBAAoB,CAC3B,QAAkB,EAClB,gBAA0B;IAE1B,KAAK,MAAM,QAAQ,IAAI,gBAAgB,EAAE,CAAC;QACxC,MAAM,EAAE,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;QAC5C,IAAI,EAAE;YAAE,OAAO,EAAa,CAAC;IAC/B,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,aAAa,CAAC,IAAa,EAAE,SAAmB;IACvD,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;YAC7D,EAAE,CAAC,UAAU,EAAE,WAAW,CAAC,EAAE,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAClB,QAAkB,EAClB,SAAkB;IAElB,aAAa,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC;IAE1C,MAAM,OAAO,GACX,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC;QAC7B,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC;QAC5B,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAElC,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;QAClC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE;QAChC,CAAC,CAAC,QAAQ,CAAC;IAEb,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IAExB,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC/D,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,MAAM,oBAAoB,GAAc;IAC7C,IAAI,EAAE,cAAc;IAEpB,SAAS,CAAC,IAAY,EAAE,IAAa;QACnC,IAAI,CAAC,IAAI;YAAE,OAAO,KAAK,CAAC;QACxB,OAAO,eAAe,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC;IACxC,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,IAAY;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,SAAS,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACxC,IAAI,CAAC,SAAS;YAAE,OAAO,IAAI,CAAC;QAE5B,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,IAAI,gBAA0B,CAAC;QAC/B,QAAQ,SAAS,EAAE,CAAC;YAClB,KAAK,YAAY;gBACf,gBAAgB,GAAG,CAAC,WAAW,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;gBACpD,MAAM;YACR,KAAK,QAAQ;gBACX,gBAAgB,GAAG,CAAC,aAAa,CAAC,CAAC;gBACnC,MAAM;YACR,KAAK,QAAQ;gBACX,gBAAgB,GAAG,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;gBAC1C,MAAM;YACR,KAAK,SAAS;gBACZ,gBAAgB,GAAG,CAAC,YAAY,CAAC,CAAC;gBAClC,MAAM;QACV,CAAC;QAED,MAAM,SAAS,GAAG,oBAAoB,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAC;QACnE,IAAI,CAAC,SAAS;YAAE,OAAO,IAAI,CAAC;QAE5B,OAAO,WAAW,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAC1C,CAAC;CACF,CAAC"}
1
+ {"version":3,"file":"docs-generic.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/docs-generic.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAKhD,MAAM,eAAe,GAAG;IACtB,KAAK;IACL,eAAe;IACf,UAAU;IACV,cAAc;IACd,oBAAoB;IACpB,iBAAiB;IACjB,iBAAiB;IACjB,QAAQ;IACR,QAAQ;CACT,CAAC;AAEF,SAAS,eAAe,CAAC,IAAY;IACnC,IAAI,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC;QAC3E,OAAO,YAAY,CAAC;IACtB,CAAC;IACD,IAAI,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QAChC,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,IAAI,IAAI,CAAC,QAAQ,CAAC,kBAAkB,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,kBAAkB,CAAC;QACtE,CAAC,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,EAAE,CAAC;QAClE,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QAC/B,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,oBAAoB,CAC3B,QAAkB,EAClB,gBAA0B;IAE1B,KAAK,MAAM,QAAQ,IAAI,gBAAgB,EAAE,CAAC;QACxC,MAAM,EAAE,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;QAC5C,IAAI,EAAE;YAAE,OAAO,EAAa,CAAC;IAC/B,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,aAAa,CAAC,IAAa,EAAE,SAAmB;IACvD,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;YAC7D,EAAE,CAAC,UAAU,EAAE,WAAW,CAAC,EAAE,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAClB,QAAkB,EAClB,SAAkB;IAElB,aAAa,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC;IAE1C,MAAM,OAAO,GACX,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC;QAC7B,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC;QAC5B,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAElC,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;QAClC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE;QAChC,CAAC,CAAC,QAAQ,CAAC;IAEb,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IAExB,MAAM,QAAQ,GAAG,cAAc,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC5D,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,MAAM,oBAAoB,GAAc;IAC7C,IAAI,EAAE,cAAc;IAEpB,SAAS,CAAC,IAAY,EAAE,IAAa;QACnC,IAAI,CAAC,IAAI;YAAE,OAAO,KAAK,CAAC;QACxB,OAAO,eAAe,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC;IACxC,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,IAAY;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,SAAS,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACxC,IAAI,CAAC,SAAS;YAAE,OAAO,IAAI,CAAC;QAE5B,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,IAAI,gBAA0B,CAAC;QAC/B,QAAQ,SAAS,EAAE,CAAC;YAClB,KAAK,YAAY;gBACf,gBAAgB,GAAG,CAAC,WAAW,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;gBACpD,MAAM;YACR,KAAK,QAAQ;gBACX,gBAAgB,GAAG,CAAC,aAAa,CAAC,CAAC;gBACnC,MAAM;YACR,KAAK,QAAQ;gBACX,gBAAgB,GAAG,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;gBAC1C,MAAM;YACR,KAAK,SAAS;gBACZ,gBAAgB,GAAG,CAAC,YAAY,CAAC,CAAC;gBAClC,MAAM;QACV,CAAC;QAED,MAAM,SAAS,GAAG,oBAAoB,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAC;QACnE,IAAI,CAAC,SAAS;YAAE,OAAO,IAAI,CAAC;QAE5B,OAAO,WAAW,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAC1C,CAAC;CACF,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"github.d.ts","sourceRoot":"","sources":["../../../src/extraction/site-extractors/github.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAoB,MAAM,gBAAgB,CAAC;AAoGlE,eAAO,MAAM,eAAe,EAAE,SA2B7B,CAAC"}
1
+ {"version":3,"file":"github.d.ts","sourceRoot":"","sources":["../../../src/extraction/site-extractors/github.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAoB,MAAM,gBAAgB,CAAC;AAkGlE,eAAO,MAAM,eAAe,EAAE,SA2B7B,CAAC"}
@@ -1,6 +1,5 @@
1
1
  import { parseHTML } from 'linkedom';
2
- import TurndownService from 'turndown';
3
- const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
2
+ import { htmlToMarkdown } from '../markdown.js';
4
3
  function isIssueOrPR(url) {
5
4
  return /\/issues\/\d+|\/pull\/\d+/.test(url);
6
5
  }
@@ -25,7 +24,7 @@ function extractIssue(document, _url) {
25
24
  }
26
25
  Array.from(commentBodies).forEach((body, i) => {
27
26
  const html = body.innerHTML;
28
- const md = turndown.turndown(html).trim();
27
+ const md = htmlToMarkdown(html).trim();
29
28
  if (md) {
30
29
  sections.push(i === 0 ? md : `---\n\n${md}`);
31
30
  }
@@ -48,7 +47,7 @@ function extractReadme(document) {
48
47
  document.querySelector('.markdown-body');
49
48
  if (!readmeBody)
50
49
  return null;
51
- const markdown = turndown.turndown(readmeBody.innerHTML).trim();
50
+ const markdown = htmlToMarkdown(readmeBody.innerHTML).trim();
52
51
  if (!markdown)
53
52
  return null;
54
53
  return {
@@ -68,7 +67,7 @@ function extractBlob(document) {
68
67
  document.querySelector('.markdown-body');
69
68
  if (!codeBlock)
70
69
  return null;
71
- const markdown = turndown.turndown(codeBlock.innerHTML).trim();
70
+ const markdown = htmlToMarkdown(codeBlock.innerHTML).trim();
72
71
  if (!markdown)
73
72
  return null;
74
73
  return {
@@ -1 +1 @@
1
- {"version":3,"file":"github.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/github.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,SAAS,WAAW,CAAC,GAAW;IAC9B,OAAO,2BAA2B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,MAAM,CAAC,GAAW;IACzB,OAAO,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9B,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB,EAAE,IAAY;IACpD,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,iBAAiB,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,kBAAkB,CAAC,CAAC;IACxG,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAEhD,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC;SAChC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;SACzC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,MAAM,aAAa,GAAG,QAAQ,CAAC,gBAAgB,CAAC,uBAAuB,CAAC,CAAC;IACzE,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAE5C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,QAAQ,CAAC,IAAI,CAAC,eAAe,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QAC5C,MAAM,IAAI,GAAI,IAAgB,CAAC,SAAS,CAAC;QACzC,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1C,IAAI,EAAE,EAAE,CAAC;YACP,QAAQ,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEvC,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,QAAkB;IACvC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,QAAQ,CAAC;IAEzD,MAAM,UAAU,GACd,QAAQ,CAAC,aAAa,CAAC,wBAAwB,CAAC;QAChD,QAAQ,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAE3C,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,UAAsB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC7E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,QAAkB;IACrC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAEjD,MAAM,SAAS,GACb,QAAQ,CAAC,aAAa,CAAC,oBAAoB,CAAC;QAC5C,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC;QACpC,QAAQ,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAE3C,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC;IAE5B,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,SAAqB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC5E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,MAAM,eAAe,GAAc;IACxC,IAAI,EAAE,QAAQ;IAEd,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,YAAY,IAAI,QAAQ,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QACvE,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,GAAW;QAC/B,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YACrB,OAAO,YAAY,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACrC,CAAC;QAED,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YAChB,OAAO,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC/B,CAAC;QAED,OAAO,aAAa,CAAC,QAAQ,CAAC,CAAC;IACjC,CAAC;CACF,CAAC"}
1
+ {"version":3,"file":"github.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/github.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAGhD,SAAS,WAAW,CAAC,GAAW;IAC9B,OAAO,2BAA2B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,MAAM,CAAC,GAAW;IACzB,OAAO,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9B,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB,EAAE,IAAY;IACpD,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,iBAAiB,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,kBAAkB,CAAC,CAAC;IACxG,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAEhD,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC;SAChC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;SACzC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,MAAM,aAAa,GAAG,QAAQ,CAAC,gBAAgB,CAAC,uBAAuB,CAAC,CAAC;IACzE,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAE5C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,QAAQ,CAAC,IAAI,CAAC,eAAe,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QAC5C,MAAM,IAAI,GAAI,IAAgB,CAAC,SAAS,CAAC;QACzC,MAAM,EAAE,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QACvC,IAAI,EAAE,EAAE,CAAC;YACP,QAAQ,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEvC,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,QAAkB;IACvC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,QAAQ,CAAC;IAEzD,MAAM,UAAU,GACd,QAAQ,CAAC,aAAa,CAAC,wBAAwB,CAAC;QAChD,QAAQ,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAE3C,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,cAAc,CAAE,UAAsB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,QAAkB;IACrC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAEjD,MAAM,SAAS,GACb,QAAQ,CAAC,aAAa,CAAC,oBAAoB,CAAC;QAC5C,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC;QACpC,QAAQ,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAE3C,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC;IAE5B,MAAM,QAAQ,GAAG,cAAc,CAAE,SAAqB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IACzE,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,MAAM,eAAe,GAAc;IACxC,IAAI,EAAE,QAAQ;IAEd,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,YAAY,IAAI,QAAQ,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QACvE,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,GAAW;QAC/B,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YACrB,OAAO,YAAY,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACrC,CAAC;QAED,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YAChB,OAAO,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC/B,CAAC;QAED,OAAO,aAAa,CAAC,QAAQ,CAAC,CAAC;IACjC,CAAC;CACF,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"mdn.d.ts","sourceRoot":"","sources":["../../../src/extraction/site-extractors/mdn.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAoB,MAAM,gBAAgB,CAAC;AAalE,eAAO,MAAM,YAAY,EAAE,SAqD1B,CAAC"}
1
+ {"version":3,"file":"mdn.d.ts","sourceRoot":"","sources":["../../../src/extraction/site-extractors/mdn.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAoB,MAAM,gBAAgB,CAAC;AAWlE,eAAO,MAAM,YAAY,EAAE,SAqD1B,CAAC"}
@@ -1,6 +1,5 @@
1
1
  import { parseHTML } from 'linkedom';
2
- import TurndownService from 'turndown';
3
- const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
2
+ import { htmlToMarkdown } from '../markdown.js';
4
3
  const STRIP_SELECTORS = [
5
4
  'nav',
6
5
  '.sidebar',
@@ -42,7 +41,7 @@ export const mdnExtractor = {
42
41
  : rawTitle;
43
42
  if (!title)
44
43
  return null;
45
- const markdown = turndown.turndown(article.innerHTML).trim();
44
+ const markdown = htmlToMarkdown(article.innerHTML).trim();
46
45
  if (!markdown)
47
46
  return null;
48
47
  return {
@@ -1 +1 @@
1
- {"version":3,"file":"mdn.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/mdn.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,MAAM,eAAe,GAAG;IACtB,KAAK;IACL,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,WAAW;CACZ,CAAC;AAEF,MAAM,CAAC,MAAM,YAAY,GAAc;IACrC,IAAI,EAAE,KAAK;IAEX,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,uBAAuB,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,IAAY;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GACX,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC;YACnD,QAAQ,CAAC,aAAa,CAAC,kBAAkB,CAAC;YAC1C,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAEpC,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,KAAK,MAAM,QAAQ,IAAI,eAAe,EAAE,CAAC;YACvC,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;gBAChE,EAAE,CAAC,UAAU,EAAE,WAAW,CAAC,EAAE,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GACX,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC;YAC3B,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAElC,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;YAClC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE;YAChC,CAAC,CAAC,QAAQ,CAAC;QAEb,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,OAAmB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1E,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAE3B,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
1
+ {"version":3,"file":"mdn.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/mdn.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAGhD,MAAM,eAAe,GAAG;IACtB,KAAK;IACL,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,WAAW;CACZ,CAAC;AAEF,MAAM,CAAC,MAAM,YAAY,GAAc;IACrC,IAAI,EAAE,KAAK;IAEX,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,uBAAuB,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,IAAY;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GACX,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC;YACnD,QAAQ,CAAC,aAAa,CAAC,kBAAkB,CAAC;YAC1C,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAEpC,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,KAAK,MAAM,QAAQ,IAAI,eAAe,EAAE,CAAC;YACvC,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;gBAChE,EAAE,CAAC,UAAU,EAAE,WAAW,CAAC,EAAE,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GACX,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC;YAC3B,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAElC,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;YAClC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE;YAChC,CAAC,CAAC,QAAQ,CAAC;QAEb,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,QAAQ,GAAG,cAAc,CAAE,OAAmB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;QACvE,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAE3B,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"stackoverflow.d.ts","sourceRoot":"","sources":["../../../src/extraction/site-extractors/stackoverflow.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAoB,MAAM,gBAAgB,CAAC;AAgElE,eAAO,MAAM,sBAAsB,EAAE,SAkDpC,CAAC"}
1
+ {"version":3,"file":"stackoverflow.d.ts","sourceRoot":"","sources":["../../../src/extraction/site-extractors/stackoverflow.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAoB,MAAM,gBAAgB,CAAC;AA8DlE,eAAO,MAAM,sBAAsB,EAAE,SAkDpC,CAAC"}
@@ -1,6 +1,5 @@
1
1
  import { parseHTML } from 'linkedom';
2
- import TurndownService from 'turndown';
3
- const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
2
+ import { htmlToMarkdown } from '../markdown.js';
4
3
  function parseVotes(el) {
5
4
  if (!el)
6
5
  return 0;
@@ -22,7 +21,7 @@ function parseAnswers(document) {
22
21
  }
23
22
  function buildMarkdown(title, tags, votes, questionHtml, answers) {
24
23
  const tagLine = `Tags: ${tags.join(', ')} | Votes: ${votes}`;
25
- const questionMd = turndown.turndown(questionHtml).trim();
24
+ const questionMd = htmlToMarkdown(questionHtml).trim();
26
25
  const sections = [
27
26
  `# ${title}`,
28
27
  tagLine,
@@ -36,7 +35,7 @@ function buildMarkdown(title, tags, votes, questionHtml, answers) {
36
35
  const heading = answer.accepted
37
36
  ? `## Accepted Answer (Votes: ${answer.votes})`
38
37
  : `## Answer (Votes: ${answer.votes})`;
39
- const bodyMd = turndown.turndown(answer.bodyHtml).trim();
38
+ const bodyMd = htmlToMarkdown(answer.bodyHtml).trim();
40
39
  sections.push('---', '', heading, '', bodyMd);
41
40
  }
42
41
  return sections.join('\n\n');
@@ -1 +1 @@
1
- {"version":3,"file":"stackoverflow.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/stackoverflow.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAQxF,SAAS,UAAU,CAAC,EAAkB;IACpC,IAAI,CAAC,EAAE;QAAE,OAAO,CAAC,CAAC;IAClB,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,YAAY,CAAC,YAAY,CAAC,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,GAAG,CAAC;IACrF,OAAO,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB;IACtC,MAAM,SAAS,GAAG,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;IAChE,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,UAAU,CAAC,EAAa,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,qCAAqC,CAAC,CAAC;QACvE,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAE,MAAkB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CACpB,KAAa,EACb,IAAc,EACd,KAAa,EACb,YAAoB,EACpB,OAAiB;IAEjB,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,KAAK,EAAE,CAAC;IAC7D,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC;IAE1D,MAAM,QAAQ,GAAa;QACzB,KAAK,KAAK,EAAE;QACZ,OAAO;QACP,EAAE;QACF,UAAU;KACX,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACpF,MAAM,OAAO,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,MAAM,CAAC,CAAC;IAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ;YAC7B,CAAC,CAAC,8BAA8B,MAAM,CAAC,KAAK,GAAG;YAC/C,CAAC,CAAC,qBAAqB,MAAM,CAAC,KAAK,GAAG,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;QACzD,QAAQ,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED,MAAM,CAAC,MAAM,sBAAsB,GAAc;IAC/C,IAAI,EAAE,eAAe;IAErB,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,mBAAmB;gBACrC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC;gBACvC,QAAQ,KAAK,mBAAmB;gBAChC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC;QAC5C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,IAAY;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC;QAC9D,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,cAAc,GAAG,QAAQ,CAAC,aAAa,CAAC,mEAAmE,CAAC,CAAC;QACnH,IAAI,CAAC,cAAc;YAAE,OAAO,IAAI,CAAC;QAEjC,MAAM,YAAY,GAAI,cAA0B,CAAC,SAAS,CAAC;QAE3D,MAAM,MAAM,GAAG,QAAQ,CAAC,gBAAgB,CAAC,8DAA8D,CAAC,CAAC;QACzG,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAE1F,MAAM,UAAU,GAAG,QAAQ,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,UAAU,CAAC,UAA4B,CAAC,CAAC;QAEvD,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;QAEvC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAE1E,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
1
+ {"version":3,"file":"stackoverflow.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/stackoverflow.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAShD,SAAS,UAAU,CAAC,EAAkB;IACpC,IAAI,CAAC,EAAE;QAAE,OAAO,CAAC,CAAC;IAClB,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,YAAY,CAAC,YAAY,CAAC,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,GAAG,CAAC;IACrF,OAAO,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB;IACtC,MAAM,SAAS,GAAG,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;IAChE,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,UAAU,CAAC,EAAa,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,qCAAqC,CAAC,CAAC;QACvE,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAE,MAAkB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CACpB,KAAa,EACb,IAAc,EACd,KAAa,EACb,YAAoB,EACpB,OAAiB;IAEjB,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,KAAK,EAAE,CAAC;IAC7D,MAAM,UAAU,GAAG,cAAc,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC;IAEvD,MAAM,QAAQ,GAAa;QACzB,KAAK,KAAK,EAAE;QACZ,OAAO;QACP,EAAE;QACF,UAAU;KACX,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACpF,MAAM,OAAO,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,MAAM,CAAC,CAAC;IAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ;YAC7B,CAAC,CAAC,8BAA8B,MAAM,CAAC,KAAK,GAAG;YAC/C,CAAC,CAAC,qBAAqB,MAAM,CAAC,KAAK,GAAG,CAAC;QACzC,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;QACtD,QAAQ,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED,MAAM,CAAC,MAAM,sBAAsB,GAAc;IAC/C,IAAI,EAAE,eAAe;IAErB,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,mBAAmB;gBACrC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC;gBACvC,QAAQ,KAAK,mBAAmB;gBAChC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC;QAC5C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,IAAY;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC;QAC9D,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,cAAc,GAAG,QAAQ,CAAC,aAAa,CAAC,mEAAmE,CAAC,CAAC;QACnH,IAAI,CAAC,cAAc;YAAE,OAAO,IAAI,CAAC;QAEjC,MAAM,YAAY,GAAI,cAA0B,CAAC,SAAS,CAAC;QAE3D,MAAM,MAAM,GAAG,QAAQ,CAAC,gBAAgB,CAAC,8DAA8D,CAAC,CAAC;QACzG,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAE1F,MAAM,UAAU,GAAG,QAAQ,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,UAAU,CAAC,UAA4B,CAAC,CAAC;QAEvD,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;QAEvC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAE1E,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
@@ -0,0 +1,4 @@
1
+ import type { StructuredDataResult } from '../types.js';
2
+ export declare function extractStructuredData(html: string): StructuredDataResult[];
3
+ export declare const KNOWN_SCHEMA_TYPES: ReadonlySet<string>;
4
+ //# sourceMappingURL=structured-data.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"structured-data.d.ts","sourceRoot":"","sources":["../../src/extraction/structured-data.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAaxD,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,GAAG,oBAAoB,EAAE,CAQ1E;AA8KD,eAAO,MAAM,kBAAkB,EAAE,WAAW,CAAC,MAAM,CAAe,CAAC"}