opencode-dux 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302):
  1. package/LICENSE +21 -0
  2. package/README.md +452 -0
  3. package/dist/agents/descriptions.d.ts +6 -0
  4. package/dist/agents/designer.d.ts +2 -0
  5. package/dist/agents/explorer.d.ts +2 -0
  6. package/dist/agents/fixer.d.ts +2 -0
  7. package/dist/agents/index.d.ts +22 -0
  8. package/dist/agents/interpreter.d.ts +2 -0
  9. package/dist/agents/librarian.d.ts +2 -0
  10. package/dist/agents/oracle.d.ts +2 -0
  11. package/dist/agents/orchestrator.d.ts +27 -0
  12. package/dist/agents/overrides.d.ts +18 -0
  13. package/dist/agents/prompt-blocks.d.ts +97 -0
  14. package/dist/agents/steward.d.ts +3 -0
  15. package/dist/cli/config-io.d.ts +24 -0
  16. package/dist/cli/config-manager.d.ts +4 -0
  17. package/dist/cli/index.d.ts +2 -0
  18. package/dist/cli/index.js +1006 -0
  19. package/dist/cli/install.d.ts +2 -0
  20. package/dist/cli/mcps.d.ts +13 -0
  21. package/dist/cli/model-key-normalization.d.ts +1 -0
  22. package/dist/cli/paths.d.ts +35 -0
  23. package/dist/cli/providers.d.ts +137 -0
  24. package/dist/cli/skills.d.ts +22 -0
  25. package/dist/cli/system.d.ts +5 -0
  26. package/dist/cli/types.d.ts +38 -0
  27. package/dist/config/constants.d.ts +12 -0
  28. package/dist/config/index.d.ts +4 -0
  29. package/dist/config/loader.d.ts +40 -0
  30. package/dist/config/runtime-preset.d.ts +12 -0
  31. package/dist/config/schema.d.ts +281 -0
  32. package/dist/config/utils.d.ts +10 -0
  33. package/dist/discovery/local/types.d.ts +79 -0
  34. package/dist/discovery/local.d.ts +73 -0
  35. package/dist/discovery/mcp-servers.d.ts +88 -0
  36. package/dist/discovery/skills.d.ts +94 -0
  37. package/dist/hooks/apply-patch/codec.d.ts +7 -0
  38. package/dist/hooks/apply-patch/errors.d.ts +25 -0
  39. package/dist/hooks/apply-patch/execution-context.d.ts +27 -0
  40. package/dist/hooks/apply-patch/index.d.ts +15 -0
  41. package/dist/hooks/apply-patch/matching.d.ts +26 -0
  42. package/dist/hooks/apply-patch/operations.d.ts +3 -0
  43. package/dist/hooks/apply-patch/patch.d.ts +2 -0
  44. package/dist/hooks/apply-patch/prepared-changes.d.ts +17 -0
  45. package/dist/hooks/apply-patch/resolution.d.ts +19 -0
  46. package/dist/hooks/apply-patch/rewrite.d.ts +7 -0
  47. package/dist/hooks/apply-patch/test-helpers.d.ts +6 -0
  48. package/dist/hooks/apply-patch/types.d.ts +80 -0
  49. package/dist/hooks/auto-update-checker/cache.d.ts +11 -0
  50. package/dist/hooks/auto-update-checker/checker.d.ts +32 -0
  51. package/dist/hooks/auto-update-checker/constants.d.ts +11 -0
  52. package/dist/hooks/auto-update-checker/index.d.ts +18 -0
  53. package/dist/hooks/auto-update-checker/types.d.ts +22 -0
  54. package/dist/hooks/chat-headers.d.ts +16 -0
  55. package/dist/hooks/context-pressure-reminder/index.d.ts +33 -0
  56. package/dist/hooks/delegate-task-retry/guidance.d.ts +2 -0
  57. package/dist/hooks/delegate-task-retry/hook.d.ts +8 -0
  58. package/dist/hooks/delegate-task-retry/index.d.ts +4 -0
  59. package/dist/hooks/delegate-task-retry/patterns.d.ts +11 -0
  60. package/dist/hooks/filter-available-skills/index.d.ts +32 -0
  61. package/dist/hooks/foreground-fallback/index.d.ts +72 -0
  62. package/dist/hooks/image-hook.d.ts +5 -0
  63. package/dist/hooks/index.d.ts +14 -0
  64. package/dist/hooks/json-error-recovery/hook.d.ts +18 -0
  65. package/dist/hooks/json-error-recovery/index.d.ts +1 -0
  66. package/dist/hooks/phase-reminder/index.d.ts +26 -0
  67. package/dist/hooks/post-file-tool-nudge/index.d.ts +19 -0
  68. package/dist/hooks/task-session-manager/index.d.ts +52 -0
  69. package/dist/hooks/todo-continuation/index.d.ts +53 -0
  70. package/dist/hooks/todo-continuation/todo-hygiene.d.ts +35 -0
  71. package/dist/index.d.ts +5 -0
  72. package/dist/index.js +31782 -0
  73. package/dist/mcp/context7.d.ts +6 -0
  74. package/dist/mcp/grep-app.d.ts +6 -0
  75. package/dist/mcp/index.d.ts +13 -0
  76. package/dist/mcp/types.d.ts +12 -0
  77. package/dist/mcp/websearch.d.ts +9 -0
  78. package/dist/skills/registry.d.ts +29 -0
  79. package/dist/subscriptions/accounts-store.d.ts +57 -0
  80. package/dist/subscriptions/index.d.ts +13 -0
  81. package/dist/subscriptions/neuralwatt-scraper.d.ts +14 -0
  82. package/dist/subscriptions/opencode-go-scraper.d.ts +27 -0
  83. package/dist/subscriptions/types.d.ts +115 -0
  84. package/dist/subscriptions/usage-service.d.ts +74 -0
  85. package/dist/tools/ast-grep/cli.d.ts +15 -0
  86. package/dist/tools/ast-grep/constants.d.ts +25 -0
  87. package/dist/tools/ast-grep/downloader.d.ts +5 -0
  88. package/dist/tools/ast-grep/index.d.ts +10 -0
  89. package/dist/tools/ast-grep/tools.d.ts +3 -0
  90. package/dist/tools/ast-grep/types.d.ts +30 -0
  91. package/dist/tools/ast-grep/utils.d.ts +4 -0
  92. package/dist/tools/delegate.d.ts +14 -0
  93. package/dist/tools/index.d.ts +5 -0
  94. package/dist/tools/preset-manager.d.ts +27 -0
  95. package/dist/tools/smartfetch/binary.d.ts +3 -0
  96. package/dist/tools/smartfetch/cache.d.ts +6 -0
  97. package/dist/tools/smartfetch/constants.d.ts +12 -0
  98. package/dist/tools/smartfetch/index.d.ts +3 -0
  99. package/dist/tools/smartfetch/network.d.ts +38 -0
  100. package/dist/tools/smartfetch/secondary-model.d.ts +28 -0
  101. package/dist/tools/smartfetch/tool.d.ts +3 -0
  102. package/dist/tools/smartfetch/types.d.ts +122 -0
  103. package/dist/tools/smartfetch/utils.d.ts +18 -0
  104. package/dist/tui-state.d.ts +168 -0
  105. package/dist/tui.d.ts +37 -0
  106. package/dist/tui.js +1896 -0
  107. package/dist/utils/agent-variant.d.ts +63 -0
  108. package/dist/utils/compat.d.ts +30 -0
  109. package/dist/utils/env.d.ts +1 -0
  110. package/dist/utils/index.d.ts +9 -0
  111. package/dist/utils/internal-initiator.d.ts +6 -0
  112. package/dist/utils/logger.d.ts +8 -0
  113. package/dist/utils/polling.d.ts +21 -0
  114. package/dist/utils/session-manager.d.ts +55 -0
  115. package/dist/utils/session.d.ts +90 -0
  116. package/dist/utils/subagent-depth.d.ts +35 -0
  117. package/dist/utils/system-collapse.d.ts +6 -0
  118. package/dist/utils/task.d.ts +4 -0
  119. package/dist/utils/zip-extractor.d.ts +1 -0
  120. package/index.ts +1 -0
  121. package/opencode-dux.schema.json +634 -0
  122. package/package.json +103 -0
  123. package/src/agents/descriptions.ts +55 -0
  124. package/src/agents/designer.test.ts +86 -0
  125. package/src/agents/designer.ts +154 -0
  126. package/src/agents/display-name.test.ts +186 -0
  127. package/src/agents/explorer.test.ts +79 -0
  128. package/src/agents/explorer.ts +144 -0
  129. package/src/agents/fixer.test.ts +79 -0
  130. package/src/agents/fixer.ts +145 -0
  131. package/src/agents/index.test.ts +472 -0
  132. package/src/agents/index.ts +248 -0
  133. package/src/agents/interpreter.ts +136 -0
  134. package/src/agents/librarian.test.ts +80 -0
  135. package/src/agents/librarian.ts +145 -0
  136. package/src/agents/oracle.test.ts +89 -0
  137. package/src/agents/oracle.ts +184 -0
  138. package/src/agents/orchestrator.test.ts +116 -0
  139. package/src/agents/orchestrator.ts +574 -0
  140. package/src/agents/overrides.ts +95 -0
  141. package/src/agents/prompt-blocks.test.ts +114 -0
  142. package/src/agents/prompt-blocks.ts +640 -0
  143. package/src/agents/steward.ts +146 -0
  144. package/src/cli/config-io.test.ts +536 -0
  145. package/src/cli/config-io.ts +473 -0
  146. package/src/cli/config-manager.test.ts +141 -0
  147. package/src/cli/config-manager.ts +4 -0
  148. package/src/cli/index.ts +88 -0
  149. package/src/cli/install.ts +282 -0
  150. package/src/cli/mcps.test.ts +62 -0
  151. package/src/cli/mcps.ts +39 -0
  152. package/src/cli/model-key-normalization.test.ts +21 -0
  153. package/src/cli/model-key-normalization.ts +60 -0
  154. package/src/cli/paths.test.ts +167 -0
  155. package/src/cli/paths.ts +144 -0
  156. package/src/cli/providers.test.ts +118 -0
  157. package/src/cli/providers.ts +141 -0
  158. package/src/cli/skills.test.ts +111 -0
  159. package/src/cli/skills.ts +103 -0
  160. package/src/cli/system.test.ts +91 -0
  161. package/src/cli/system.ts +180 -0
  162. package/src/cli/types.ts +43 -0
  163. package/src/config/constants.ts +58 -0
  164. package/src/config/index.ts +4 -0
  165. package/src/config/loader.test.ts +1194 -0
  166. package/src/config/loader.ts +269 -0
  167. package/src/config/model-resolution.test.ts +176 -0
  168. package/src/config/runtime-preset.test.ts +61 -0
  169. package/src/config/runtime-preset.ts +37 -0
  170. package/src/config/schema.ts +248 -0
  171. package/src/config/utils.test.ts +41 -0
  172. package/src/config/utils.ts +23 -0
  173. package/src/discovery/local/types.ts +85 -0
  174. package/src/discovery/local.ts +322 -0
  175. package/src/discovery/mcp-servers.ts +804 -0
  176. package/src/discovery/skills.ts +959 -0
  177. package/src/hooks/apply-patch/codec.test.ts +184 -0
  178. package/src/hooks/apply-patch/codec.ts +352 -0
  179. package/src/hooks/apply-patch/errors.ts +117 -0
  180. package/src/hooks/apply-patch/execution-context.ts +432 -0
  181. package/src/hooks/apply-patch/hook.test.ts +768 -0
  182. package/src/hooks/apply-patch/index.ts +126 -0
  183. package/src/hooks/apply-patch/matching.test.ts +215 -0
  184. package/src/hooks/apply-patch/matching.ts +586 -0
  185. package/src/hooks/apply-patch/operations.test.ts +1535 -0
  186. package/src/hooks/apply-patch/operations.ts +3 -0
  187. package/src/hooks/apply-patch/patch.ts +9 -0
  188. package/src/hooks/apply-patch/prepared-changes.ts +400 -0
  189. package/src/hooks/apply-patch/resolution.test.ts +420 -0
  190. package/src/hooks/apply-patch/resolution.ts +437 -0
  191. package/src/hooks/apply-patch/rewrite.ts +496 -0
  192. package/src/hooks/apply-patch/test-helpers.ts +52 -0
  193. package/src/hooks/apply-patch/types.ts +111 -0
  194. package/src/hooks/auto-update-checker/cache.test.ts +179 -0
  195. package/src/hooks/auto-update-checker/cache.ts +188 -0
  196. package/src/hooks/auto-update-checker/checker.test.ts +159 -0
  197. package/src/hooks/auto-update-checker/checker.ts +308 -0
  198. package/src/hooks/auto-update-checker/constants.ts +33 -0
  199. package/src/hooks/auto-update-checker/index.test.ts +282 -0
  200. package/src/hooks/auto-update-checker/index.ts +225 -0
  201. package/src/hooks/auto-update-checker/types.ts +26 -0
  202. package/src/hooks/chat-headers.test.ts +236 -0
  203. package/src/hooks/chat-headers.ts +97 -0
  204. package/src/hooks/context-pressure-reminder/index.test.ts +179 -0
  205. package/src/hooks/context-pressure-reminder/index.ts +137 -0
  206. package/src/hooks/delegate-task-retry/guidance.ts +41 -0
  207. package/src/hooks/delegate-task-retry/hook.ts +23 -0
  208. package/src/hooks/delegate-task-retry/index.test.ts +38 -0
  209. package/src/hooks/delegate-task-retry/index.ts +7 -0
  210. package/src/hooks/delegate-task-retry/patterns.ts +79 -0
  211. package/src/hooks/filter-available-skills/index.test.ts +297 -0
  212. package/src/hooks/filter-available-skills/index.ts +160 -0
  213. package/src/hooks/foreground-fallback/index.test.ts +624 -0
  214. package/src/hooks/foreground-fallback/index.ts +374 -0
  215. package/src/hooks/image-hook.ts +6 -0
  216. package/src/hooks/index.ts +17 -0
  217. package/src/hooks/json-error-recovery/hook.ts +73 -0
  218. package/src/hooks/json-error-recovery/index.test.ts +111 -0
  219. package/src/hooks/json-error-recovery/index.ts +6 -0
  220. package/src/hooks/phase-reminder/index.test.ts +74 -0
  221. package/src/hooks/phase-reminder/index.ts +85 -0
  222. package/src/hooks/post-file-tool-nudge/index.test.ts +94 -0
  223. package/src/hooks/post-file-tool-nudge/index.ts +63 -0
  224. package/src/hooks/task-session-manager/index.test.ts +833 -0
  225. package/src/hooks/task-session-manager/index.ts +434 -0
  226. package/src/hooks/todo-continuation/index.test.ts +3026 -0
  227. package/src/hooks/todo-continuation/index.ts +878 -0
  228. package/src/hooks/todo-continuation/todo-hygiene.test.ts +204 -0
  229. package/src/hooks/todo-continuation/todo-hygiene.ts +207 -0
  230. package/src/index.ts +1672 -0
  231. package/src/mcp/context7.ts +14 -0
  232. package/src/mcp/grep-app.ts +11 -0
  233. package/src/mcp/index.test.ts +96 -0
  234. package/src/mcp/index.ts +66 -0
  235. package/src/mcp/types.ts +16 -0
  236. package/src/mcp/websearch.ts +47 -0
  237. package/src/skills/codemap/README.md +60 -0
  238. package/src/skills/codemap/SKILL.md +174 -0
  239. package/src/skills/codemap/scripts/codemap.mjs +483 -0
  240. package/src/skills/codemap/scripts/codemap.test.ts +129 -0
  241. package/src/skills/registry.ts +218 -0
  242. package/src/skills/simplify/README.md +19 -0
  243. package/src/skills/simplify/SKILL.md +138 -0
  244. package/src/subscriptions/accounts-store.test.ts +236 -0
  245. package/src/subscriptions/accounts-store.ts +184 -0
  246. package/src/subscriptions/index.ts +30 -0
  247. package/src/subscriptions/neuralwatt-scraper.ts +108 -0
  248. package/src/subscriptions/opencode-go-scraper.ts +301 -0
  249. package/src/subscriptions/types.ts +145 -0
  250. package/src/subscriptions/usage-service.test.ts +202 -0
  251. package/src/subscriptions/usage-service.ts +651 -0
  252. package/src/tools/ast-grep/cli.ts +257 -0
  253. package/src/tools/ast-grep/constants.ts +214 -0
  254. package/src/tools/ast-grep/downloader.ts +131 -0
  255. package/src/tools/ast-grep/index.ts +24 -0
  256. package/src/tools/ast-grep/tools.ts +117 -0
  257. package/src/tools/ast-grep/types.ts +51 -0
  258. package/src/tools/ast-grep/utils.ts +126 -0
  259. package/src/tools/delegate-handoff.test.ts +18 -0
  260. package/src/tools/delegate.ts +508 -0
  261. package/src/tools/index.ts +8 -0
  262. package/src/tools/preset-manager.test.ts +795 -0
  263. package/src/tools/preset-manager.ts +332 -0
  264. package/src/tools/smartfetch/binary.ts +58 -0
  265. package/src/tools/smartfetch/cache.test.ts +34 -0
  266. package/src/tools/smartfetch/cache.ts +112 -0
  267. package/src/tools/smartfetch/constants.ts +29 -0
  268. package/src/tools/smartfetch/index.ts +8 -0
  269. package/src/tools/smartfetch/network.test.ts +178 -0
  270. package/src/tools/smartfetch/network.ts +614 -0
  271. package/src/tools/smartfetch/secondary-model.test.ts +85 -0
  272. package/src/tools/smartfetch/secondary-model.ts +276 -0
  273. package/src/tools/smartfetch/tool.test.ts +60 -0
  274. package/src/tools/smartfetch/tool.ts +832 -0
  275. package/src/tools/smartfetch/types.ts +135 -0
  276. package/src/tools/smartfetch/utils.test.ts +24 -0
  277. package/src/tools/smartfetch/utils.ts +456 -0
  278. package/src/tui-state.test.ts +867 -0
  279. package/src/tui-state.ts +1255 -0
  280. package/src/tui.test.ts +336 -0
  281. package/src/tui.ts +1539 -0
  282. package/src/utils/agent-variant.test.ts +244 -0
  283. package/src/utils/agent-variant.ts +187 -0
  284. package/src/utils/compat.ts +91 -0
  285. package/src/utils/env.ts +12 -0
  286. package/src/utils/index.ts +9 -0
  287. package/src/utils/internal-initiator.ts +28 -0
  288. package/src/utils/logger.test.ts +220 -0
  289. package/src/utils/logger.ts +136 -0
  290. package/src/utils/polling.test.ts +191 -0
  291. package/src/utils/polling.ts +67 -0
  292. package/src/utils/session-manager.test.ts +173 -0
  293. package/src/utils/session-manager.ts +356 -0
  294. package/src/utils/session.test.ts +110 -0
  295. package/src/utils/session.ts +389 -0
  296. package/src/utils/subagent-depth.test.ts +170 -0
  297. package/src/utils/subagent-depth.ts +75 -0
  298. package/src/utils/system-collapse.test.ts +86 -0
  299. package/src/utils/system-collapse.ts +24 -0
  300. package/src/utils/task.test.ts +24 -0
  301. package/src/utils/task.ts +20 -0
  302. package/src/utils/zip-extractor.ts +102 -0
@@ -0,0 +1,135 @@
1
/**
 * Options object for smartfetch.
 * `binaryDir` is an optional string; how the directory is used is not visible
 * in this file (presumably where a helper binary lives; confirm against the
 * tool implementation).
 */
+ export type SmartfetchOptions = {
2
+ binaryDir?: string;
3
+ };
4
+
5
/**
 * Identifies a model by a provider ID / model ID pair.
 * NOTE(review): the name suggests this is the model used for secondary
 * processing of fetched content; confirm against secondary-model.ts.
 */
+ export type SecondaryModel = {
6
+ providerID: string;
7
+ modelID: string;
8
+ };
9
+
10
/**
 * One entry of a `redirectChain` array: the URL redirected from, the URL
 * redirected to, and a numeric status (presumably the HTTP status of the
 * redirecting response; confirm against the network code).
 */
+ export type RedirectStep = {
11
+ from: string;
12
+ to: string;
13
+ status: number;
14
+ };
15
+
16
/**
 * Result record for a fetch whose body was handled as text.
 *
 * It carries the same content in several renderings (`rawContent`, `markdown`,
 * `text`, `html`) plus `sourceKind` ('llms_txt' | 'html' | 'text'), which the
 * smartfetch utils use together with `extractedMain` and `truncated` to decide
 * which rendering to return. Unlike `BinaryFetch`, it has no `binary` field.
 * Most remaining fields are optional response, cache, charset-decoding and
 * secondary-model metadata; their producers are not visible in this file.
 */
+ export type CachedFetch = {
17
+ requestedUrl: string;
18
+ finalUrl: string;
19
+ statusCode: number;
20
+ contentType: string;
21
+ charset?: string;
22
+ etag?: string;
23
+ lastModified?: string;
24
+ contentLength?: number;
25
+ filename?: string;
26
+ canonicalUrl?: string;
27
+ headings?: string[];
28
+ title?: string;
29
+ rawContent: string;
30
+ markdown: string;
31
+ text: string;
32
+ html: string;
33
+ extractedMain: boolean;
34
+ usedLlmsTxt: boolean;
35
+ sourceKind: 'llms_txt' | 'html' | 'text';
36
+ upgradedToHttps: boolean;
37
+ redirectChain: RedirectStep[];
38
+ truncated: boolean;
39
+ wordCount: number;
40
+ qualitySignals?: string[];
41
+ llmsProbeError?: string;
42
+ llmsProbeTruncated?: boolean;
43
+ cacheRevalidated?: boolean;
44
+ upstreamStatusCode?: number;
45
+ cacheHit?: boolean;
46
+ decodedCharset?: string;
47
+ decodeFallback?: boolean;
48
+ decodeWarning?: string;
49
+ secondaryModelInputTruncated?: boolean;
50
+ secondaryModelInputChars?: number;
51
+ secondaryModelSourceChars?: number;
52
+ };
53
+
54
/**
 * Result record for a fetch whose body is binary.
 *
 * The literal `binary: true` field tells this variant apart from `CachedFetch`
 * inside `FetchResult`; `binaryKind` classifies the payload. `data` is optional,
 * so a value may carry metadata only (see also `metadataOnly`).
 * The response and cache metadata fields mirror those on `CachedFetch`.
 */
+ export type BinaryFetch = {
55
+ requestedUrl: string;
56
+ finalUrl: string;
57
+ statusCode: number;
58
+ contentType: string;
59
+ charset?: string;
60
+ etag?: string;
61
+ lastModified?: string;
62
+ contentLength?: number;
63
+ filename?: string;
64
+ canonicalUrl?: string;
65
+ redirectChain: RedirectStep[];
66
+ upgradedToHttps: boolean;
67
+ truncated: boolean;
68
+ binary: true;
69
+ binaryKind: 'image' | 'audio' | 'video' | 'pdf' | 'binary';
70
+ downloadLimitBytes?: number;
71
+ metadataOnly?: boolean;
72
+ data?: Uint8Array;
73
+ llmsProbeError?: string;
74
+ llmsProbeTruncated?: boolean;
75
+ cacheRevalidated?: boolean;
76
+ upstreamStatusCode?: number;
77
+ cacheHit?: boolean;
78
+ };
79
+
80
/**
 * Either kind of fetch outcome. Only `BinaryFetch` declares a `binary`
 * property, so `'binary' in result` narrows the union.
 */
+ export type FetchResult = CachedFetch | BinaryFetch;
81
+
82
/**
 * Decoded text together with the charset name recorded for it, a flag
 * marking a fallback decode, and an optional warning message.
 * NOTE(review): the exact fallback policy lives in the decoder, which is not
 * visible in this file.
 */
+ export type DecodedBody = {
83
+ text: string;
84
+ decodedCharset: string;
85
+ decodeFallback: boolean;
86
+ decodeWarning?: string;
87
+ };
88
+
89
/**
 * Return shape of `extractFromHtml` in the smartfetch utils: the original
 * HTML (`rawContent`), the selected HTML fragment (`html`) and its `text` and
 * `markdown` renderings, plus optional `title`, `canonicalUrl` and `headings`.
 * `extractedMain` is true when the fragment came from main-content extraction
 * rather than from the whole document body.
 */
+ export type ExtractedContent = {
90
+ title?: string;
91
+ rawContent: string;
92
+ markdown: string;
93
+ text: string;
94
+ html: string;
95
+ extractedMain: boolean;
96
+ canonicalUrl?: string;
97
+ headings?: string[];
98
+ };
99
+
100
/**
 * Outcome of a redirect-following fetch, one of two shapes:
 *  - a blocked redirect (`blockedRedirect: true`) carrying the redirect URL,
 *    a status code and the chain recorded so far; or
 *  - a completed fetch carrying the `Response`, the final URL and the chain.
 * Only the first shape has `blockedRedirect`, so `'blockedRedirect' in result`
 * narrows the union.
 */
+ export type FetchWithRedirectsResult =
101
+ | {
102
+ blockedRedirect: true;
103
+ redirectUrl: string;
104
+ statusCode: number;
105
+ redirectChain: RedirectStep[];
106
+ }
107
+ | {
108
+ response: Response;
109
+ finalUrl: string;
110
+ redirectChain: RedirectStep[];
111
+ };
112
+
113
/**
 * Outcome of an llms.txt probe, one of two shapes:
 *  - a populated result with the probed `url`, status, redirect chain, decoded
 *    `text`, selected response `headers` and decode/truncation metadata; or
 *  - an object carrying only an optional `error` message.
 * The second shape has no required fields, so narrow with `'text' in result`
 * (or another required key of the first shape) rather than by checking `error`.
 */
+ export type LlmsProbeResult =
114
+ | {
115
+ url: string;
116
+ statusCode: number;
117
+ redirectChain: RedirectStep[];
118
+ text: string;
119
+ headers: {
120
+ contentType?: string;
121
+ charset?: string;
122
+ etag?: string;
123
+ lastModified?: string;
124
+ contentLength?: number;
125
+ filename?: string;
126
+ };
127
+ truncated: boolean;
128
+ decodedCharset: string;
129
+ decodeFallback: boolean;
130
+ decodeWarning?: string;
131
+ upgradedToHttps: boolean;
132
+ }
133
+ | {
134
+ error?: string;
135
+ };
@@ -0,0 +1,24 @@
1
+ import { describe, expect, test } from 'bun:test';
2
+ import { extractHeadingsFromMarkdown, joinRenderedContent } from './utils';
3
+
4
// Unit tests for the pure string helpers exported by ./utils.
describe('smartfetch/utils', () => {
  test('extracts cleaned headings from markdown', () => {
    // Closing hashes are stripped, but the trailing "#" of "C#" must survive.
    const markdown = ['# Intro', '## Details ###', '### C#', 'plain text'].join(
      '\n',
    );

    expect(extractHeadingsFromMarkdown(markdown)).toEqual([
      'Intro',
      'Details',
      'C#',
    ]);
  });

  test('injects metadata comments after an XML declaration in html output', () => {
    const metadata = '---\nsource: "smartfetch"\n---\n\n';
    const xml = '<?xml version="1.0"?><root>ok</root>';

    const rendered = joinRenderedContent(metadata, xml, 'html');

    // The XML declaration has to stay first; the comment goes right after it.
    expect(rendered).toStartWith('<?xml version="1.0"?>');
    expect(rendered).toContain('<!--\n---\nsource: "smartfetch"\n---\n-->');
    expect(rendered).toContain('<root>ok</root>');
  });
});
@@ -0,0 +1,456 @@
1
+ import { Readability } from '@mozilla/readability';
2
+ import TurndownService from 'turndown';
3
+ import type { CachedFetch, ExtractedContent } from './types';
4
+
5
/** Cached promise for the dynamic `jsdom` import; unset until first use. */
let jsdomPromise: Promise<typeof import('jsdom')> | undefined;

/**
 * Resolves the `JSDOM` constructor.
 *
 * The `jsdom` module is imported on the first call only; every later call
 * awaits the same cached import promise.
 */
async function getJSDOM() {
  if (jsdomPromise === undefined) {
    jsdomPromise = import('jsdom');
  }
  const jsdomModule = await jsdomPromise;
  return jsdomModule.JSDOM;
}
12
+
13
/**
 * Counts whitespace-separated words.
 *
 * @param text Text to measure.
 * @returns 0 for empty or whitespace-only input, otherwise the number of
 *   runs of non-whitespace characters.
 */
export function wordCount(text: string) {
  const body = text.trim();
  return body === '' ? 0 : body.split(/\s+/).length;
}
18
+
19
/**
 * Size of `text` in bytes when encoded as UTF-8.
 * Falsy input is measured as the empty string.
 */
function byteLength(text: string) {
  const measured = text ? text : '';
  return Buffer.byteLength(measured, 'utf8');
}
22
+
23
/**
 * Serializes a value with `JSON.stringify`, substituting the empty string
 * for `null` and `undefined` so they render as `""`.
 */
function quote(value: unknown) {
  const printable = value === null || value === undefined ? '' : value;
  return JSON.stringify(printable);
}
26
+
27
/**
 * Renders a metadata record as a `---`-delimited frontmatter block.
 *
 * - `undefined` values are skipped.
 * - Empty arrays render as `key: []`.
 * - Non-empty arrays render as `key:` followed by one ` - item` line each.
 * - Everything else renders as `key: value`, with the value passed to `quote`.
 *
 * @returns The block followed by one blank line (the string ends in "\n\n").
 */
export function frontmatter(metadata: Record<string, unknown>) {
  const rendered: string[] = ['---'];
  Object.entries(metadata).forEach(([key, value]) => {
    if (value === undefined) return;
    if (!Array.isArray(value)) {
      rendered.push(`${key}: ${quote(value)}`);
      return;
    }
    if (value.length === 0) {
      rendered.push(`${key}: []`);
      return;
    }
    rendered.push(`${key}:`);
    // Array.from walks the iterator, so holes are visited just like for...of.
    rendered.push(...Array.from(value, (item) => ` - ${quote(item)}`));
  });
  return [...rendered, '---', '', ''].join('\n');
}
45
+
46
/**
 * Collapses every run of three or more newlines to exactly two (one blank
 * line) and trims leading and trailing whitespace from the result.
 */
export function trimBlankRuns(input: string) {
  const collapsed = input.replace(/\n{3,}/g, '\n\n');
  return collapsed.trim();
}
49
+
50
/** Final cleanup for text extracted from the DOM; delegates to `trimBlankRuns`. */
function cleanExtractedText(input: string) {
  const cleaned = trimBlankRuns(input);
  return cleaned;
}
53
+
54
/**
 * Applies `transform` to every stretch of `input` that lies outside a
 * ``` or ~~~ delimited span, leaving the delimited spans untouched.
 *
 * @param input Markdown-like text.
 * @param transform Rewriter applied to each non-code segment (including
 *   empty segments).
 * @returns The segments re-joined in their original order.
 */
function mapOutsideCodeBlocks(
  input: string,
  transform: (value: string) => string,
) {
  const fencePattern = /(```[\s\S]*?```|~~~[\s\S]*?~~~)/g;
  let output = '';
  // Splitting on a capturing group puts each matched fence at an odd index.
  input.split(fencePattern).forEach((segment, position) => {
    const isFence = position % 2 === 1;
    output += isFence ? segment : transform(segment);
  });
  return output;
}
63
+
64
/**
 * Flattens a DOM subtree into plain text that keeps a rough block structure.
 *
 * - SCRIPT, STYLE, NOSCRIPT and TEMPLATE subtrees are skipped.
 * - Block-level elements are surrounded by blank lines; list items get a
 *   single line break and a "- " prefix; BR yields one line break.
 * - PRE content is emitted verbatim (after `trimBlankRuns`) between blank lines.
 * - Whitespace inside other text nodes is collapsed to single spaces.
 *
 * A whitespace-only text node between two pieces of inline content is kept as
 * a single separating space, so `<a>foo</a> <a>bar</a>` yields "foo bar"
 * rather than "foobar".
 *
 * @param root Element to serialize, or null.
 * @returns The cleaned text, or '' when `root` is null.
 */
function extractStructuredText(root: Element | null) {
  if (!root) return '';
  const chunks: string[] = [];
  // Set when a whitespace-only text node follows inline content; consumed by
  // the next text chunk and discarded by any line break.
  let pendingSpace = false;
  const ignoredTags = new Set(['SCRIPT', 'STYLE', 'NOSCRIPT', 'TEMPLATE']);
  const blockTags = new Set([
    'ARTICLE',
    'ASIDE',
    'BLOCKQUOTE',
    'DIV',
    'DL',
    'DT',
    'DD',
    'FIGCAPTION',
    'FIGURE',
    'FOOTER',
    'FORM',
    'H1',
    'H2',
    'H3',
    'H4',
    'H5',
    'H6',
    'HEADER',
    'HR',
    'LI',
    'MAIN',
    'NAV',
    'OL',
    'P',
    'PRE',
    'SECTION',
    'TABLE',
    'TBODY',
    'TD',
    'TH',
    'THEAD',
    'TR',
    'UL',
  ]);
  const isText = (node: Node) => node.nodeType === node.TEXT_NODE;
  const isElement = (node: Node) => node.nodeType === node.ELEMENT_NODE;
  const pushText = (value: string) => {
    const normalized = value.replace(/\s+/g, ' ');
    const previous = chunks[chunks.length - 1];
    // At the start of output, or right after a newline/space, leading
    // whitespace would be redundant.
    const atBoundary = !previous || /\n$| $/.test(previous);
    if (!normalized.trim()) {
      // Whitespace-only node: remember it only if it separates inline content.
      if (!atBoundary) pendingSpace = true;
      return;
    }
    if (atBoundary) {
      chunks.push(normalized.trimStart());
    } else if (pendingSpace && !normalized.startsWith(' ')) {
      chunks.push(` ${normalized}`);
    } else {
      chunks.push(normalized);
    }
    pendingSpace = false;
  };
  const pushBreak = (count = 1) => {
    // A line break already separates the content, so drop any pending space.
    pendingSpace = false;
    const wanted = '\n'.repeat(count);
    const last = chunks[chunks.length - 1] || '';
    const trailing = last.match(/\n+$/)?.[0].length || 0;
    if (trailing >= count) return;
    if (trailing > 0) {
      // Grow the existing run of newlines instead of stacking another chunk.
      chunks[chunks.length - 1] = last.replace(/\n+$/, '') + wanted;
      return;
    }
    chunks.push(wanted);
  };
  const visit = (node: Node) => {
    if (isText(node)) {
      pushText(node.textContent || '');
      return;
    }
    if (!isElement(node)) return;
    const element = node as Element;
    const tag = element.tagName;
    if (ignoredTags.has(tag)) return;
    if (tag === 'BR') {
      pushBreak(1);
      return;
    }
    if (tag === 'PRE') {
      // Preformatted text keeps its internal whitespace.
      const text = trimBlankRuns(element.textContent || '');
      if (!text) return;
      pushBreak(2);
      chunks.push(text);
      pushBreak(2);
      return;
    }
    const isBlock = blockTags.has(tag);
    if (isBlock) pushBreak(tag === 'LI' ? 1 : 2);
    if (tag === 'LI') chunks.push('- ');
    for (const child of element.childNodes) visit(child);
    if (isBlock) pushBreak(tag === 'LI' ? 1 : 2);
  };
  visit(root);
  return cleanExtractedText(chunks.join(''));
}
156
+
157
/**
 * Normalizes heading text.
 *
 * Trailing pilcrow (¶) permalink markers are removed, then ATX-style closing
 * hashes ("Title ##") are stripped. The exact names "C#" and "F#" are
 * returned unchanged.
 */
export function cleanHeadingText(input: string) {
  const heading = trimBlankRuns(input).replace(/¶+$/g, '').trim();
  const isSharpLanguage = /^(?:C|F)#$/.test(heading);
  const hasClosingHashes = /\s#+$/.test(heading);
  if (isSharpLanguage || !hasClosingHashes) return heading;
  return heading.replace(/\s#+$/g, '').trim();
}
165
+
166
/**
 * Tidies markdown produced from fetched pages. Code fences are left untouched.
 *
 * Outside code fences it:
 *  - replaces image-only lines (and bare "Image" lines) with "Image omitted";
 *  - unwraps headings whose text is wrapped in stray quotes or brackets;
 *  - drops "[¶](#… "Permanent link")" suffixes from headings;
 *  - drops a trailing "(#anchor)" from lines.
 *
 * The line-anchored patterns only consume horizontal whitespace next to `^`
 * and `$`. With plain `\s*` and the `m` flag they would also swallow the
 * neighbouring newline, which removed the blank line separating the rewritten
 * line from the adjacent paragraph.
 *
 * @returns The cleaned markdown with blank-line runs collapsed and trimmed.
 */
export function cleanFetchedMarkdown(input: string) {
  const output = mapOutsideCodeBlocks(input, (value) =>
    value
      .replace(
        /^[^\S\r\n]*!\[[^\]]*\]\([^)]+\)[^\S\r\n]*$/gm,
        'Image omitted',
      )
      .replace(/(^|\n)Image(?=\n|$)/g, '$1Image omitted')
      .replace(
        /^[^\S\r\n]*(#{1,6})[^\S\r\n]*\\?\['([^'\n]+)'[^\S\r\n]*$/gm,
        '$1 $2',
      )
      .replace(
        /^[^\S\r\n]*(#{1,6})[^\S\r\n]*'([^'\n]+)'\][^\S\r\n]*$/gm,
        '$1 $2',
      )
      .replace(
        /^[^\S\r\n]*(#{1,6})[^\S\r\n]*'([^'\n]+)'[^\S\r\n]*$/gm,
        '$1 $2',
      )
      .replace(
        /(#{1,6}[^\n]*?)\s*\[¶\]\(#.*?"Permanent link"\)[^\S\r\n]*$/gm,
        '$1',
      )
      .replace(/\s+\(#[A-Za-z0-9_-]+\)[^\S\r\n]*$/gm, ''),
  );

  return trimBlankRuns(output);
}
180
+
181
/** Cleans fetched plain text by collapsing blank-line runs and trimming. */
export function cleanFetchedText(input: string) {
  const cleaned = trimBlankRuns(input);
  return cleaned;
}
184
+
185
/**
 * Escapes the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`)
 * so the text can be embedded in element content or attribute values.
 */
export function escapeHtml(input: string) {
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  // Single pass: each special character is looked up exactly once, so the
  // ampersands introduced by the entities themselves are never re-escaped.
  return input.replace(/[&<>"']/g, (character) => entities[character] ?? character);
}
193
+
194
/**
 * Appends a "[..content truncated..]" marker when `truncated` is true.
 *
 * For `html` the marker is an HTML comment on its own line; for `text` and
 * `markdown` it is a plain line preceded by a blank line.
 *
 * @returns `content` unchanged when `truncated` is false.
 */
export function withTruncationMarker(
  content: string,
  format: 'text' | 'markdown' | 'html',
  truncated: boolean,
) {
  if (!truncated) return content;
  const marker =
    format === 'html'
      ? '\n<!-- [..content truncated..] -->'
      : '\n\n[..content truncated..]';
  return content + marker;
}
203
+
204
/**
 * Combines a metadata header with rendered content.
 *
 * - No metadata: the content is returned unchanged.
 * - `html`: the metadata is wrapped in an HTML comment. If the content starts
 *   with an XML declaration, the comment is inserted after it so the
 *   declaration stays first.
 * - `text` / `markdown`: the metadata is prepended. If the content itself
 *   starts with a `---` frontmatter block, a "Source content:" line is placed
 *   between the two so the blocks are not confused.
 *
 * Metadata may contain strings taken from fetched pages (for example the page
 * title). Inside the HTML comment every `-->` and `--!>` is therefore written
 * with `\u003e` in place of `>`, so the metadata cannot end the comment early.
 * NOTE(review): metadata appears to come from `frontmatter()`, whose
 * JSON-quoted values decode `\u003e` back to `>`; confirm against the caller.
 *
 * @param metadata Header text; may be empty.
 * @param content Already-rendered body.
 * @param format Output format the body was rendered for.
 */
export function joinRenderedContent(
  metadata: string,
  content: string,
  format: 'text' | 'markdown' | 'html',
) {
  if (!metadata) return content;
  if (format === 'html') {
    // Neutralize comment terminators; "---" fences contain no ">" and pass through.
    const safeMetadata = metadata.trim().replace(/--(!?)>/g, '--$1\\u003e');
    if (!content) return `<!--\n${safeMetadata}\n-->`;
    const comment = `<!--\n${safeMetadata}\n-->\n`;
    const xmlDecl = content.match(/^\s*(<\?xml[\s\S]*?\?>\s*)/i);
    if (xmlDecl) {
      return `${xmlDecl[1]}${comment}${content.slice(xmlDecl[0].length)}`;
    }
    return `${comment}${content}`;
  }
  if (!content) return metadata;
  const startsWithFrontmatter = /^---(?:\r?\n|$)/.test(content);
  if (!startsWithFrontmatter) return `${metadata}${content}`;
  return `${metadata}Source content:\n\n${content}`;
}
225
+
226
/**
 * Presents a plain-text message in the requested output format.
 * For `html` the message is HTML-escaped and wrapped in `<pre>`; for `text`
 * and `markdown` it is returned as is.
 */
export function renderMessageForFormat(
  content: string,
  format: 'text' | 'markdown' | 'html',
) {
  return format !== 'html' ? content : `<pre>${escapeHtml(content)}</pre>`;
}
233
+
234
/**
 * Builds the multi-line notice returned when a redirect is not followed.
 *
 * @param originalUrl URL that was requested.
 * @param redirectUrl Target the server redirected to.
 * @param statusCode Status code reported alongside the redirect.
 */
export function buildRedirectResultMessage(
  originalUrl: string,
  redirectUrl: string,
  statusCode: number,
) {
  const details =
    'Original URL: ' +
    originalUrl +
    '\n' +
    'Redirect URL: ' +
    redirectUrl +
    '\n' +
    'Status: ' +
    statusCode;
  return (
    'Redirect was blocked by policy.\n' +
    details +
    '\n\n' +
    'Re-run webfetch with the redirect URL to continue.'
  );
}
248
+
249
/**
 * Builds the notice returned when llms.txt content was required but could
 * not be obtained. The "Reason:" line is included only for a non-empty reason.
 */
export function buildLlmsRequiredMessage(originalUrl: string, reason?: string) {
  const lines = [
    'Required llms.txt content was unavailable.',
    `Original URL: ${originalUrl}`,
  ];
  if (reason) lines.push(`Reason: ${reason}`);
  return lines.join('\n');
}
256
+
257
/**
 * Shared HTML-to-Markdown converter: ATX headings, "-" bullets and fenced
 * code blocks.
 */
const turndown = new TurndownService({
  headingStyle: 'atx',
  bulletListMarker: '-',
  codeBlockStyle: 'fenced',
});

// Non-content elements are removed from the output entirely.
turndown.remove(['script', 'style', 'noscript', 'meta', 'link']);

// So are heading permalink anchors (<a title="Permanent link">).
turndown.remove((node: unknown) => {
  const element = node as Element;
  if (element.nodeName !== 'A') return false;
  return /permanent link/i.test(element.getAttribute('title') || '');
});

// <pre><code> becomes a plain fenced block built from the code's text content.
turndown.addRule('fenced-pre-code', {
  filter(node: unknown) {
    const element = node as Element;
    return element.nodeName === 'PRE' && Boolean(element.querySelector('code'));
  },
  replacement(_content: string, node: unknown) {
    const pre = node as Element;
    const source = pre.querySelector('code')?.textContent || pre.textContent || '';
    const text = trimBlankRuns(source);
    return text ? `\n\n\`\`\`\n${text}\n\`\`\`\n\n` : '';
  },
});
285
+
286
/**
 * Parses an HTML document and derives text and markdown renderings from it.
 *
 * Title, canonical URL and up to 12 h1–h3 headings are always read from the
 * full document. When `extractMain` is true and Readability returns non-empty
 * article content, the renderings are built from that article; otherwise they
 * are built from the document body.
 *
 * @param html Raw HTML source.
 * @param finalUrl URL the document was served from; used as the DOM base URL
 *   and to resolve a relative canonical link.
 * @param extractMain Whether to attempt main-content extraction.
 * @returns The extracted content; `extractedMain` reports which path was taken.
 */
export async function extractFromHtml(
  html: string,
  finalUrl: string,
  extractMain: boolean,
): Promise<ExtractedContent> {
  const JSDOM = await getJSDOM();
  const { document } = new JSDOM(html, { url: finalUrl }).window;
  const title = document.title || undefined;

  // An unparsable canonical href is treated as absent.
  let canonicalUrl: string | undefined;
  const canonicalHref =
    document.querySelector('link[rel="canonical"]')?.getAttribute('href') ||
    undefined;
  if (canonicalHref) {
    try {
      canonicalUrl = new URL(canonicalHref, finalUrl).toString();
    } catch {
      canonicalUrl = undefined;
    }
  }

  const headings = Array.from(
    document.querySelectorAll<HTMLElement>('h1, h2, h3'),
    (node) => cleanHeadingText(node.textContent || ''),
  )
    .filter(Boolean)
    .slice(0, 12);

  if (extractMain) {
    // Readability gets its own DOM instance, separate from the one read above.
    const readerDocument = new JSDOM(html, { url: finalUrl }).window.document;
    const article = new Readability(readerDocument).parse();
    if (article?.content?.trim()) {
      const container = readerDocument.createElement('div');
      container.innerHTML = article.content;
      const articleText = extractStructuredText(container);
      const articleMarkdown = trimBlankRuns(turndown.turndown(article.content));
      return {
        title: article.title || title,
        rawContent: html,
        html: article.content,
        text: articleText,
        markdown: articleMarkdown,
        extractedMain: true,
        canonicalUrl,
        headings,
      };
    }
  }

  // Fallback: render the whole body (or the raw HTML if the body is empty).
  const bodyHtml = document.body?.innerHTML || html;
  const bodyText = extractStructuredText(document.body);
  const bodyMarkdown = trimBlankRuns(turndown.turndown(bodyHtml));
  return {
    title,
    rawContent: html,
    html: bodyHtml,
    text: bodyText,
    markdown: bodyMarkdown,
    extractedMain: false,
    canonicalUrl,
    headings,
  };
}
348
+
349
/**
 * Reads a leading `---` frontmatter block as flat `key: value` pairs.
 *
 * Only single-line scalar entries are recognized; a matching pair of
 * surrounding single or double quotes is stripped from the value. Lines that
 * do not look like `key: value` are ignored.
 *
 * @returns The parsed pairs, or undefined when `content` does not start with
 *   a frontmatter block.
 */
function parseFrontmatterBlock(content: string) {
  const block = /^---\r?\n([\s\S]*?)\r?\n---\r?\n?/.exec(content);
  if (block === null) return undefined;
  const fields: Record<string, string> = {};
  block[1].split(/\r?\n/).forEach((line) => {
    const pair = /^([A-Za-z0-9_-]+):\s*(.+?)\s*$/.exec(line);
    if (pair !== null) {
      fields[pair[1]] = pair[2].replace(/^(['"])(.*)\1$/, '$2');
    }
  });
  return fields;
}
360
+
361
/**
 * Derives a canonical URL from the `url` entry of a document's frontmatter.
 *
 * @param content Text that may start with a frontmatter block.
 * @param finalUrl Base URL used to resolve a relative `url` value.
 * @returns The absolute URL, or undefined when there is no `url` entry or it
 *   cannot be parsed.
 */
export function inferCanonicalUrlFromText(content: string, finalUrl: string) {
  const declaredUrl = parseFrontmatterBlock(content)?.url;
  if (!declaredUrl) return undefined;
  let resolved: string | undefined;
  try {
    resolved = new URL(declaredUrl, finalUrl).toString();
  } catch {
    resolved = undefined;
  }
  return resolved;
}
371
+
372
/**
 * Collects up to 12 ATX headings (`#` … `######`) from markdown.
 *
 * Text inside ``` or ~~~ delimited spans is ignored, so shell or Python
 * comments such as `# install deps` in a code block are not reported as
 * headings. The same fence pattern is used by `mapOutsideCodeBlocks`.
 *
 * @param content Markdown source.
 * @returns The cleaned heading texts in document order, or undefined when no
 *   heading is found.
 */
export function extractHeadingsFromMarkdown(content: string) {
  // Splitting on a capturing group puts each matched fence at an odd index.
  const prose = content
    .split(/(```[\s\S]*?```|~~~[\s\S]*?~~~)/g)
    .filter((_part, index) => index % 2 === 0)
    .join('');
  const headings = prose
    .split(/\r?\n/)
    .filter((line) => /^#{1,6}\s+/.test(line))
    .map((line) => cleanHeadingText(line.replace(/^#{1,6}\s+/, '')))
    .filter(Boolean)
    .slice(0, 12);
  return headings.length ? headings : undefined;
}
381
+
382
/**
 * Flags likely low-quality fetch results.
 *
 * Possible signals, in the order they are reported:
 *  - `very_short_content`: between 1 and 59 words;
 *  - `possible_paywall`: text or markdown contains a subscribe/sign-in phrase;
 *  - `high_boilerplate_ratio`: HTML source without main-content extraction
 *    whose raw bytes are at least 10x the extracted text bytes, with fewer
 *    than 1200 words.
 *
 * @returns The detected signal names; empty when nothing was flagged.
 */
export function detectQualitySignals(
  fetchResult: Pick<
    CachedFetch,
    | 'text'
    | 'markdown'
    | 'rawContent'
    | 'wordCount'
    | 'sourceKind'
    | 'extractedMain'
  >,
) {
  const { wordCount: words, sourceKind, extractedMain } = fetchResult;
  const detected = new Set<string>();
  const haystack = `${fetchResult.text}\n${fetchResult.markdown}`.toLowerCase();
  const paywallPattern =
    /(subscribe to continue|subscription required|sign in to continue|log in to continue|create an account to continue|members only|premium content|paywall)/i;

  if (words > 0 && words < 60) detected.add('very_short_content');

  if (paywallPattern.test(haystack)) detected.add('possible_paywall');

  if (sourceKind === 'html') {
    // Clamp to 1 byte so an empty rendering cannot divide by zero.
    const renderedBytes = Math.max(byteLength(fetchResult.text), 1);
    const ratio = byteLength(fetchResult.rawContent) / renderedBytes;
    const mostlyBoilerplate = !extractedMain && ratio >= 10 && words < 1200;
    if (mostlyBoilerplate) detected.add('high_boilerplate_ratio');
  }

  return Array.from(detected);
}
423
+
424
/**
 * Chooses which rendering of a fetch result to return for `format`, then
 * appends the truncation marker when the result was truncated.
 *
 * - `html`: for HTML sources, the extracted article HTML when main-content
 *   extraction succeeded, otherwise the raw page; for non-HTML sources, the
 *   text (or raw content if the text is empty) rendered as an HTML message.
 * - `text`: the cleaned plain-text rendering.
 * - `markdown`: the cleaned markdown rendering.
 */
export function pickContent(
  fetchResult: CachedFetch,
  format: 'text' | 'markdown' | 'html',
) {
  let body: string;
  switch (format) {
    case 'html':
      if (fetchResult.sourceKind === 'html') {
        body = fetchResult.extractedMain
          ? fetchResult.html
          : fetchResult.rawContent;
      } else {
        body = renderMessageForFormat(
          fetchResult.text || fetchResult.rawContent,
          format,
        );
      }
      break;
    case 'text':
      body = cleanFetchedText(fetchResult.text);
      break;
    default:
      body = cleanFetchedMarkdown(fetchResult.markdown);
      break;
  }
  return withTruncationMarker(body, format, fetchResult.truncated);
}