@oh-my-pi/pi-coding-agent 15.9.67 → 15.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. package/CHANGELOG.md +136 -0
  2. package/dist/types/cli/args.d.ts +1 -1
  3. package/dist/types/cli/dry-balance-cli.d.ts +15 -1
  4. package/dist/types/cli/gallery-cli.d.ts +43 -0
  5. package/dist/types/cli/gallery-fixtures/agentic.d.ts +2 -0
  6. package/dist/types/cli/gallery-fixtures/codeintel.d.ts +3 -0
  7. package/dist/types/cli/gallery-fixtures/edit.d.ts +3 -0
  8. package/dist/types/cli/gallery-fixtures/fs.d.ts +2 -0
  9. package/dist/types/cli/gallery-fixtures/index.d.ts +4 -0
  10. package/dist/types/cli/gallery-fixtures/interaction.d.ts +3 -0
  11. package/dist/types/cli/gallery-fixtures/memory.d.ts +2 -0
  12. package/dist/types/cli/gallery-fixtures/misc.d.ts +3 -0
  13. package/dist/types/cli/gallery-fixtures/search.d.ts +3 -0
  14. package/dist/types/cli/gallery-fixtures/shell.d.ts +3 -0
  15. package/dist/types/cli/gallery-fixtures/types.d.ts +44 -0
  16. package/dist/types/cli/gallery-fixtures/web.d.ts +2 -0
  17. package/dist/types/cli/gallery-screenshot.d.ts +35 -0
  18. package/dist/types/commands/gallery.d.ts +47 -0
  19. package/dist/types/commit/analysis/conventional.d.ts +2 -2
  20. package/dist/types/commit/analysis/summary.d.ts +2 -2
  21. package/dist/types/commit/changelog/generate.d.ts +2 -2
  22. package/dist/types/commit/changelog/index.d.ts +2 -2
  23. package/dist/types/commit/map-reduce/index.d.ts +3 -3
  24. package/dist/types/commit/map-reduce/map-phase.d.ts +2 -2
  25. package/dist/types/commit/map-reduce/reduce-phase.d.ts +2 -2
  26. package/dist/types/commit/model-selection.d.ts +10 -4
  27. package/dist/types/config/api-key-resolver.d.ts +34 -0
  28. package/dist/types/config/keybindings.d.ts +6 -1
  29. package/dist/types/config/model-id-affixes.d.ts +2 -0
  30. package/dist/types/config/model-registry.d.ts +25 -2
  31. package/dist/types/config/settings-schema.d.ts +41 -6
  32. package/dist/types/dap/config.d.ts +14 -1
  33. package/dist/types/dap/types.d.ts +10 -0
  34. package/dist/types/extensibility/plugins/marketplace-auto-update.d.ts +8 -0
  35. package/dist/types/lsp/types.d.ts +10 -0
  36. package/dist/types/lsp/utils.d.ts +3 -2
  37. package/dist/types/main.d.ts +3 -2
  38. package/dist/types/memory-backend/index.d.ts +2 -1
  39. package/dist/types/memory-backend/resolve.d.ts +1 -1
  40. package/dist/types/memory-backend/types.d.ts +1 -1
  41. package/dist/types/modes/components/chat-block.d.ts +64 -0
  42. package/dist/types/modes/components/custom-editor.d.ts +5 -1
  43. package/dist/types/modes/components/overlay-box.d.ts +17 -0
  44. package/dist/types/modes/components/plan-review-overlay.d.ts +59 -0
  45. package/dist/types/modes/components/plan-toc.d.ts +41 -0
  46. package/dist/types/modes/components/read-tool-group.d.ts +2 -0
  47. package/dist/types/modes/components/tool-execution.d.ts +18 -0
  48. package/dist/types/modes/components/transcript-container.d.ts +11 -0
  49. package/dist/types/modes/controllers/command-controller.d.ts +1 -0
  50. package/dist/types/modes/controllers/event-controller.d.ts +0 -1
  51. package/dist/types/modes/controllers/extension-ui-controller.d.ts +0 -1
  52. package/dist/types/modes/controllers/input-controller.d.ts +1 -1
  53. package/dist/types/modes/controllers/selector-controller.d.ts +1 -1
  54. package/dist/types/modes/controllers/streaming-reveal.d.ts +22 -0
  55. package/dist/types/modes/controllers/tan-command-controller.d.ts +6 -0
  56. package/dist/types/modes/index.d.ts +5 -4
  57. package/dist/types/modes/interactive-mode.d.ts +16 -6
  58. package/dist/types/modes/setup-version.d.ts +11 -0
  59. package/dist/types/modes/setup-wizard/index.d.ts +2 -1
  60. package/dist/types/modes/setup-wizard/scenes/web-search.d.ts +2 -1
  61. package/dist/types/modes/theme/theme.d.ts +1 -1
  62. package/dist/types/modes/types.d.ts +19 -6
  63. package/dist/types/modes/utils/copy-targets.d.ts +21 -1
  64. package/dist/types/plan-mode/approved-plan.d.ts +27 -8
  65. package/dist/types/plan-mode/plan-protection.d.ts +4 -4
  66. package/dist/types/sdk.d.ts +3 -1
  67. package/dist/types/session/agent-session.d.ts +21 -0
  68. package/dist/types/session/messages.d.ts +12 -0
  69. package/dist/types/session/session-manager.d.ts +3 -1
  70. package/dist/types/slash-commands/types.d.ts +4 -6
  71. package/dist/types/task/executor.d.ts +14 -0
  72. package/dist/types/task/index.d.ts +1 -0
  73. package/dist/types/task/render.d.ts +3 -2
  74. package/dist/types/telemetry-export.d.ts +1 -1
  75. package/dist/types/tools/archive-reader.d.ts +5 -0
  76. package/dist/types/tools/ast-edit.d.ts +3 -0
  77. package/dist/types/tools/ast-grep.d.ts +3 -0
  78. package/dist/types/tools/bash.d.ts +1 -0
  79. package/dist/types/tools/eval-render.d.ts +1 -8
  80. package/dist/types/tools/fetch.d.ts +15 -7
  81. package/dist/types/tools/find.d.ts +8 -4
  82. package/dist/types/tools/grouped-file-output.d.ts +95 -12
  83. package/dist/types/tools/memory-render.d.ts +4 -1
  84. package/dist/types/tools/plan-mode-guard.d.ts +8 -9
  85. package/dist/types/tools/render-utils.d.ts +13 -9
  86. package/dist/types/tools/renderers.d.ts +16 -2
  87. package/dist/types/tools/search.d.ts +5 -1
  88. package/dist/types/tools/sqlite-reader.d.ts +1 -0
  89. package/dist/types/tools/todo.d.ts +3 -2
  90. package/dist/types/tools/write.d.ts +5 -0
  91. package/dist/types/tui/output-block.d.ts +16 -4
  92. package/dist/types/tui/status-line.d.ts +3 -0
  93. package/dist/types/utils/enhanced-paste.d.ts +20 -0
  94. package/dist/types/web/scrapers/github.d.ts +22 -0
  95. package/dist/types/web/search/providers/kimi.d.ts +1 -1
  96. package/dist/types/web/search/providers/perplexity.d.ts +8 -1
  97. package/dist/types/web/search/types.d.ts +1 -1
  98. package/package.json +9 -9
  99. package/scripts/dev-launch +42 -0
  100. package/scripts/dev-launch-preload.ts +19 -0
  101. package/src/auto-thinking/classifier.ts +5 -1
  102. package/src/cli/args.ts +2 -2
  103. package/src/cli/dry-balance-cli.ts +52 -17
  104. package/src/cli/gallery-cli.ts +226 -0
  105. package/src/cli/gallery-fixtures/agentic.ts +292 -0
  106. package/src/cli/gallery-fixtures/codeintel.ts +188 -0
  107. package/src/cli/gallery-fixtures/edit.ts +194 -0
  108. package/src/cli/gallery-fixtures/fs.ts +153 -0
  109. package/src/cli/gallery-fixtures/index.ts +40 -0
  110. package/src/cli/gallery-fixtures/interaction.ts +49 -0
  111. package/src/cli/gallery-fixtures/memory.ts +81 -0
  112. package/src/cli/gallery-fixtures/misc.ts +250 -0
  113. package/src/cli/gallery-fixtures/search.ts +213 -0
  114. package/src/cli/gallery-fixtures/shell.ts +167 -0
  115. package/src/cli/gallery-fixtures/types.ts +41 -0
  116. package/src/cli/gallery-fixtures/web.ts +158 -0
  117. package/src/cli/gallery-screenshot.ts +279 -0
  118. package/src/cli-commands.ts +1 -0
  119. package/src/commands/gallery.ts +52 -0
  120. package/src/commands/launch.ts +1 -1
  121. package/src/commit/analysis/conventional.ts +2 -2
  122. package/src/commit/analysis/summary.ts +2 -2
  123. package/src/commit/changelog/generate.ts +2 -2
  124. package/src/commit/changelog/index.ts +2 -2
  125. package/src/commit/map-reduce/index.ts +3 -3
  126. package/src/commit/map-reduce/map-phase.ts +2 -2
  127. package/src/commit/map-reduce/reduce-phase.ts +2 -2
  128. package/src/commit/model-selection.ts +33 -9
  129. package/src/commit/pipeline.ts +4 -4
  130. package/src/config/api-key-resolver.ts +58 -0
  131. package/src/config/keybindings.ts +15 -6
  132. package/src/config/model-equivalence.ts +35 -12
  133. package/src/config/model-id-affixes.ts +39 -22
  134. package/src/config/model-registry.ts +41 -18
  135. package/src/config/settings-schema.ts +28 -5
  136. package/src/config/settings.ts +31 -2
  137. package/src/dap/client.ts +14 -16
  138. package/src/dap/config.ts +41 -2
  139. package/src/dap/defaults.json +1 -0
  140. package/src/dap/session.ts +1 -0
  141. package/src/dap/types.ts +10 -0
  142. package/src/debug/index.ts +40 -54
  143. package/src/edit/renderer.ts +111 -119
  144. package/src/eval/__tests__/agent-bridge.test.ts +75 -32
  145. package/src/eval/__tests__/llm-bridge.test.ts +90 -31
  146. package/src/eval/agent-bridge.ts +34 -7
  147. package/src/eval/llm-bridge.ts +8 -3
  148. package/src/extensibility/extensions/runner.ts +1 -0
  149. package/src/extensibility/plugins/doctor.ts +0 -1
  150. package/src/extensibility/plugins/marketplace-auto-update.ts +49 -0
  151. package/src/goals/tools/goal-tool.ts +37 -27
  152. package/src/internal-urls/docs-index.generated.ts +10 -10
  153. package/src/lsp/client.ts +104 -55
  154. package/src/lsp/types.ts +10 -0
  155. package/src/lsp/utils.ts +3 -2
  156. package/src/main.ts +53 -56
  157. package/src/memories/index.ts +12 -5
  158. package/src/memory-backend/index.ts +13 -1
  159. package/src/memory-backend/resolve.ts +3 -5
  160. package/src/memory-backend/types.ts +1 -1
  161. package/src/mnemopi/backend.ts +5 -1
  162. package/src/modes/acp/acp-agent.ts +33 -26
  163. package/src/modes/components/assistant-message.ts +2 -9
  164. package/src/modes/components/chat-block.ts +111 -0
  165. package/src/modes/components/copy-selector.ts +1 -44
  166. package/src/modes/components/custom-editor.ts +33 -1
  167. package/src/modes/components/custom-message.ts +1 -3
  168. package/src/modes/components/execution-shared.ts +1 -2
  169. package/src/modes/components/hook-message.ts +1 -3
  170. package/src/modes/components/overlay-box.ts +108 -0
  171. package/src/modes/components/plan-review-overlay.ts +799 -0
  172. package/src/modes/components/plan-toc.ts +138 -0
  173. package/src/modes/components/read-tool-group.ts +20 -4
  174. package/src/modes/components/skill-message.ts +0 -1
  175. package/src/modes/components/status-line.ts +3 -5
  176. package/src/modes/components/tips.txt +1 -0
  177. package/src/modes/components/todo-reminder.ts +0 -2
  178. package/src/modes/components/tool-execution.ts +115 -90
  179. package/src/modes/components/transcript-container.ts +84 -24
  180. package/src/modes/components/user-message.ts +1 -2
  181. package/src/modes/controllers/command-controller-shared.ts +7 -6
  182. package/src/modes/controllers/command-controller.ts +70 -57
  183. package/src/modes/controllers/event-controller.ts +41 -40
  184. package/src/modes/controllers/extension-ui-controller.ts +10 -73
  185. package/src/modes/controllers/input-controller.ts +135 -122
  186. package/src/modes/controllers/mcp-command-controller.ts +69 -60
  187. package/src/modes/controllers/selector-controller.ts +25 -27
  188. package/src/modes/controllers/streaming-reveal.ts +212 -0
  189. package/src/modes/controllers/tan-command-controller.ts +173 -0
  190. package/src/modes/index.ts +5 -4
  191. package/src/modes/interactive-mode.ts +171 -82
  192. package/src/modes/setup-version.ts +11 -0
  193. package/src/modes/setup-wizard/index.ts +3 -2
  194. package/src/modes/setup-wizard/scenes/web-search.ts +3 -2
  195. package/src/modes/setup-wizard/wizard-overlay.ts +1 -1
  196. package/src/modes/theme/theme-schema.json +1 -1
  197. package/src/modes/theme/theme.ts +8 -4
  198. package/src/modes/types.ts +19 -8
  199. package/src/modes/utils/context-usage.ts +10 -6
  200. package/src/modes/utils/copy-targets.ts +133 -27
  201. package/src/modes/utils/hotkeys-markdown.ts +1 -0
  202. package/src/modes/utils/ui-helpers.ts +44 -46
  203. package/src/plan-mode/approved-plan.ts +66 -43
  204. package/src/plan-mode/plan-protection.ts +4 -4
  205. package/src/prompts/system/background-tan-dispatch.md +8 -0
  206. package/src/prompts/system/plan-mode-active.md +67 -58
  207. package/src/prompts/system/plan-mode-approved.md +1 -1
  208. package/src/sdk.ts +32 -60
  209. package/src/session/agent-session.ts +89 -13
  210. package/src/session/messages.ts +26 -0
  211. package/src/session/session-manager.ts +13 -5
  212. package/src/slash-commands/builtin-registry.ts +37 -10
  213. package/src/slash-commands/helpers/usage-report.ts +2 -0
  214. package/src/slash-commands/types.ts +4 -6
  215. package/src/task/executor.ts +25 -4
  216. package/src/task/index.ts +4 -0
  217. package/src/task/render.ts +212 -148
  218. package/src/telemetry-export.ts +25 -7
  219. package/src/tools/archive-reader.ts +64 -0
  220. package/src/tools/ask.ts +119 -164
  221. package/src/tools/ast-edit.ts +98 -71
  222. package/src/tools/ast-grep.ts +37 -43
  223. package/src/tools/bash.ts +50 -6
  224. package/src/tools/debug.ts +20 -8
  225. package/src/tools/eval-backends.ts +6 -17
  226. package/src/tools/eval-render.ts +21 -18
  227. package/src/tools/eval.ts +5 -4
  228. package/src/tools/fetch.ts +391 -91
  229. package/src/tools/find.ts +44 -30
  230. package/src/tools/gh-renderer.ts +81 -42
  231. package/src/tools/grouped-file-output.ts +272 -48
  232. package/src/tools/image-gen.ts +150 -103
  233. package/src/tools/inspect-image-renderer.ts +63 -41
  234. package/src/tools/inspect-image.ts +8 -1
  235. package/src/tools/job.ts +3 -4
  236. package/src/tools/memory-render.ts +4 -1
  237. package/src/tools/plan-mode-guard.ts +21 -39
  238. package/src/tools/read.ts +23 -16
  239. package/src/tools/render-utils.ts +38 -40
  240. package/src/tools/renderers.ts +16 -1
  241. package/src/tools/report-tool-issue.ts +1 -1
  242. package/src/tools/resolve.ts +14 -0
  243. package/src/tools/search-tool-bm25.ts +36 -23
  244. package/src/tools/search.ts +189 -95
  245. package/src/tools/sqlite-reader.ts +9 -12
  246. package/src/tools/todo.ts +138 -59
  247. package/src/tools/write.ts +100 -60
  248. package/src/tui/output-block.ts +60 -13
  249. package/src/tui/status-line.ts +5 -1
  250. package/src/utils/commit-message-generator.ts +9 -1
  251. package/src/utils/enhanced-paste.ts +202 -0
  252. package/src/utils/title-generator.ts +2 -1
  253. package/src/web/scrapers/github.ts +255 -3
  254. package/src/web/scrapers/youtube.ts +3 -2
  255. package/src/web/search/providers/anthropic.ts +25 -19
  256. package/src/web/search/providers/exa.ts +11 -3
  257. package/src/web/search/providers/kimi.ts +28 -17
  258. package/src/web/search/providers/parallel.ts +35 -24
  259. package/src/web/search/providers/perplexity.ts +199 -51
  260. package/src/web/search/providers/synthetic.ts +8 -6
  261. package/src/web/search/providers/tavily.ts +9 -8
  262. package/src/web/search/providers/zai.ts +8 -6
  263. package/src/web/search/render.ts +39 -54
  264. package/src/web/search/types.ts +5 -1
  265. package/dist/types/eval/__tests__/shared-executors.test.d.ts +0 -1
  266. package/src/eval/__tests__/shared-executors.test.ts +0 -609
@@ -1,4 +1,6 @@
1
+ import { Database } from "bun:sqlite";
1
2
  import * as fs from "node:fs/promises";
3
+ import * as os from "node:os";
2
4
  import * as path from "node:path";
3
5
  import type { AgentToolResult } from "@oh-my-pi/pi-agent-core";
4
6
  import type { ImageContent, TextContent } from "@oh-my-pi/pi-ai";
@@ -8,6 +10,7 @@ import { $which, ptree, truncate } from "@oh-my-pi/pi-utils";
8
10
  import { parseHTML } from "linkedom";
9
11
  import { LRUCache } from "lru-cache/raw";
10
12
  import type { Settings } from "../config/settings";
13
+ import { readEditableNotebookText } from "../edit/notebook";
11
14
  import type { RenderResultOptions } from "../extensibility/custom-tools/types";
12
15
  import { type Theme, theme } from "../modes/theme/theme";
13
16
  import type { ToolSession } from "../sdk";
@@ -22,10 +25,12 @@ import { specialHandlers } from "../web/scrapers";
22
25
  import type { RenderResult } from "../web/scrapers/types";
23
26
  import { finalizeOutput, loadPage, looksLikeHtml, MAX_OUTPUT_CHARS } from "../web/scrapers/types";
24
27
  import { convertWithMarkit, fetchBinary } from "../web/scrapers/utils";
28
+ import { type ArchiveFormat, listArchiveRoot, sniffArchiveFormat } from "./archive-reader";
25
29
  import { applyListLimit } from "./list-limit";
26
30
  import { formatStyledArtifactReference, type OutputMeta } from "./output-meta";
27
31
  import { type LineRange, parseLineRanges } from "./path-utils";
28
- import { formatExpandHint, getDomain, replaceTabs } from "./render-utils";
32
+ import { formatBytes, formatExpandHint, getDomain, replaceTabs } from "./render-utils";
33
+ import { listTables, looksLikeSqlite, renderTableList } from "./sqlite-reader";
29
34
  import { ToolAbortError, ToolError } from "./tool-errors";
30
35
  import { toolResult } from "./tool-result";
31
36
  import { clampTimeout } from "./tool-timeouts";
@@ -46,8 +51,6 @@ const CONVERTIBLE_MIMES = new Set([
46
51
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
47
52
  "application/rtf",
48
53
  "application/epub+zip",
49
- "application/x-ipynb+json",
50
- "application/zip",
51
54
  "image/png",
52
55
  "image/jpeg",
53
56
  "image/gif",
@@ -67,7 +70,6 @@ const CONVERTIBLE_EXTENSIONS = new Set([
67
70
  ".xlsx",
68
71
  ".rtf",
69
72
  ".epub",
70
- ".ipynb",
71
73
  ".png",
72
74
  ".jpg",
73
75
  ".jpeg",
@@ -78,6 +80,27 @@ const CONVERTIBLE_EXTENSIONS = new Set([
78
80
  ".ogg",
79
81
  ]);
80
82
 
83
+ const NOTEBOOK_MIMES = new Set(["application/x-ipynb+json"]);
84
+ const NOTEBOOK_EXTENSIONS = new Set([".ipynb"]);
85
+
86
+ const SQLITE_MIMES = new Set([
87
+ "application/vnd.sqlite3",
88
+ "application/x-sqlite3",
89
+ "application/sqlite3",
90
+ "application/sqlite",
91
+ ]);
92
+ const SQLITE_EXTENSIONS = new Set([".sqlite", ".sqlite3", ".db", ".db3"]);
93
+
94
+ const ARCHIVE_MIMES = new Set([
95
+ "application/zip",
96
+ "application/x-zip-compressed",
97
+ "application/x-tar",
98
+ "application/tar",
99
+ "application/gzip",
100
+ "application/x-gzip",
101
+ ]);
102
+ const ARCHIVE_EXTENSIONS = new Set([".zip", ".tar", ".tar.gz", ".tgz", ".gz"]);
103
+
81
104
  const IMAGE_MIME_BY_EXTENSION = new Map<string, string>([
82
105
  [".png", "image/png"],
83
106
  [".jpg", "image/jpeg"],
@@ -261,6 +284,12 @@ function normalizeMime(contentType: string): string {
261
284
  return contentType.split(";")[0].trim().toLowerCase();
262
285
  }
263
286
 
287
+ function getFilenameExtensionHint(filename: string): string {
288
+ const lower = filename.toLowerCase();
289
+ if (lower.endsWith(".tar.gz")) return ".tar.gz";
290
+ return path.extname(filename).toLowerCase();
291
+ }
292
+
264
293
  /**
265
294
  * Get extension from URL or Content-Disposition
266
295
  */
@@ -269,7 +298,7 @@ function getExtensionHint(url: string, contentDisposition?: string): string {
269
298
  if (contentDisposition) {
270
299
  const match = contentDisposition.match(/filename[*]?=["']?([^"';\n]+)/i);
271
300
  if (match) {
272
- const ext = path.extname(match[1]).toLowerCase();
301
+ const ext = getFilenameExtensionHint(match[1]);
273
302
  if (ext) return ext;
274
303
  }
275
304
  }
@@ -277,7 +306,7 @@ function getExtensionHint(url: string, contentDisposition?: string): string {
277
306
  // Fall back to URL path
278
307
  try {
279
308
  const pathname = new URL(url).pathname;
280
- const ext = path.extname(pathname).toLowerCase();
309
+ const ext = getFilenameExtensionHint(pathname);
281
310
  if (ext) return ext;
282
311
  } catch {}
283
312
 
@@ -581,14 +610,25 @@ function parseFeedToMarkdown(content: string, maxItems = 10): string {
581
610
  */
582
611
  const REMOTE_READER_MAX_MS = 10_000;
583
612
 
613
+ /** Reader backends for {@link renderHtmlToText}, in default priority order. */
614
+ export type FetchProvider = "native" | "trafilatura" | "lynx" | "parallel" | "jina";
615
+
616
+ const FETCH_PROVIDER_ORDER: readonly FetchProvider[] = ["native", "trafilatura", "lynx", "parallel", "jina"];
617
+
584
618
  /**
585
- * Render HTML to markdown using Parallel, jina, trafilatura, lynx, then the
586
- * in-process native converter. The overall `timeout` budget bounds the call,
587
- * but remote reader requests are additionally capped at `REMOTE_READER_MAX_MS`
588
- * so that a hung remote endpoint cannot prevent local fallbacks from running.
589
- * Only a real `userSignal` cancellation aborts the chain — remote per-attempt
590
- * timeouts and the overall reader-mode timeout still allow later renderers
591
- * (especially the purely-local native converter) to be tried.
619
+ * Render HTML to markdown by trying reader backends in priority order: native
620
+ * (in-process), trafilatura, lynx, Parallel, then Jina. The `providers.fetch`
621
+ * setting picks the order — `auto` uses the default above; any specific backend
622
+ * is tried first, then the remaining backends as fallbacks. Every backend's
623
+ * output must clear the same quality gate (>100 non-whitespace chars and not
624
+ * {@link isLowQualityOutput}) before it is accepted, otherwise the next backend
625
+ * is tried.
626
+ *
627
+ * The overall `timeout` budget bounds the whole call; remote backends (Parallel,
628
+ * Jina) are additionally capped at `REMOTE_READER_MAX_MS` so a hung endpoint
629
+ * cannot starve later renderers — especially the purely-local native converter,
630
+ * which always works on already-loaded HTML. Only a real `userSignal`
631
+ * cancellation aborts the chain (#1449).
592
632
  */
593
633
  export async function renderHtmlToText(
594
634
  url: string,
@@ -607,92 +647,74 @@ export async function renderHtmlToText(
607
647
  signal: overallSignal,
608
648
  };
609
649
  const remoteBudgetMs = Math.min(timeout * 1000, REMOTE_READER_MAX_MS);
610
-
611
- // Try Parallel extract first when credentials are configured
612
- if (settings.get("providers.parallelFetch") && findParallelApiKey(storage)) {
613
- try {
650
+ // Per-attempt budget for remote endpoints so one stall cannot consume the
651
+ // whole reader-mode budget and starve the local fallbacks.
652
+ const remoteSignal = () => ptree.combineSignals(userSignal, remoteBudgetMs);
653
+
654
+ const runners: Record<FetchProvider, () => Promise<string | null>> = {
655
+ // Purely local, no network/subprocess: still works on already-loaded HTML
656
+ // even after remote/subprocess attempts are aborted by the budget.
657
+ native: () => htmlToMarkdown(html, { cleanContent: true }),
658
+ trafilatura: async () => {
659
+ const trafilatura = await ensureTool("trafilatura", { signal: overallSignal, silent: true });
660
+ if (!trafilatura) return null;
661
+ const result = await ptree.exec([trafilatura, "-u", url, "--output-format", "markdown"], execOptions);
662
+ return result.ok ? result.stdout : null;
663
+ },
664
+ lynx: async () => {
665
+ if (!hasCommand("lynx")) return null;
666
+ const result = await ptree.exec(["lynx", "-dump", "-nolist", "-width", "250", url], execOptions);
667
+ return result.ok ? result.stdout : null;
668
+ },
669
+ parallel: async () => {
670
+ if (!findParallelApiKey(storage)) return null;
614
671
  const parallelResult = await extractWithParallel(
615
672
  [url],
616
- {
617
- objective: "Extract the main content",
618
- excerpts: true,
619
- fullContent: false,
620
- signal: ptree.combineSignals(userSignal, remoteBudgetMs),
621
- },
673
+ { objective: "Extract the main content", excerpts: true, fullContent: false, signal: remoteSignal() },
622
674
  storage,
623
675
  );
624
676
  const firstDocument = parallelResult.results[0];
625
- if (firstDocument) {
626
- const content = getParallelExtractContent(firstDocument);
627
- if (content.trim().length > 100 && !isLowQualityOutput(content)) {
628
- return { content, ok: true, method: "parallel" };
629
- }
630
- }
631
- } catch {
632
- // Parallel extract failed or stalled; honour real cancellation only.
633
- userSignal?.throwIfAborted();
634
- }
635
- }
636
-
637
- // Try jina reader API with its own sub-budget so a stall cannot starve
638
- // later fallbacks (#1449).
639
- try {
640
- const jinaUrl = `https://r.jina.ai/${url}`;
641
- const response = await fetch(jinaUrl, {
642
- headers: { Accept: "text/markdown" },
643
- signal: ptree.combineSignals(userSignal, remoteBudgetMs),
644
- });
645
- if (response.ok) {
646
- const content = await response.text();
647
- if (content.trim().length > 100 && !isLowQualityOutput(content)) {
648
- return { content, ok: true, method: "jina" };
649
- }
650
- }
651
- } catch {
652
- // Jina failed or stalled; honour real cancellation only.
653
- userSignal?.throwIfAborted();
654
- }
677
+ return firstDocument ? getParallelExtractContent(firstDocument) : null;
678
+ },
679
+ jina: async () => {
680
+ const response = await fetch(`https://r.jina.ai/${url}`, {
681
+ headers: { Accept: "text/markdown" },
682
+ signal: remoteSignal(),
683
+ });
684
+ return response.ok ? await response.text() : null;
685
+ },
686
+ };
655
687
 
656
- // Try trafilatura (auto-install via uv/pip)
657
- try {
658
- const trafilatura = await ensureTool("trafilatura", { signal: overallSignal, silent: true });
659
- if (trafilatura) {
660
- const result = await ptree.exec([trafilatura, "-u", url, "--output-format", "markdown"], execOptions);
661
- if (result.ok && result.stdout.trim().length > 100) {
662
- return { content: result.stdout, ok: true, method: "trafilatura" };
663
- }
664
- }
665
- } catch {
666
- // trafilatura unavailable or stalled; continue to next method.
688
+ const preference = settings.get("providers.fetch");
689
+ const order: readonly FetchProvider[] =
690
+ preference === "auto"
691
+ ? FETCH_PROVIDER_ORDER
692
+ : [preference, ...FETCH_PROVIDER_ORDER.filter(method => method !== preference)];
693
+
694
+ // Highest-priority output that is substantial but fails the low-quality gate.
695
+ // Surfaced (ok: true) only when no backend clears the gate, so the caller's
696
+ // targeted fallbacks (llms.txt / document extraction) still run and we beat
697
+ // returning the unrendered raw HTML.
698
+ let lowQuality: { content: string; method: FetchProvider } | null = null;
699
+
700
+ for (const method of order) {
701
+ // Honour real user cancellation between attempts; remote per-attempt and
702
+ // overall-budget timeouts still fall through to later (local) renderers.
667
703
  userSignal?.throwIfAborted();
668
- }
669
-
670
- // Try lynx (can't auto-install, system package)
671
- try {
672
- const lynx = hasCommand("lynx");
673
- if (lynx) {
674
- const result = await ptree.exec(["lynx", "-dump", "-nolist", "-width", "250", url], execOptions);
675
- if (result.ok) {
676
- return { content: result.stdout, ok: true, method: "lynx" };
704
+ try {
705
+ const content = await runners[method]();
706
+ if (!content || content.trim().length <= 100) continue;
707
+ if (!isLowQualityOutput(content)) {
708
+ return { content, ok: true, method };
677
709
  }
710
+ lowQuality ??= { content, method };
711
+ } catch {
712
+ userSignal?.throwIfAborted();
678
713
  }
679
- } catch {
680
- // lynx failed or stalled; continue to native converter.
681
- userSignal?.throwIfAborted();
682
714
  }
683
715
 
684
- // Fall back to native converter (purely local, no network/subprocess).
685
- // Always attempted: even if remote renderers and subprocesses were aborted
686
- // by the overall reader-mode timeout, this still works on already-loaded
687
- // HTML (#1449).
688
- try {
689
- const content = await htmlToMarkdown(html, { cleanContent: true });
690
- if (content.trim().length > 100 && !isLowQualityOutput(content)) {
691
- return { content, ok: true, method: "native" };
692
- }
693
- } catch {
694
- // Native converter failed; nothing else to try.
695
- userSignal?.throwIfAborted();
716
+ if (lowQuality) {
717
+ return { content: lowQuality.content, ok: true, method: lowQuality.method };
696
718
  }
697
719
  return { content: "", ok: false, method: "none" };
698
720
  }
@@ -745,6 +767,254 @@ type FetchRenderResult = RenderResult & {
745
767
  image?: FetchImagePayload;
746
768
  };
747
769
 
770
/** Upper bound on how many leading characters `sampleLooksBinary` inspects. */
const BINARY_SAMPLE_CHARS = 4_096;
/** `limit` handed to the archive root listing when rendering a fetched archive. */
const URL_ARCHIVE_LIST_LIMIT = 500;
/** `limit` handed to the table listing when rendering a fetched SQLite database. */
const URL_SQLITE_LIST_LIMIT = 500;
773
+
774
/**
 * Heuristically decide whether decoded text is really binary data.
 *
 * Only the first `BINARY_SAMPLE_CHARS` characters are examined.
 *
 * @param text - Decoded response body.
 * @returns `true` when the sample contains a NUL character, or when it holds at
 *   least three U+FFFD replacement characters that make up more than 1% of the
 *   sample; `false` otherwise (including for empty input).
 */
function sampleLooksBinary(text: string): boolean {
	const sample = text.slice(0, BINARY_SAMPLE_CHARS);
	if (sample.length === 0) return false;

	// A single NUL is treated as conclusive.
	if (sample.includes("\u0000")) return true;

	// Splitting on U+FFFD yields one more piece than there are occurrences.
	const replacementCount = sample.split("\uFFFD").length - 1;
	if (replacementCount < 3) return false;
	return replacementCount / sample.length > 0.01;
}
787
+
788
/**
 * Report whether the response metadata points at a notebook.
 *
 * @param mime - MIME type to look up in `NOTEBOOK_MIMES`.
 * @param extensionHint - Extension to look up in `NOTEBOOK_EXTENSIONS`.
 * @returns `true` when either lookup matches.
 */
function isNotebookHint(mime: string, extensionHint: string): boolean {
	if (NOTEBOOK_MIMES.has(mime)) return true;
	return NOTEBOOK_EXTENSIONS.has(extensionHint);
}
791
+
792
/**
 * Report whether the response metadata points at a SQLite database.
 *
 * @param mime - MIME type to look up in `SQLITE_MIMES`.
 * @param extensionHint - Extension to look up in `SQLITE_EXTENSIONS`.
 * @returns `true` when either lookup matches.
 */
function isSqliteHint(mime: string, extensionHint: string): boolean {
	if (SQLITE_MIMES.has(mime)) return true;
	return SQLITE_EXTENSIONS.has(extensionHint);
}
795
+
796
/**
 * Report whether the response metadata points at an archive.
 *
 * @param mime - MIME type to look up in `ARCHIVE_MIMES`.
 * @param extensionHint - Extension to look up in `ARCHIVE_EXTENSIONS`.
 * @returns `true` when either lookup matches.
 */
function isArchiveHint(mime: string, extensionHint: string): boolean {
	if (ARCHIVE_MIMES.has(mime)) return true;
	return ARCHIVE_EXTENSIONS.has(extensionHint);
}
799
+
800
/**
 * Map a MIME type / extension hint onto a concrete archive format.
 *
 * Rules are evaluated in order (zip, tar, tar.gz) and the first rule whose
 * extension OR MIME list matches wins, so a zip MIME type takes precedence
 * over a `.tar` extension.
 *
 * NOTE(review): a bare `.gz` extension and the generic gzip MIME types are
 * reported as "tar.gz" — presumably because there is no plain-gzip format to
 * return; confirm against the `ArchiveFormat` definition.
 *
 * @param mime - Response MIME type.
 * @param extensionHint - Extension hint, including the leading dot.
 * @returns The matching format, or `undefined` when no rule applies.
 */
function getArchiveFormatHint(mime: string, extensionHint: string): ArchiveFormat | undefined {
	const rules: ReadonlyArray<{
		format: ArchiveFormat;
		extensions: readonly string[];
		mimes: readonly string[];
	}> = [
		{
			format: "zip",
			extensions: [".zip"],
			mimes: ["application/zip", "application/x-zip-compressed"],
		},
		{
			format: "tar",
			extensions: [".tar"],
			mimes: ["application/x-tar", "application/tar"],
		},
		{
			format: "tar.gz",
			extensions: [".tar.gz", ".tgz", ".gz"],
			mimes: ["application/gzip", "application/x-gzip"],
		},
	];

	for (const rule of rules) {
		if (rule.extensions.includes(extensionHint) || rule.mimes.includes(mime)) {
			return rule.format;
		}
	}
	return undefined;
}
818
+
819
/**
 * Turn a caught value into a human-readable message.
 *
 * @param error - Anything that was thrown.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function formatErrorMessage(error: unknown): string {
	if (error instanceof Error) {
		return error.message;
	}
	return String(error);
}
822
+
823
/**
 * Pick the content type to report for a binary payload.
 *
 * @param mime - MIME type of the response; may be empty.
 * @returns `mime` when it is non-empty, otherwise "application/octet-stream".
 */
function binaryContentType(mime: string): string {
	if (mime) {
		return mime;
	}
	return "application/octet-stream";
}
826
+
827
/**
 * Build the one-line placeholder shown in place of binary content.
 *
 * @param finalUrl - URL appended after the bracketed summary.
 * @param mime - MIME type; normalised through `binaryContentType`.
 * @param byteLength - Payload size in bytes; when omitted the notice says
 *   "unknown size" instead of a value formatted by `formatBytes`.
 * @returns A string of the form `[Binary content: <type>, <size>] <url>`.
 */
function buildBinaryNotice(finalUrl: string, mime: string, byteLength?: number): string {
	let sizeLabel = "unknown size";
	if (byteLength !== undefined) {
		sizeLabel = formatBytes(byteLength);
	}
	const contentType = binaryContentType(mime);
	return `[Binary content: ${contentType}, ${sizeLabel}] ${finalUrl}`;
}
831
+
832
/**
 * Assemble the render result for a binary-oriented payload.
 *
 * The text is passed through `finalizeOutput`; its `content` and `truncated`
 * fields are what end up on the result.
 *
 * @param url - URL as originally requested.
 * @param finalUrl - URL the payload was actually fetched from.
 * @param mime - MIME type; normalised through `binaryContentType`.
 * @param method - Label for how the content was produced.
 * @param content - Rendered text for the payload.
 * @param fetchedAt - Fetch timestamp, stored as given.
 * @param notes - Notes array, stored by reference (not copied).
 * @returns The populated `FetchRenderResult`.
 */
function buildBinaryPayloadResult(
	url: string,
	finalUrl: string,
	mime: string,
	method: string,
	content: string,
	fetchedAt: string,
	notes: string[],
): FetchRenderResult {
	const { content: finalContent, truncated } = finalizeOutput(content);
	const contentType = binaryContentType(mime);
	return { url, finalUrl, contentType, method, content: finalContent, fetchedAt, truncated, notes };
}
853
+
854
/**
 * Write `bytes` to a scratch file, run a callback against it, then clean up.
 *
 * A fresh directory is created under the OS temp dir using `prefix`; the file
 * inside it is named `payload<extension>`. The directory is removed
 * (recursively, with `force`) whether the write or the callback succeeds or
 * throws.
 *
 * @param prefix - Prefix for the temporary directory name.
 * @param extension - Extension appended to "payload", including the dot.
 * @param bytes - Data written to the scratch file.
 * @param readTempFile - Callback that receives the scratch file path.
 * @returns Whatever `readTempFile` resolves to.
 */
async function withTempBinaryFile<T>(
	prefix: string,
	extension: string,
	bytes: Uint8Array,
	readTempFile: (tempPath: string) => Promise<T>,
): Promise<T> {
	const scratchDir = await fs.mkdtemp(path.join(os.tmpdir(), prefix));
	try {
		const scratchFile = path.join(scratchDir, `payload${extension}`);
		await Bun.write(scratchFile, bytes);
		const outcome = await readTempFile(scratchFile);
		return outcome;
	} finally {
		await fs.rm(scratchDir, { recursive: true, force: true });
	}
}
869
+
870
/**
 * Render downloaded notebook bytes as text.
 *
 * The bytes are staged in a temporary `.ipynb` file via `withTempBinaryFile`
 * and read back with `readEditableNotebookText`.
 *
 * @param bytes - Raw notebook payload.
 * @param displayUrl - Passed through as the second argument of
 *   `readEditableNotebookText`.
 * @returns The text produced by `readEditableNotebookText`.
 */
async function renderNotebookPayload(bytes: Uint8Array, displayUrl: string): Promise<string> {
	const readNotebook = (tempPath: string) => readEditableNotebookText(tempPath, displayUrl);
	return withTempBinaryFile("omp-url-notebook-", ".ipynb", bytes, readNotebook);
}
875
+
876
/**
 * Render downloaded SQLite bytes as a table listing.
 *
 * The bytes are staged in a temporary `.sqlite` file, opened read-only, and
 * the tables returned by `listTables` (capped with `URL_SQLITE_LIST_LIMIT`)
 * are formatted with `renderTableList`. The database handle is closed before
 * the temporary file is removed.
 *
 * @param bytes - Raw database payload.
 * @returns The rendered table list.
 */
async function renderSqlitePayload(bytes: Uint8Array): Promise<string> {
	const describeTables = async (tempPath: string): Promise<string> => {
		// If the constructor throws there is no handle to close, so it sits outside the try.
		const db = new Database(tempPath, { readonly: true, strict: true });
		try {
			db.run("PRAGMA busy_timeout = 3000");
			const { items } = applyListLimit(listTables(db), { limit: URL_SQLITE_LIST_LIMIT });
			return renderTableList(items);
		} finally {
			db.close();
		}
	};
	return withTempBinaryFile("omp-url-sqlite-", ".sqlite", bytes, describeTables);
}
889
+
890
/**
 * Try to handle a response as a binary-oriented payload (notebook, SQLite
 * database, archive, or opaque binary).
 *
 * Nothing is fetched unless the MIME type / extension hints at one of those
 * kinds or the already-decoded text looks binary. Otherwise the payload is
 * re-fetched with `fetchBinary` and the renderers are tried in this order:
 * notebook, SQLite, archive. A renderer that throws does not propagate; the
 * failure is recorded in the notes and a generic binary notice is returned.
 *
 * @param url - URL as originally requested.
 * @param finalUrl - URL to fetch the bytes from and to show in notices.
 * @param mime - Response MIME type.
 * @param extHint - Extension hint derived before the binary fetch.
 * @param rawContent - Body as already decoded to text.
 * @param timeout - Forwarded to `fetchBinary`.
 * @param signal - Forwarded to `fetchBinary`.
 * @param fetchedAt - Fetch timestamp stored on the result.
 * @param notes - Existing notes; copied, never mutated.
 * @returns A render result, or `null` when the payload should be handled by
 *   the caller's remaining (text) branches.
 */
async function tryRenderBinaryPayload(
	url: string,
	finalUrl: string,
	mime: string,
	extHint: string,
	rawContent: string,
	timeout: number,
	signal: AbortSignal | undefined,
	fetchedAt: string,
	notes: readonly string[],
): Promise<FetchRenderResult | null> {
	const rawLooksBinary = sampleLooksBinary(rawContent);
	const hasKnownHint =
		isNotebookHint(mime, extHint) || isSqliteHint(mime, extHint) || isArchiveHint(mime, extHint);
	if (!hasKnownHint && !rawLooksBinary) {
		return null;
	}

	// Every result below shares this one copy of the caller's notes.
	const resultNotes = [...notes];
	const respond = (method: string, content: string): FetchRenderResult =>
		buildBinaryPayloadResult(url, finalUrl, mime, method, content, fetchedAt, resultNotes);
	const respondWithNotice = (byteLength?: number): FetchRenderResult =>
		respond("binary", buildBinaryNotice(finalUrl, mime, byteLength));

	const binary = await fetchBinary(finalUrl, timeout, signal);
	if (!binary.ok) {
		resultNotes.push(binary.error ? `Binary fetch failed: ${binary.error}` : "Binary fetch failed");
		return respondWithNotice();
	}

	const payload = binary.buffer;

	// Run one renderer; on any failure record why and degrade to the binary notice.
	const attempt = async (
		method: string,
		failurePrefix: string,
		render: () => string | Promise<string>,
	): Promise<FetchRenderResult> => {
		try {
			return respond(method, await render());
		} catch (error) {
			resultNotes.push(`${failurePrefix}: ${formatErrorMessage(error)}`);
			return respondWithNotice(payload.byteLength);
		}
	};

	// The binary response may carry a Content-Disposition that refines the extension.
	const binaryExtHint = getExtensionHint(finalUrl, binary.contentDisposition) || extHint;

	if (isNotebookHint(mime, binaryExtHint)) {
		return attempt("notebook", "Notebook rendering failed", () => renderNotebookPayload(payload, finalUrl));
	}

	if (isSqliteHint(mime, binaryExtHint) || looksLikeSqlite(payload)) {
		return attempt("sqlite", "SQLite rendering failed", () => renderSqlitePayload(payload));
	}

	// Content sniffing is only a fallback, and only for payloads that are not convertible.
	const archiveFormat =
		getArchiveFormatHint(mime, binaryExtHint) ??
		(isConvertible(mime, binaryExtHint) ? undefined : sniffArchiveFormat(payload));
	if (archiveFormat) {
		return attempt("archive", "Archive rendering failed", () =>
			listArchiveRoot(payload, archiveFormat, { limit: URL_ARCHIVE_LIST_LIMIT }),
		);
	}

	return rawLooksBinary ? respondWithNotice(payload.byteLength) : null;
}
1017
+
748
1018
  // =============================================================================
749
1019
  // Unified Special Handler Dispatch
750
1020
  // =============================================================================
@@ -991,6 +1261,19 @@ async function renderUrl(
991
1261
  }
992
1262
  }
993
1263
 
1264
+ const binaryPayloadResult = await tryRenderBinaryPayload(
1265
+ url,
1266
+ finalUrl,
1267
+ mime,
1268
+ extHint,
1269
+ rawContent,
1270
+ timeout,
1271
+ signal,
1272
+ fetchedAt,
1273
+ notes,
1274
+ );
1275
+ if (binaryPayloadResult) return binaryPayloadResult;
1276
+
994
1277
  // Step 4: Handle non-HTML text content
995
1278
  const isHtml = mime.includes("html") || mime.includes("xhtml");
996
1279
  const isJson = mime.includes("json");
@@ -999,7 +1282,7 @@ async function renderUrl(
999
1282
  const isFeed = mime.includes("rss") || mime.includes("atom") || mime.includes("feed");
1000
1283
 
1001
1284
  // Raw mode skips every text-shaping branch below (JSON pretty-print, feed-to-markdown,
1002
- // HTML extraction) and returns the response body verbatim. The image/markit branches
1285
+ // HTML extraction) and returns the response body verbatim. Binary-oriented branches
1003
1286
  // above already ran because raw isn't useful for binary payloads.
1004
1287
  if (raw) {
1005
1288
  const output = finalizeOutput(rawContent);
@@ -1141,10 +1424,27 @@ async function renderUrl(
1141
1424
  throw new ToolAbortError();
1142
1425
  }
1143
1426
 
1144
- // 5E: Render HTML with lynx or html2text
1427
+ // 5E: Render HTML via the reader-backend chain (native/trafilatura/lynx/parallel/jina)
1145
1428
  const htmlResult = await renderHtmlToText(finalUrl, rawContent, timeout, settings, signal, storage);
1146
1429
  if (!htmlResult.ok) {
1147
- notes.push("html rendering failed (lynx/html2text unavailable)");
1430
+ notes.push("html rendering failed (no reader backend produced usable output)");
1431
+
1432
+ const llmResult = await tryLlmEndpoints(finalUrl, timeout, signal);
1433
+ if (llmResult) {
1434
+ notes.push(`Used llms.txt fallback: ${llmResult.endpoint}`);
1435
+ const output = finalizeOutput(llmResult.content);
1436
+ return {
1437
+ url,
1438
+ finalUrl,
1439
+ contentType: "text/plain",
1440
+ method: "llms.txt",
1441
+ content: output.content,
1442
+ fetchedAt,
1443
+ truncated: output.truncated,
1444
+ notes,
1445
+ };
1446
+ }
1447
+
1148
1448
  const output = finalizeOutput(rawContent);
1149
1449
  return {
1150
1450
  url,