botholomew 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "botholomew",
3
- "version": "0.14.0",
3
+ "version": "0.15.0",
4
4
  "description": "An autonomous AI agent for knowledge work — works your task queue while you sleep.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -180,6 +180,10 @@ export async function endChatSession(session: ChatSession): Promise<void> {
180
180
  export async function clearChatSession(
181
181
  session: ChatSession,
182
182
  ): Promise<{ previousThreadId: string; newThreadId: string }> {
183
+ // Abort any in-flight stream up front so its callbacks don't continue to
184
+ // fire into the new thread (caused #190 — old messages reappearing on the
185
+ // next user submission).
186
+ abortActiveStream(session);
183
187
  const previousThreadId = session.threadId;
184
188
  await endThread(session.projectDir, previousThreadId);
185
189
  const newThreadId = await createThread(
@@ -5,13 +5,21 @@ export function registerChatCommand(program: Command) {
5
5
  .command("chat")
6
6
  .description(
7
7
  "Open the interactive chat TUI\n\n" +
8
- " Keyboard shortcuts:\n" +
8
+ " Tab navigation (Ctrl+<letter> from any tab):\n" +
9
+ " Ctrl+a Chat Ctrl+t Tasks Ctrl+w Workers\n" +
10
+ " Ctrl+o Tools Ctrl+r Threads ? Help (non-chat)\n" +
11
+ " Ctrl+n Context Ctrl+s Schedules Esc Return to Chat\n\n" +
12
+ " Chat input:\n" +
9
13
  " Enter Send message\n" +
10
- " ⌥+Enter Insert newline (multiline input)\n" +
11
- " ↑/↓ Browse input history\n\n" +
12
- " Commands:\n" +
13
- " /help Show keyboard shortcuts\n" +
14
- " /tools Open tool call inspector\n" +
14
+ " ⌥+Enter Insert newline\n" +
15
+ " ↑/↓ Browse input history\n" +
16
+ " Esc Steer / abort an in-flight turn\n" +
17
+ " Ctrl+J/K Navigate queued messages\n" +
18
+ " Ctrl+E/X Edit / remove the selected queued message\n\n" +
19
+ " Slash commands:\n" +
20
+ " /help Show chat-command reference (Help tab has the full keymap)\n" +
21
+ " /skills List available skills\n" +
22
+ " /clear End current thread and start a new one\n" +
15
23
  " /exit End the chat session",
16
24
  )
17
25
  .option("--thread-id <id>", "Resume an existing chat thread")
@@ -18,6 +18,10 @@ import { withDb } from "../db/connection.ts";
18
18
  import { indexStats } from "../db/embeddings.ts";
19
19
  import { migrate } from "../db/schema.ts";
20
20
  import { createMcpxClient } from "../mcpx/client.ts";
21
+ import {
22
+ type ContextFileMeta,
23
+ serializeContextFile,
24
+ } from "../utils/frontmatter.ts";
21
25
  import { logger } from "../utils/logger.ts";
22
26
 
23
27
  export function registerContextCommand(program: Command) {
@@ -46,21 +50,42 @@ export function registerContextCommand(program: Command) {
46
50
  const dir = program.opts().dir;
47
51
  const config = await loadConfig(dir);
48
52
  const mcpxClient = await createMcpxClient(dir);
49
- const spinner = createSpinner(`fetching ${url}`).start();
53
+ logger.info(`importing ${url}`);
50
54
  try {
51
55
  const fetched = await fetchUrl(url, config, mcpxClient, opts.prompt);
52
- spinner.update({ text: "writing to context/" });
53
56
  const dest = opts.path ?? deriveContextPath(url, fetched.source);
54
- await writeContextFile(dir, dest, fetched.content, {
57
+ const meta: ContextFileMeta = {
58
+ source_url: url,
59
+ imported_at: new Date().toISOString(),
60
+ };
61
+ // Title falls back to the URL when fetcher couldn't extract one —
62
+ // skip it in that case to avoid duplicating source_url.
63
+ if (fetched.title && fetched.title !== url) {
64
+ meta.title = fetched.title;
65
+ }
66
+ const body = serializeContextFile(meta, fetched.content);
67
+ await writeContextFile(dir, dest, body, {
55
68
  onConflict: opts.overwrite ? "overwrite" : "error",
56
69
  });
57
- spinner.success({
58
- text: `imported ${fetched.content.length} bytes → ${ansis.bold(`context/${dest}`)} (source: ${fetched.source ?? "http"})`,
70
+ logger.success(
71
+ `imported ${body.length} bytes → ${ansis.bold(`context/${dest}`)} (source: ${fetched.source ?? "http"})`,
72
+ );
73
+
74
+ // Reindex so the new file is searchable. reindexContext is
75
+ // incremental — files whose content_hash matches the index are
76
+ // skipped, so this only embeds the file we just wrote.
77
+ const dbPath = getDbPath(dir);
78
+ await withDb(dbPath, migrate);
79
+ const summary = await reindexContext(dir, config, dbPath, {
80
+ onProgress: (msg) => logger.dim(` ${msg}`),
59
81
  });
82
+ logger.success(
83
+ `indexed: ${summary.added} added, ${summary.updated} updated, ${summary.unchanged} unchanged, ${summary.chunksWritten} chunks written`,
84
+ );
60
85
  } catch (err) {
61
- spinner.error({
62
- text: `import failed: ${err instanceof Error ? err.message : String(err)}`,
63
- });
86
+ logger.error(
87
+ `import failed: ${err instanceof Error ? err.message : String(err)}`,
88
+ );
64
89
  process.exit(1);
65
90
  } finally {
66
91
  await mcpxClient?.close();
@@ -0,0 +1,8 @@
1
/**
 * Error raised when fetching or converting remote content fails in a way
 * that should be reported to the user.
 *
 * The text passed to the constructor is stored twice: as the standard
 * `Error.message` and as `userMessage`, so callers can read the user-facing
 * text from a dedicated field.
 */
export class FetchFailureError extends Error {
  /** User-facing description of the failure (identical to `message`). */
  readonly userMessage: string;

  /**
   * @param message - Description of the failure; used for both `message`
   *   and `userMessage`.
   */
  constructor(message: string) {
    super(message);
    this.userMessage = message;
    // Override the default "Error" so logs identify the failure class.
    this.name = "FetchFailureError";
  }
}
@@ -15,8 +15,16 @@ import { mcpSearchTool } from "../tools/mcp/search.ts";
15
15
  import type { ToolContext } from "../tools/tool.ts";
16
16
  import { type AnyToolDefinition, toAnthropicTool } from "../tools/tool.ts";
17
17
  import { logger } from "../utils/logger.ts";
18
+ import { FetchFailureError } from "./fetcher-errors.ts";
19
+ import {
20
+ convertToMarkdown,
21
+ isMarkdownMimeType,
22
+ resolveEffectiveMimeType,
23
+ } from "./markdown-converter.ts";
18
24
  import { stripHtmlTags } from "./url-utils.ts";
19
25
 
26
+ export { FetchFailureError } from "./fetcher-errors.ts";
27
+
20
28
  const MAX_CONTENT_BYTES = 500_000;
21
29
  const MAX_TURNS = 10;
22
30
  const MAX_RESPONSE_TOKENS = 4_096;
@@ -36,29 +44,23 @@ export interface FetchedContent {
36
44
  source: string | null;
37
45
  }
38
46
 
39
- export class FetchFailureError extends Error {
40
- readonly userMessage: string;
41
- constructor(message: string) {
42
- super(message);
43
- this.name = "FetchFailureError";
44
- this.userMessage = message;
45
- }
46
- }
47
-
48
47
  const FETCHER_SYSTEM_PROMPT = `You are a content fetcher. Your job is to find the right MCP tool to retrieve the content at the given URL, run it, and tell the harness which result to save.
49
48
 
50
49
  **Important: the harness captures the full result of every mcp_exec call automatically.** You only see a short preview of each result so you can verify it looks reasonable. You do NOT need to read or copy the full content — you just identify which exec call to save.
51
50
 
52
- Strongly prefer markdown output. Most MCP tools support a markdown/format parameter — use it when available.
51
+ **Format preference: markdown, in order of preference.**
52
+ 1. When searching with mcp_search or mcp_list_tools, prefer tools whose names indicate markdown output: anything containing "markdown", "md", "AsMarkdown", "AsMd", "AsDocmd", or similar. For example, prefer "GoogleDocs_GetDocumentAsDocmd" over "GoogleDocs_GetDocumentAsHtml".
53
+ 2. If no markdown-named variant exists, use mcp_info to inspect the tool's input schema for a "format", "mime_type", "output_format", or similar parameter and request "markdown" (or "md") when available.
54
+ 3. If neither is possible, run the tool anyway. The harness will convert the captured content to markdown via a separate LLM call before saving — markdown-native tools are still preferred because they're cheaper and higher fidelity, but you do not have to find one.
53
55
 
54
56
  Workflow:
55
- 1. Use mcp_search or mcp_list_tools to find the best tool for this URL (e.g., Google Docs tools for docs.google.com, Firecrawl for generic web pages, GitHub tools for github.com).
57
+ 1. Use mcp_search or mcp_list_tools to find the best tool for this URL (e.g., Google Docs tools for docs.google.com, Firecrawl for generic web pages, GitHub tools for github.com). Apply the format preference above.
56
58
  2. Use mcp_info to inspect the tool's input schema.
57
59
  3. Call mcp_exec with the right arguments — request markdown format when supported.
58
- 4. Look at the preview returned by mcp_exec. If it looks like the right content, call accept_content with the exec_call_id (the tool_use_id of the mcp_exec call) and a sensible title.
60
+ 4. Look at the preview returned by mcp_exec. If it looks like the right content, call accept_content with the exec_call_id (the tool_use_id of the mcp_exec call), a sensible title, and the actual mime_type the tool returned (so the harness knows whether to convert).
59
61
 
60
62
  Terminal tools:
61
- - accept_content(exec_call_id, title, mime_type?) — save the full content captured from a previous mcp_exec call. The harness has the full content; you just supply the id, title, and optional mime_type (defaults to text/markdown).
63
+ - accept_content(exec_call_id, title, mime_type?) — save the content captured from a previous mcp_exec call. The harness has the full content; you supply the id, title, and the source mime_type (e.g., "text/html", "application/json", "text/markdown"). The harness converts to markdown before storage when needed.
62
64
  - request_http_fallback() — fall back to a basic HTTP fetch. Use only when no MCP tool can handle the URL after a genuine attempt. Tools like Firecrawl can handle most URLs, so don't give up on the first try.
63
65
  - report_failure(message) — surface an actionable message to the user (e.g., "this Google Doc is private — share it with your service account", "Firecrawl is not authenticated"). Use only when there is a specific next step the user must take.`;
64
66
 
@@ -147,14 +149,14 @@ export async function fetchUrl(
147
149
 
148
150
  if (!mcpxClient) {
149
151
  logger.dim(" no MCPX client — using HTTP fallback");
150
- return httpFallback(url);
152
+ return httpFallback(url, config);
151
153
  }
152
154
 
153
155
  const result = await runFetcherLoop(url, config, mcpxClient, promptAddition);
154
156
  if (result) return result;
155
157
 
156
158
  logger.dim(" agent signaled fallback — using HTTP");
157
- return httpFallback(url);
159
+ return httpFallback(url, config);
158
160
  }
159
161
 
160
162
  async function runFetcherLoop(
@@ -292,14 +294,26 @@ async function runFetcherLoop(
292
294
  });
293
295
  continue;
294
296
  }
295
- const mimeType = input.mime_type || cached.mimeType;
297
+ const claimedMimeType = input.mime_type || cached.mimeType;
296
298
  logger.dim(
297
- ` turn ${turn + 1}: accept_content: "${input.title}" (${cached.content.length} chars, ${mimeType}, from ${cached.server}/${cached.tool})`,
299
+ ` turn ${turn + 1}: accept_content: "${input.title}" (${cached.content.length} chars, claimed ${claimedMimeType}, from ${cached.server}/${cached.tool})`,
300
+ );
301
+ const truncated = cached.content.slice(0, MAX_CONTENT_BYTES);
302
+ // Always normalize via the converter. MCP tools frequently mislabel
303
+ // format — e.g. Google Docs' "Docmd" tool claims text/markdown but
304
+ // returns a structured `[H1 ...]` annotation format. The converter
305
+ // prompt handles already-clean markdown by echoing it unchanged.
306
+ logger.dim(` normalizing → markdown`);
307
+ const finalContent = await convertToMarkdown(
308
+ truncated,
309
+ claimedMimeType,
310
+ url,
311
+ config,
298
312
  );
299
313
  return {
300
314
  title: input.title,
301
- content: cached.content.slice(0, MAX_CONTENT_BYTES),
302
- mimeType,
315
+ content: finalContent,
316
+ mimeType: "text/markdown",
303
317
  sourceUrl: url,
304
318
  source: cached.server,
305
319
  };
@@ -405,7 +419,10 @@ async function runFetcherLoop(
405
419
  return null;
406
420
  }
407
421
 
408
- export async function httpFallback(url: string): Promise<FetchedContent> {
422
+ export async function httpFallback(
423
+ url: string,
424
+ config: Required<BotholomewConfig> | null = null,
425
+ ): Promise<FetchedContent> {
409
426
  const response = await fetch(url, {
410
427
  headers: { "User-Agent": "Botholomew/1.0" },
411
428
  signal: AbortSignal.timeout(HTTP_TIMEOUT_MS),
@@ -416,7 +433,8 @@ export async function httpFallback(url: string): Promise<FetchedContent> {
416
433
  }
417
434
 
418
435
  const contentType = response.headers.get("content-type") || "";
419
- const isHtml = contentType.includes("text/html");
436
+ const baseMimeType = contentType.split(";")[0]?.trim() || "text/plain";
437
+ const isHtml = baseMimeType === "text/html";
420
438
  let text = await response.text();
421
439
 
422
440
  let title = url;
@@ -425,21 +443,72 @@ export async function httpFallback(url: string): Promise<FetchedContent> {
425
443
  if (titleMatch?.[1]) {
426
444
  title = titleMatch[1].trim();
427
445
  }
428
- text = stripHtmlTags(text);
429
446
  }
430
447
 
431
448
  if (text.length > MAX_CONTENT_BYTES) {
432
449
  text = text.slice(0, MAX_CONTENT_BYTES);
433
450
  }
434
451
 
435
- const mimeType = isHtml
436
- ? "text/markdown"
437
- : contentType.split(";")[0] || "text/plain";
452
+ // No API key: we can't honestly produce markdown. Strip HTML tags so the
453
+ // saved file is at least readable, and label it text/plain so downstream
454
+ // consumers know it isn't real markdown. Other content types pass through.
455
+ if (!config?.anthropic_api_key) {
456
+ if (isHtml) {
457
+ return {
458
+ title,
459
+ content: stripHtmlTags(text),
460
+ mimeType: "text/plain",
461
+ sourceUrl: url,
462
+ source: null,
463
+ };
464
+ }
465
+ return {
466
+ title,
467
+ content: text,
468
+ mimeType: baseMimeType,
469
+ sourceUrl: url,
470
+ source: null,
471
+ };
472
+ }
473
+
474
+ // With an API key: convert anything non-text/non-markdown to markdown.
475
+ // Plain text short-circuits to avoid burning a conversion call on what's
476
+ // probably already a readable README/log/etc. text/markdown short-circuits
477
+ // too — but only after verifying the body actually looks like markdown.
478
+ // Some servers mislabel HTML as text/markdown.
479
+ const { mimeType: effectiveMimeType, sniffed } = resolveEffectiveMimeType(
480
+ baseMimeType,
481
+ text,
482
+ );
483
+ if (sniffed) {
484
+ logger.warn(
485
+ `server claimed ${baseMimeType} but body looks like ${effectiveMimeType} — converting anyway`,
486
+ );
487
+ }
488
+ if (
489
+ effectiveMimeType === "text/plain" ||
490
+ isMarkdownMimeType(effectiveMimeType)
491
+ ) {
492
+ return {
493
+ title,
494
+ content: text,
495
+ mimeType: effectiveMimeType,
496
+ sourceUrl: url,
497
+ source: null,
498
+ };
499
+ }
438
500
 
501
+ logger.dim(` converting ${effectiveMimeType} → markdown`);
502
+ const converted = await convertToMarkdown(
503
+ text,
504
+ effectiveMimeType,
505
+ url,
506
+ config,
507
+ );
439
508
  return {
440
509
  title,
441
- content: text,
442
- mimeType,
510
+ content: converted,
511
+ mimeType: "text/markdown",
443
512
  sourceUrl: url,
444
513
  source: null,
445
514
  };
@@ -0,0 +1,186 @@
1
+ import Anthropic from "@anthropic-ai/sdk";
2
+ import type { BotholomewConfig } from "../config/schemas.ts";
3
+ import { logger } from "../utils/logger.ts";
4
+ import { FetchFailureError } from "./fetcher-errors.ts";
5
+
6
+ const CONVERTER_MAX_TOKENS = 16_384;
7
+
8
+ const CONVERTER_SYSTEM_PROMPT = `You normalize documents to clean, well-structured Markdown.
9
+
10
+ **If the input is already clean, valid Markdown, return it verbatim with no edits.** Look for ATX headings (#, ##), bullet/numbered lists, fenced code blocks, inline code, links in [text](url) form, blockquotes, GFM tables. If the structure is consistently markdown-shaped, echo it back unchanged.
11
+
12
+ Otherwise, convert it. The input mime_type is a hint, not a guarantee — verify the actual content. Common non-markdown formats to recognize and convert:
13
+ - **HTML** — strip tags, scripts, styles, navigation/footer chrome; preserve headings, paragraphs, lists, tables, links, code.
14
+ - **JSON / XML / YAML** — render the structure as readable Markdown (headings/lists for objects, tables where appropriate, fenced code blocks for inline values).
15
+ - **DocMD (Google Docs structured format)** — lines like \`[H1 1-31 HEADING_1 tabId=t.0 ...] Title text\` or \`[P5 884-937 PARAGRAPH ...] Body text\`. Strip the bracket annotations entirely; map H1→#, H2→##, H3→###, P→paragraph; preserve the trailing text content.
16
+ - **RTF, plain text with mixed structure, ad-hoc formats** — extract the semantic content, drop the noise.
17
+
18
+ Rules for the output:
19
+ - Preserve all semantic content: headings, paragraphs, lists, tables, links, inline code, code blocks, blockquotes.
20
+ - Use ATX headings (#, ##, ###), fenced code blocks (\`\`\`lang), GFM-style tables, and reference- or inline-style links — whichever is cleanest.
21
+ - Strip metadata headers/IDs that aren't part of the document body (e.g. \`@document_id: ...\`, \`@revision_id: ...\`).
22
+ - Output **only** the Markdown. No preamble ("Here is the converted markdown:"), no trailing commentary, no wrapping the entire output in a code fence.`;
23
+
24
/** Mime types (lowercase, without parameters) that are treated as Markdown. */
const MARKDOWN_MIME_TYPES = new Set([
  "text/markdown",
  "text/x-markdown",
  "text/md",
]);

/**
 * Report whether a mime type string denotes Markdown.
 *
 * Anything after the first `;` (e.g. `; charset=utf-8`) is ignored, and the
 * comparison is case-insensitive and whitespace-tolerant.
 *
 * @param mimeType - Raw mime type, optionally carrying parameters.
 * @returns `true` when the base type is one of the known Markdown types.
 */
export function isMarkdownMimeType(mimeType: string): boolean {
  // `split` always yields at least one element; the default only satisfies
  // the type checker under noUncheckedIndexedAccess.
  const [essence = ""] = mimeType.split(";");
  return MARKDOWN_MIME_TYPES.has(essence.trim().toLowerCase());
}
34
+
35
/**
 * Sniff content for a non-markdown structure.
 *
 * Returns a mime type when the content carries unmistakable markers of
 * HTML, XML or JSON, otherwise `null`. Intended for double-checking a claim
 * of `text/markdown`: when the label is wrong we still want to convert.
 *
 * Markdown is a superset of plain text, so a `null` return does not mean
 * "definitely markdown" — only that no strong contradicting signal was found.
 *
 * Checks, in order:
 * 1. A leading `<!doctype html`, `<html` or `<?xml` marker.
 * 2. The whole (trimmed) content parses as a JSON object or array.
 * 3. Dense tag markup in the first 4096 characters: at least 10 tags and
 *    fewer than 80 sample characters per tag. Tags that appear inside
 *    fenced code blocks or inline code spans are NOT counted, so a Markdown
 *    document that merely shows HTML examples is not mistaken for HTML.
 *
 * @param content - Raw document text.
 * @returns `"text/html"`, `"application/xml"`, `"application/json"`, or
 *   `null` when nothing contradicts a markdown/plain-text reading.
 */
export function sniffNonMarkdownMimeType(content: string): string | null {
  const head = content.trimStart().slice(0, 4096);
  if (!head) return null;

  if (/^<!doctype\s+html/i.test(head)) return "text/html";
  if (/^<html[\s>]/i.test(head)) return "text/html";
  if (/^<\?xml[\s?]/i.test(head)) return "application/xml";

  // JSON: must parse top-to-bottom, so use the full content, not the head.
  const trimmed = content.trim();
  if (
    (trimmed.startsWith("{") && trimmed.endsWith("}")) ||
    (trimmed.startsWith("[") && trimmed.endsWith("]"))
  ) {
    try {
      JSON.parse(trimmed);
      return "application/json";
    } catch {
      // Not JSON (e.g. a markdown link list) — keep sniffing.
    }
  }

  // Heuristic HTML: dense tag markup. Markdown can contain occasional inline
  // HTML, so we only flag it when tags dominate the sample. Code regions are
  // removed first: tags quoted in ``` / ~~~ fences (including a fence cut off
  // by the 4096-char window) or in `inline code` are examples, not markup.
  const prose = head
    .replace(/```[\s\S]*?(?:```|$)/g, "")
    .replace(/~~~[\s\S]*?(?:~~~|$)/g, "")
    .replace(/`[^`\n]*`/g, "");
  const tagMatches = prose.match(/<\/?[a-z][a-z0-9]*[\s/>]/gi) ?? [];
  if (tagMatches.length >= 10) {
    // Density is measured against the full sample so that stripping code can
    // only lower the tag count, never make the remaining tags look denser.
    const charsPerTag = head.length / tagMatches.length;
    if (charsPerTag < 80) return "text/html";
  }

  return null;
}
76
+
77
/**
 * Decide which mime type to act on for a piece of content.
 *
 * A non-markdown claim is taken at face value. A markdown claim is checked
 * against the content: if the body sniffs as something else, the sniffed
 * type wins so the caller converts instead of saving mislabeled content.
 *
 * @param claimedMimeType - Mime type reported for the content.
 * @param content - The content itself.
 * @returns The mime type to use, plus `sniffed: true` when it came from
 *   content sniffing rather than from the claim.
 */
export function resolveEffectiveMimeType(
  claimedMimeType: string,
  content: string,
): { mimeType: string; sniffed: boolean } {
  // Only a markdown claim is second-guessed; everything else skips sniffing.
  const detected = isMarkdownMimeType(claimedMimeType)
    ? sniffNonMarkdownMimeType(content)
    : null;

  return detected
    ? { mimeType: detected, sniffed: true }
    : { mimeType: claimedMimeType, sniffed: false };
}
93
+
94
/**
 * Unwrap text that is entirely enclosed in a single code fence.
 *
 * Matches an opening fence that is bare or labeled `markdown` / `md`, and a
 * closing fence at the very end of the (trimmed) text. When matched and the
 * inner text is non-empty, the inner text is returned; in every other case
 * the ORIGINAL, untrimmed input is returned unchanged.
 *
 * NOTE(review): a document that legitimately begins with a bare fenced block
 * and ends with another fenced block also matches this pattern — confirm
 * whether that case needs separate handling.
 *
 * @param text - Candidate text, possibly wrapped in a fence.
 * @returns The unwrapped inner text, or `text` itself when not wrapped.
 */
function stripLeadingMarkdownFence(text: string): string {
  const wrapper = /^```(?:markdown|md)?\s*\n([\s\S]*?)\n```\s*$/.exec(
    text.trim(),
  );
  const inner = wrapper?.[1];
  // An empty capture is treated like "no match": hand back the input as-is.
  return inner ? inner : text;
}
102
+
103
/**
 * Convert arbitrary content to Markdown with a single streamed LLM call.
 *
 * There is deliberately no short-circuit on a `text/markdown` mime type:
 * the mime type is only forwarded to the model as a hint, and the system
 * prompt instructs the model to echo already-clean Markdown unchanged.
 *
 * Outcomes:
 * - No `anthropic_api_key` in `config`: the input is returned untouched.
 * - The response stops on `max_tokens`: throws `FetchFailureError`, because
 *   silently saving a truncated document would be worse than failing loudly.
 * - The model returns only whitespace, or any other error occurs: a warning
 *   is logged and the raw input is returned so the import still yields
 *   something the user can edit.
 *
 * @param content - Raw document text to convert.
 * @param mimeType - Claimed mime type of `content`; passed to the model as a hint.
 * @param sourceUrl - URL the content came from; included in the prompt.
 * @param config - Resolved configuration; supplies the API key and model names.
 * @returns The converted Markdown, or `content` itself on the fallbacks above.
 * @throws FetchFailureError when the conversion hits the output token limit.
 */
export async function convertToMarkdown(
  content: string,
  mimeType: string,
  sourceUrl: string,
  config: Required<BotholomewConfig>,
): Promise<string> {
  const apiKey = config.anthropic_api_key;
  if (!apiKey) return content;

  const client = new Anthropic({ apiKey });
  // Conversion is mechanical text-shaping, so the chunker model is preferred
  // when one is configured; otherwise fall back to the main model.
  const model = config.chunker_model || config.model;
  const PROGRESS_INTERVAL_CHARS = 2_000;

  try {
    const stream = client.messages.stream({
      model,
      max_tokens: CONVERTER_MAX_TOKENS,
      system: CONVERTER_SYSTEM_PROMPT,
      messages: [
        {
          role: "user",
          content: `Convert this ${mimeType} content to Markdown. Source URL: ${sourceUrl}\n\n${content}`,
        },
      ],
    });

    // Drain the stream purely to report progress; the text itself is read
    // from the final message below.
    let charsReceived = 0;
    let lastLogged = 0;
    for await (const event of stream) {
      if (event.type !== "content_block_delta") continue;
      if (event.delta.type !== "text_delta") continue;

      charsReceived += event.delta.text.length;
      if (charsReceived - lastLogged >= PROGRESS_INTERVAL_CHARS) {
        logger.dim(` ...converted ${charsReceived} chars`);
        lastLogged = charsReceived;
      }
    }

    const final = await stream.finalMessage();

    if (final.stop_reason === "max_tokens") {
      throw new FetchFailureError(
        `Markdown conversion exceeded token budget (max_tokens=${CONVERTER_MAX_TOKENS}). The source document is too large to convert in one pass — try fetching a smaller section or a tool that supports pagination.`,
      );
    }

    // Concatenate every text block; non-text blocks are ignored.
    let markdown = "";
    for (const block of final.content) {
      if (block.type === "text") markdown += block.text;
    }

    if (!markdown.trim()) {
      logger.warn(
        "markdown conversion returned empty output — saving raw content",
      );
      return content;
    }

    return stripLeadingMarkdownFence(markdown);
  } catch (err) {
    // The token-budget failure must reach the caller; everything else
    // degrades to saving the raw content.
    if (err instanceof FetchFailureError) throw err;
    const reason = err instanceof Error ? err.message : String(err);
    logger.warn(
      `markdown conversion failed (${reason}) — saving raw content`,
    );
    return content;
  }
}