npm - llm-wiki-compiler - Versions diffs - 0.4.0 → 0.5.1 - Mend

llm-wiki-compiler 0.4.0 → 0.5.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions exactly as they appear in their respective public registries.

Files changed (4)

package/dist/cli.js CHANGED Viewed

@@ -6,8 +6,8 @@ import { createRequire } from "module";
 import { Command } from "commander";
 // src/commands/ingest.ts
-import path3 from "path";
-import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
+import path7 from "path";
+import { mkdir as mkdir2, readFile as readFile6, writeFile as writeFile2 } from "fs/promises";
 // src/utils/markdown.ts
 import { writeFile, rename, readFile, mkdir } from "fs/promises";
@@ -150,9 +150,17 @@ var LOCK_FILE = ".llmwiki/lock";
 var INDEX_FILE = "wiki/index.md";
 var MOC_FILE = "wiki/MOC.md";
 var EMBEDDINGS_FILE = ".llmwiki/embeddings.json";
+var IMAGE_EXTENSIONS = /* @__PURE__ */ new Set([".jpg", ".jpeg", ".png", ".gif", ".webp"]);
+var TRANSCRIPT_EXTENSIONS = /* @__PURE__ */ new Set([".vtt", ".srt"]);
+var IMAGE_DESCRIBE_MAX_TOKENS = 2048;
 var CANDIDATES_DIR = ".llmwiki/candidates";
 var CANDIDATES_ARCHIVE_DIR = ".llmwiki/candidates/archive";
 var EMBEDDING_TOP_K = 15;
+var CHUNK_TOP_K = 30;
+var CHUNK_RERANK_KEEP = 12;
+var CHUNK_TARGET_CHARS = 800;
+var CHUNK_MAX_CHARS = 1400;
+var CHUNK_MIN_CHARS = 200;
 var LOW_CONFIDENCE_THRESHOLD = 0.5;
 var MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS = 2;
 var EMBEDDING_MODELS = {
@@ -237,19 +245,24 @@ async function ingestWeb(url) {
 // src/ingest/file.ts
 import { readFile as readFile2 } from "fs/promises";
+import path3 from "path";
+// src/ingest/shared.ts
 import path2 from "path";
-var SUPPORTED_EXTENSIONS = /* @__PURE__ */ new Set([".md", ".txt"]);
 function titleFromFilename(filePath) {
   const basename = path2.basename(filePath, path2.extname(filePath));
   return basename.replace(/[-_]+/g, " ").trim();
 }
+// src/ingest/file.ts
+var SUPPORTED_EXTENSIONS = /* @__PURE__ */ new Set([".md", ".txt"]);
 function wrapPlainText(text) {
   return `\`\`\`
 ${text}
 \`\`\``;
 }
 async function ingestFile(filePath) {
-  const ext = path2.extname(filePath).toLowerCase();
+  const ext = path3.extname(filePath).toLowerCase();
   if (!SUPPORTED_EXTENSIONS.has(ext)) {
     throw new Error(
       `Unsupported file type "${ext}". Only .md and .txt files are supported.`
@@ -261,10 +274,439 @@ async function ingestFile(filePath) {
   return { title, content };
 }
+// src/ingest/pdf.ts
+import { readFile as readFile3 } from "fs/promises";
+function resolveTitle(filePath, info2) {
+  if (info2 && typeof info2 === "object") {
+    const titleField = info2["Title"];
+    if (typeof titleField === "string" && titleField.trim().length > 0) {
+      return titleField.trim();
+    }
+  }
+  return titleFromFilename(filePath);
+}
+async function ingestPdf(filePath) {
+  const { PDFParse } = await import("pdf-parse");
+  const buffer = await readFile3(filePath);
+  const parser = new PDFParse({ data: new Uint8Array(buffer) });
+  try {
+    const textResult = await parser.getText();
+    const infoResult = await parser.getInfo();
+    const title = resolveTitle(filePath, infoResult.info);
+    const content = textResult.text.trim();
+    return { title, content };
+  } finally {
+    await parser.destroy();
+  }
+}
+// src/ingest/image.ts
+import { readFile as readFile4 } from "fs/promises";
+import path5 from "path";
+import Anthropic2 from "@anthropic-ai/sdk";
+// src/providers/anthropic.ts
+import Anthropic from "@anthropic-ai/sdk";
+var VOYAGE_EMBEDDINGS_URL = "https://api.voyageai.com/v1/embeddings";
+function buildAnthropicClientOptions(options = {}) {
+  const trimmedBaseURL = options.baseURL?.trim();
+  const trimmedApiKey = options.apiKey?.trim();
+  const trimmedAuthToken = options.authToken?.trim();
+  const result = {};
+  if (trimmedApiKey) {
+    result.apiKey = trimmedApiKey;
+  }
+  if (trimmedAuthToken) {
+    result.authToken = trimmedAuthToken;
+  }
+  if (!trimmedBaseURL) {
+    return result;
+  }
+  const normalizedBaseURL = trimmedBaseURL.endsWith("/") && trimmedBaseURL.length > 1 ? trimmedBaseURL.slice(0, -1) : trimmedBaseURL;
+  result.baseURL = normalizedBaseURL;
+  return result;
+}
+var AnthropicProvider = class {
+  client;
+  model;
+  constructor(model, options = {}) {
+    this.model = model;
+    this.client = new Anthropic(buildAnthropicClientOptions(options));
+  }
+  /** Send a single non-streaming completion request. */
+  async complete(system, messages, maxTokens) {
+    const response = await this.client.messages.create({
+      model: this.model,
+      max_tokens: maxTokens,
+      system,
+      messages
+    });
+    const textBlock = response.content.find((block) => block.type === "text");
+    return textBlock?.type === "text" ? textBlock.text : "";
+  }
+  /** Stream a completion, invoking onToken for each text chunk. */
+  async stream(system, messages, maxTokens, onToken) {
+    const stream = this.client.messages.stream({
+      model: this.model,
+      max_tokens: maxTokens,
+      system,
+      messages
+    });
+    let fullText = "";
+    for await (const event of stream) {
+      if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
+        fullText += event.delta.text;
+        onToken?.(event.delta.text);
+      }
+    }
+    return fullText;
+  }
+  /** Call Claude with tool definitions and return the parsed tool input as JSON. */
+  async toolCall(system, messages, tools, maxTokens) {
+    const anthropicTools = tools.map((t) => ({
+      name: t.name,
+      description: t.description,
+      input_schema: t.input_schema
+    }));
+    const response = await this.client.messages.create({
+      model: this.model,
+      max_tokens: maxTokens,
+      system,
+      messages,
+      tools: anthropicTools
+    });
+    const toolBlock = response.content.find((block) => block.type === "tool_use");
+    if (toolBlock?.type === "tool_use") {
+      return JSON.stringify(toolBlock.input);
+    }
+    const textBlock = response.content.find((block) => block.type === "text");
+    return textBlock?.type === "text" ? textBlock.text : "";
+  }
+  /**
+   * Produce a single embedding vector via the Voyage API.
+   *
+   * Anthropic does not ship a first-party embeddings endpoint, so we delegate
+   * to Voyage (their recommended partner). Requires VOYAGE_API_KEY.
+   */
+  async embed(text) {
+    const apiKey = process.env.VOYAGE_API_KEY?.trim();
+    if (!apiKey) {
+      throw new Error(
+        "VOYAGE_API_KEY is not set. Anthropic embeddings use Voyage \u2014 set VOYAGE_API_KEY to enable semantic search."
+      );
+    }
+    const response = await fetch(VOYAGE_EMBEDDINGS_URL, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Authorization: `Bearer ${apiKey}`
+      },
+      body: JSON.stringify({ input: text, model: EMBEDDING_MODELS.anthropic })
+    });
+    if (!response.ok) {
+      const detail = await response.text();
+      throw new Error(`Voyage embeddings request failed (${response.status}): ${detail}`);
+    }
+    const json = await response.json();
+    const vector = json.data?.[0]?.embedding;
+    if (!Array.isArray(vector)) {
+      throw new Error("Voyage embeddings response did not include a vector.");
+    }
+    return vector;
+  }
+};
+// src/utils/claude-settings.ts
+import { readFileSync } from "fs";
+import { homedir } from "os";
+import path4 from "path";
+var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
+function isRecord(value) {
+  return typeof value === "object" && value !== null;
+}
+function normalize(value) {
+  if (typeof value !== "string") return void 0;
+  const trimmed = value.trim();
+  return trimmed.length > 0 ? trimmed : void 0;
+}
+function resolveClaudeSettingsPath(env) {
+  return env[CLAUDE_SETTINGS_PATH_ENV] ?? path4.join(homedir(), ".claude", "settings.json");
+}
+function readClaudeSettingsFile(settingsPath) {
+  try {
+    return readFileSync(settingsPath, "utf8");
+  } catch (err) {
+    if (isRecord(err) && err.code === "ENOENT") {
+      return void 0;
+    }
+    const message = err instanceof Error ? err.message : String(err);
+    throw new Error(`Failed to read Claude settings at "${settingsPath}": ${message}`);
+  }
+}
+function readClaudeSettingsEnv(env = process.env) {
+  const settingsPath = resolveClaudeSettingsPath(env);
+  const raw = readClaudeSettingsFile(settingsPath);
+  if (!raw) return void 0;
+  let parsed;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    throw new Error(`Failed to parse Claude settings at "${settingsPath}": ${message}`);
+  }
+  if (!isRecord(parsed) || !isRecord(parsed.env)) {
+    return void 0;
+  }
+  const values = {
+    ANTHROPIC_API_KEY: normalize(parsed.env.ANTHROPIC_API_KEY),
+    ANTHROPIC_AUTH_TOKEN: normalize(parsed.env.ANTHROPIC_AUTH_TOKEN),
+    ANTHROPIC_BASE_URL: normalize(parsed.env.ANTHROPIC_BASE_URL),
+    ANTHROPIC_MODEL: normalize(parsed.env.ANTHROPIC_MODEL)
+  };
+  if (!values.ANTHROPIC_API_KEY && !values.ANTHROPIC_AUTH_TOKEN && !values.ANTHROPIC_BASE_URL && !values.ANTHROPIC_MODEL) {
+    return void 0;
+  }
+  return values;
+}
+function tryReadClaudeSettingsEnv(env) {
+  try {
+    return readClaudeSettingsEnv(env);
+  } catch {
+    return void 0;
+  }
+}
+function validateAnthropicBaseURL(value) {
+  const normalized = value.trim();
+  try {
+    const parsed = new URL(normalized);
+    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
+      throw new Error("Must use http:// or https:// protocol.");
+    }
+  } catch (err) {
+    const message = err instanceof Error ? err.message : "Must be a valid http(s) URL.";
+    throw new Error(`Invalid ANTHROPIC_BASE_URL: "${normalized}". ${message}`);
+  }
+  return normalized;
+}
+function resolveAnthropicAuthFromEnv(env = process.env) {
+  const explicitApiKey = normalize(env.ANTHROPIC_API_KEY);
+  if (explicitApiKey) return { apiKey: explicitApiKey };
+  const explicitAuthToken = normalize(env.ANTHROPIC_AUTH_TOKEN);
+  if (explicitAuthToken) return { authToken: explicitAuthToken };
+  const fallback = readClaudeSettingsEnv(env);
+  if (fallback?.ANTHROPIC_API_KEY) return { apiKey: fallback.ANTHROPIC_API_KEY };
+  if (fallback?.ANTHROPIC_AUTH_TOKEN) return { authToken: fallback.ANTHROPIC_AUTH_TOKEN };
+  return {};
+}
+function resolveAnthropicModelFromEnv(env = process.env) {
+  const explicitModel = env.LLMWIKI_MODEL;
+  if (explicitModel !== void 0) return explicitModel;
+  return tryReadClaudeSettingsEnv(env)?.ANTHROPIC_MODEL;
+}
+function resolveAnthropicBaseURLFromEnv(env = process.env) {
+  const explicitBaseURL = normalize(env.ANTHROPIC_BASE_URL);
+  if (explicitBaseURL) return validateAnthropicBaseURL(explicitBaseURL);
+  const fallbackBaseURL = tryReadClaudeSettingsEnv(env)?.ANTHROPIC_BASE_URL;
+  if (!fallbackBaseURL) return void 0;
+  return validateAnthropicBaseURL(fallbackBaseURL);
+}
+// src/ingest/image.ts
+var EXTENSION_TO_MIME = {
+  ".jpg": "image/jpeg",
+  ".jpeg": "image/jpeg",
+  ".png": "image/png",
+  ".gif": "image/gif",
+  ".webp": "image/webp"
+};
+function mimeTypeForExtension(ext) {
+  const mimeType = EXTENSION_TO_MIME[ext.toLowerCase()];
+  if (!mimeType) {
+    throw new Error(
+      `Unsupported image extension "${ext}". Supported: ${Object.keys(EXTENSION_TO_MIME).join(", ")}`
+    );
+  }
+  return mimeType;
+}
+function buildClient() {
+  const baseURL = resolveAnthropicBaseURLFromEnv();
+  const auth = resolveAnthropicAuthFromEnv();
+  return new Anthropic2(buildAnthropicClientOptions({ baseURL, ...auth }));
+}
+async function describeImageWithVision(client, model, imageData, mimeType) {
+  const response = await client.messages.create({
+    model,
+    max_tokens: IMAGE_DESCRIBE_MAX_TOKENS,
+    messages: [
+      {
+        role: "user",
+        content: [
+          {
+            type: "image",
+            source: { type: "base64", media_type: mimeType, data: imageData }
+          },
+          {
+            type: "text",
+            text: "Extract and transcribe all text visible in this image. Then provide a detailed description of any non-text visual content. Format your response as markdown."
+          }
+        ]
+      }
+    ]
+  });
+  const textBlock = response.content.find((block) => block.type === "text");
+  return textBlock?.type === "text" ? textBlock.text : "";
+}
+async function ingestImage(filePath) {
+  const providerName = process.env.LLMWIKI_PROVIDER ?? "anthropic";
+  if (providerName !== "anthropic") {
+    throw new Error(
+      `Image ingest requires the Anthropic provider (vision). Current provider: "${providerName}". Set LLMWIKI_PROVIDER=anthropic and ANTHROPIC_API_KEY to use image ingest.`
+    );
+  }
+  const ext = path5.extname(filePath).toLowerCase();
+  const mimeType = mimeTypeForExtension(ext);
+  const imageBuffer = await readFile4(filePath);
+  const imageData = imageBuffer.toString("base64");
+  const client = buildClient();
+  const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
+  const content = await describeImageWithVision(client, model, imageData, mimeType);
+  const title = titleFromFilename(filePath);
+  return { title, content };
+}
+// src/ingest/transcript.ts
+import { readFile as readFile5 } from "fs/promises";
+import path6 from "path";
+import { YoutubeTranscript } from "youtube-transcript";
+var YOUTUBE_URL_PATTERN = /^https?:\/\/(www\.)?(youtube\.com\/watch|youtu\.be\/)/;
+var SRT_SEQUENCE_PATTERN = /^\d+$/;
+var TIMESTAMP_PATTERN = /\d{2}:\d{2}[:.]\d{2}/;
+var MS_PER_MINUTE = 6e4;
+var MS_PER_SECOND = 1e3;
+function isYoutubeUrl(source2) {
+  return YOUTUBE_URL_PATTERN.test(source2);
+}
+function extractVideoId(url) {
+  const match = url.match(/(?:v=|youtu\.be\/)([^&?/]+)/);
+  if (!match) {
+    throw new Error(`Could not extract video ID from YouTube URL: ${url}`);
+  }
+  return match[1];
+}
+function formatOffset(offsetMs) {
+  const minutes = Math.floor(offsetMs / MS_PER_MINUTE);
+  const seconds = Math.floor(offsetMs % MS_PER_MINUTE / MS_PER_SECOND);
+  return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}`;
+}
+async function fetchYoutubeTranscript(url) {
+  const videoId = extractVideoId(url);
+  const segments = await YoutubeTranscript.fetchTranscript(videoId);
+  if (!segments || segments.length === 0) {
+    throw new Error(`No transcript available for YouTube video: ${url}`);
+  }
+  const lines = segments.map((seg) => `[${formatOffset(seg.offset)}] ${seg.text}`);
+  return {
+    title: `YouTube Transcript ${videoId}`,
+    content: lines.join("\n")
+  };
+}
+function isCueTimestamp(trimmed) {
+  return TIMESTAMP_PATTERN.test(trimmed) && trimmed.includes("-->");
+}
+function parseVtt(raw, filePath) {
+  const lines = raw.split("\n");
+  const output = [];
+  let inCue = false;
+  for (const line of lines) {
+    const trimmed = line.trim();
+    if (trimmed === "WEBVTT" || trimmed === "") {
+      inCue = false;
+      continue;
+    }
+    if (isCueTimestamp(trimmed)) {
+      output.push(`
+**[${trimmed}]**`);
+      inCue = true;
+      continue;
+    }
+    if (inCue && trimmed.length > 0) {
+      output.push(trimmed);
+    }
+  }
+  return { title: titleFromFilename(filePath), content: output.join("\n").trim() };
+}
+function parseSrt(raw, filePath) {
+  const lines = raw.split("\n");
+  const output = [];
+  for (const line of lines) {
+    const trimmed = line.trim();
+    if (trimmed === "" || SRT_SEQUENCE_PATTERN.test(trimmed)) {
+      continue;
+    }
+    if (isCueTimestamp(trimmed)) {
+      output.push(`
+**[${trimmed}]**`);
+      continue;
+    }
+    if (trimmed.length > 0) {
+      output.push(trimmed);
+    }
+  }
+  return { title: titleFromFilename(filePath), content: output.join("\n").trim() };
+}
+function parsePlainTranscript(raw, filePath) {
+  return { title: titleFromFilename(filePath), content: raw.trim() };
+}
+async function ingestTranscript(source2) {
+  if (isYoutubeUrl(source2)) {
+    return fetchYoutubeTranscript(source2);
+  }
+  const ext = path6.extname(source2).toLowerCase();
+  const raw = await readFile5(source2, "utf-8");
+  if (ext === ".vtt") return parseVtt(raw, source2);
+  if (ext === ".srt") return parseSrt(raw, source2);
+  if (ext === ".txt") return parsePlainTranscript(raw, source2);
+  throw new Error(
+    `Unsupported transcript file type "${ext}". Supported: .vtt, .srt, .txt`
+  );
+}
 // src/commands/ingest.ts
 function isUrl(source2) {
   return source2.startsWith("http://") || source2.startsWith("https://");
 }
+var TXT_SNIFF_BYTES = 2048;
+var SPEAKER_TAG_PATTERN = /^([A-Z][a-zA-Z .'-]{0,40}):\s/gm;
+var TIMESTAMP_PATTERN2 = /^\s*\d{1,2}:\d{2}(:\d{2})?/;
+var MIN_TIMESTAMP_MATCHES = 3;
+var MIN_SPEAKER_REPEAT_COUNT = 2;
+var MIN_DISTINCT_SPEAKERS = 2;
+function countSpeakerOccurrences(sample) {
+  const counts = /* @__PURE__ */ new Map();
+  SPEAKER_TAG_PATTERN.lastIndex = 0;
+  let match;
+  while ((match = SPEAKER_TAG_PATTERN.exec(sample)) !== null) {
+    const name = match[1].trim();
+    counts.set(name, (counts.get(name) ?? 0) + 1);
+  }
+  return counts;
+}
+function hasSpeakerDialoguePattern(sample) {
+  const counts = countSpeakerOccurrences(sample);
+  const distinctSpeakers = counts.size;
+  const hasEnoughSpeakers = distinctSpeakers >= MIN_DISTINCT_SPEAKERS;
+  const hasRepeatedSpeaker = [...counts.values()].some(
+    (n) => n >= MIN_SPEAKER_REPEAT_COUNT
+  );
+  return hasEnoughSpeakers && hasRepeatedSpeaker;
+}
+async function looksLikeTxtTranscript(filePath) {
+  const raw = await readFile6(filePath, "utf-8");
+  const sample = raw.slice(0, TXT_SNIFF_BYTES);
+  if (hasSpeakerDialoguePattern(sample)) return true;
+  const timestampMatches = sample.match(new RegExp(TIMESTAMP_PATTERN2.source, "gm"));
+  return (timestampMatches?.length ?? 0) >= MIN_TIMESTAMP_MATCHES;
+}
 function enforceCharLimit(content) {
   if (content.length <= MAX_SOURCE_CHARS) {
     return { content, truncated: false, originalChars: content.length };
@@ -297,12 +739,30 @@ function enforceMinContent(content) {
     );
   }
 }
-function buildDocument(title, source2, result) {
+async function detectSourceType(source2) {
+  if (!isUrl(source2)) {
+    const ext = path7.extname(source2).toLowerCase();
+    if (ext === ".pdf") return "pdf";
+    if (IMAGE_EXTENSIONS.has(ext)) return "image";
+    if (TRANSCRIPT_EXTENSIONS.has(ext)) return "transcript";
+    if (ext === ".txt") {
+      const isTranscript = await looksLikeTxtTranscript(source2);
+      return isTranscript ? "transcript" : "file";
+    }
+    return "file";
+  }
+  if (isYoutubeUrl(source2)) return "transcript";
+  return "web";
+}
+function buildDocument(title, source2, result, sourceType) {
   const meta = {
     title,
     source: source2,
     ingestedAt: (/* @__PURE__ */ new Date()).toISOString()
   };
+  if (sourceType !== void 0) {
+    meta.sourceType = sourceType;
+  }
   if (result.truncated) {
     meta.truncated = true;
     meta.originalChars = result.originalChars;
@@ -313,30 +773,46 @@ function buildDocument(title, source2, result) {
 ${result.content}
 `;
 }
+async function fetchContent(source2, sourceType) {
+  switch (sourceType) {
+    case "web":
+      return ingestWeb(source2);
+    case "pdf":
+      return ingestPdf(source2);
+    case "image":
+      return ingestImage(source2);
+    case "transcript":
+      return ingestTranscript(source2);
+    case "file":
+      return ingestFile(source2);
+  }
+}
 async function saveSource(title, document) {
   const filename = `${slugify(title)}.md`;
-  const destPath = path3.join(SOURCES_DIR, filename);
+  const destPath = path7.join(SOURCES_DIR, filename);
   await mkdir2(SOURCES_DIR, { recursive: true });
   await writeFile2(destPath, document, "utf-8");
   return destPath;
 }
 async function ingestSource(source2) {
-  status("*", info(`Ingesting: ${source2}`));
-  const { title, content } = isUrl(source2) ? await ingestWeb(source2) : await ingestFile(source2);
+  const sourceType = await detectSourceType(source2);
+  status("*", info(`Ingesting [${sourceType}]: ${source2}`));
+  const { title, content } = await fetchContent(source2, sourceType);
   const result = enforceCharLimit(content);
   enforceMinContent(result.content);
-  const document = buildDocument(title, source2, result);
+  const document = buildDocument(title, source2, result, sourceType);
   const savedPath = await saveSource(title, document);
   return {
-    filename: path3.basename(savedPath),
+    filename: path7.basename(savedPath),
     charCount: result.content.length,
     truncated: result.truncated,
-    source: source2
+    source: source2,
+    sourceType
   };
 }
 async function ingest(source2) {
   const result = await ingestSource(source2);
-  const savedPath = path3.join(SOURCES_DIR, result.filename);
+  const savedPath = path7.join(SOURCES_DIR, result.filename);
   status(
     "+",
     success(`Saved ${bold(result.filename)} \u2192 ${source(savedPath)}`)
@@ -348,23 +824,23 @@ async function ingest(source2) {
 import { existsSync as existsSync7 } from "fs";
 // src/compiler/index.ts
-import { readFile as readFile10 } from "fs/promises";
-import path18 from "path";
+import { readFile as readFile14 } from "fs/promises";
+import path21 from "path";
 // src/utils/state.ts
-import { readFile as readFile3, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
+import { readFile as readFile7, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
 import { existsSync } from "fs";
-import path4 from "path";
+import path8 from "path";
 function emptyState() {
   return { version: 1, indexHash: "", sources: {} };
 }
 async function readState(root) {
-  const filePath = path4.join(root, STATE_FILE);
+  const filePath = path8.join(root, STATE_FILE);
   if (!existsSync(filePath)) {
     return emptyState();
   }
   try {
-    const raw = await readFile3(filePath, "utf-8");
+    const raw = await readFile7(filePath, "utf-8");
     return JSON.parse(raw);
   } catch {
     const bakPath = filePath + ".bak";
@@ -374,9 +850,9 @@ async function readState(root) {
   }
 }
 async function writeState(root, state) {
-  const dir = path4.join(root, LLMWIKI_DIR);
+  const dir = path8.join(root, LLMWIKI_DIR);
   await mkdir3(dir, { recursive: true });
-  const filePath = path4.join(root, STATE_FILE);
+  const filePath = path8.join(root, STATE_FILE);
   const tmpPath = filePath + ".tmp";
   await writeFile3(tmpPath, JSON.stringify(state, null, 2), "utf-8");
   await rename2(tmpPath, filePath);
@@ -393,18 +869,18 @@ async function removeSourceState(root, sourceFile) {
 }
 // src/compiler/source-state.ts
-import path6 from "path";
+import path10 from "path";
 // src/compiler/hasher.ts
 import { createHash } from "crypto";
-import { readFile as readFile4, readdir } from "fs/promises";
-import path5 from "path";
+import { readFile as readFile8, readdir } from "fs/promises";
+import path9 from "path";
 async function hashFile(filePath) {
-  const content = await readFile4(filePath, "utf-8");
+  const content = await readFile8(filePath, "utf-8");
   return createHash("sha256").update(content).digest("hex");
 }
 async function detectChanges(root, prevState) {
-  const sourcesPath = path5.join(root, SOURCES_DIR);
+  const sourcesPath = path9.join(root, SOURCES_DIR);
   const currentFiles = await listSourceFiles(sourcesPath);
   const changes = [];
   for (const file of currentFiles) {
@@ -424,7 +900,7 @@ async function listSourceFiles(sourcesPath) {
   }
 }
 async function classifyFile(root, file, prevState) {
-  const filePath = path5.join(root, SOURCES_DIR, file);
+  const filePath = path9.join(root, SOURCES_DIR, file);
   const hash = await hashFile(filePath);
   const prev = prevState.sources[file];
   if (!prev) return "new";
@@ -447,133 +923,22 @@ async function buildExtractionSourceStates(root, extractions) {
   return snapshot;
 }
 async function buildEntry(root, result, compiledAt) {
-  const filePath = path6.join(root, SOURCES_DIR, result.sourceFile);
+  const filePath = path10.join(root, SOURCES_DIR, result.sourceFile);
   const hash = await hashFile(filePath);
-  return {
-    hash,
-    concepts: result.concepts.map((concept) => slugify(concept.concept)),
-    compiledAt
-  };
-}
-function pickStatesForSources(allStates, sourceFiles) {
-  const picked = {};
-  for (const file of sourceFiles) {
-    const entry = allStates[file];
-    if (entry) picked[file] = entry;
-  }
-  return picked;
-}
-// src/providers/anthropic.ts
-import Anthropic from "@anthropic-ai/sdk";
-var VOYAGE_EMBEDDINGS_URL = "https://api.voyageai.com/v1/embeddings";
-function buildAnthropicClientOptions(options = {}) {
-  const trimmedBaseURL = options.baseURL?.trim();
-  const trimmedApiKey = options.apiKey?.trim();
-  const trimmedAuthToken = options.authToken?.trim();
-  const result = {};
-  if (trimmedApiKey) {
-    result.apiKey = trimmedApiKey;
-  }
-  if (trimmedAuthToken) {
-    result.authToken = trimmedAuthToken;
-  }
-  if (!trimmedBaseURL) {
-    return result;
-  }
-  const normalizedBaseURL = trimmedBaseURL.endsWith("/") && trimmedBaseURL.length > 1 ? trimmedBaseURL.slice(0, -1) : trimmedBaseURL;
-  result.baseURL = normalizedBaseURL;
-  return result;
-}
-var AnthropicProvider = class {
-  client;
-  model;
-  constructor(model, options = {}) {
-    this.model = model;
-    this.client = new Anthropic(buildAnthropicClientOptions(options));
-  }
-  /** Send a single non-streaming completion request. */
-  async complete(system, messages, maxTokens) {
-    const response = await this.client.messages.create({
-      model: this.model,
-      max_tokens: maxTokens,
-      system,
-      messages
-    });
-    const textBlock = response.content.find((block) => block.type === "text");
-    return textBlock?.type === "text" ? textBlock.text : "";
-  }
-  /** Stream a completion, invoking onToken for each text chunk. */
-  async stream(system, messages, maxTokens, onToken) {
-    const stream = this.client.messages.stream({
-      model: this.model,
-      max_tokens: maxTokens,
-      system,
-      messages
-    });
-    let fullText = "";
-    for await (const event of stream) {
-      if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
-        fullText += event.delta.text;
-        onToken?.(event.delta.text);
-      }
-    }
-    return fullText;
-  }
-  /** Call Claude with tool definitions and return the parsed tool input as JSON. */
-  async toolCall(system, messages, tools, maxTokens) {
-    const anthropicTools = tools.map((t) => ({
-      name: t.name,
-      description: t.description,
-      input_schema: t.input_schema
-    }));
-    const response = await this.client.messages.create({
-      model: this.model,
-      max_tokens: maxTokens,
-      system,
-      messages,
-      tools: anthropicTools
-    });
-    const toolBlock = response.content.find((block) => block.type === "tool_use");
-    if (toolBlock?.type === "tool_use") {
-      return JSON.stringify(toolBlock.input);
-    }
-    const textBlock = response.content.find((block) => block.type === "text");
-    return textBlock?.type === "text" ? textBlock.text : "";
-  }
-  /**
-   * Produce a single embedding vector via the Voyage API.
-   *
-   * Anthropic does not ship a first-party embeddings endpoint, so we delegate
-   * to Voyage (their recommended partner). Requires VOYAGE_API_KEY.
-   */
-  async embed(text) {
-    const apiKey = process.env.VOYAGE_API_KEY?.trim();
-    if (!apiKey) {
-      throw new Error(
-        "VOYAGE_API_KEY is not set. Anthropic embeddings use Voyage \u2014 set VOYAGE_API_KEY to enable semantic search."
-      );
-    }
-    const response = await fetch(VOYAGE_EMBEDDINGS_URL, {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        Authorization: `Bearer ${apiKey}`
-      },
-      body: JSON.stringify({ input: text, model: EMBEDDING_MODELS.anthropic })
-    });
-    if (!response.ok) {
-      const detail = await response.text();
-      throw new Error(`Voyage embeddings request failed (${response.status}): ${detail}`);
-    }
-    const json = await response.json();
-    const vector = json.data?.[0]?.embedding;
-    if (!Array.isArray(vector)) {
-      throw new Error("Voyage embeddings response did not include a vector.");
-    }
-    return vector;
+  return {
+    hash,
+    concepts: result.concepts.map((concept) => slugify(concept.concept)),
+    compiledAt
+  };
+}
+function pickStatesForSources(allStates, sourceFiles) {
+  const picked = {};
+  for (const file of sourceFiles) {
+    const entry = allStates[file];
+    if (entry) picked[file] = entry;
   }
-};
+  return picked;
+}
 // src/providers/openai.ts
 import OpenAI from "openai";
@@ -704,101 +1069,6 @@ var MiniMaxProvider = class extends OpenAIProvider {
   }
 };
-// src/utils/claude-settings.ts
-import { readFileSync } from "fs";
-import { homedir } from "os";
-import path7 from "path";
-var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
-function isRecord(value) {
-  return typeof value === "object" && value !== null;
-}
-function normalize(value) {
-  if (typeof value !== "string") return void 0;
-  const trimmed = value.trim();
-  return trimmed.length > 0 ? trimmed : void 0;
-}
-function resolveClaudeSettingsPath(env) {
-  return env[CLAUDE_SETTINGS_PATH_ENV] ?? path7.join(homedir(), ".claude", "settings.json");
-}
-function readClaudeSettingsFile(settingsPath) {
-  try {
-    return readFileSync(settingsPath, "utf8");
-  } catch (err) {
-    if (isRecord(err) && err.code === "ENOENT") {
-      return void 0;
-    }
-    const message = err instanceof Error ? err.message : String(err);
-    throw new Error(`Failed to read Claude settings at "${settingsPath}": ${message}`);
-  }
-}
-function readClaudeSettingsEnv(env = process.env) {
-  const settingsPath = resolveClaudeSettingsPath(env);
-  const raw = readClaudeSettingsFile(settingsPath);
-  if (!raw) return void 0;
-  let parsed;
-  try {
-    parsed = JSON.parse(raw);
-  } catch (err) {
-    const message = err instanceof Error ? err.message : String(err);
-    throw new Error(`Failed to parse Claude settings at "${settingsPath}": ${message}`);
-  }
-  if (!isRecord(parsed) || !isRecord(parsed.env)) {
-    return void 0;
-  }
-  const values = {
-    ANTHROPIC_API_KEY: normalize(parsed.env.ANTHROPIC_API_KEY),
-    ANTHROPIC_AUTH_TOKEN: normalize(parsed.env.ANTHROPIC_AUTH_TOKEN),
-    ANTHROPIC_BASE_URL: normalize(parsed.env.ANTHROPIC_BASE_URL),
-    ANTHROPIC_MODEL: normalize(parsed.env.ANTHROPIC_MODEL)
-  };
-  if (!values.ANTHROPIC_API_KEY && !values.ANTHROPIC_AUTH_TOKEN && !values.ANTHROPIC_BASE_URL && !values.ANTHROPIC_MODEL) {
-    return void 0;
-  }
-  return values;
-}
-function tryReadClaudeSettingsEnv(env) {
-  try {
-    return readClaudeSettingsEnv(env);
-  } catch {
-    return void 0;
-  }
-}
-function validateAnthropicBaseURL(value) {
-  const normalized = value.trim();
-  try {
-    const parsed = new URL(normalized);
-    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
-      throw new Error("Must use http:// or https:// protocol.");
-    }
-  } catch (err) {
-    const message = err instanceof Error ? err.message : "Must be a valid http(s) URL.";
-    throw new Error(`Invalid ANTHROPIC_BASE_URL: "${normalized}". ${message}`);
-  }
-  return normalized;
-}
-function resolveAnthropicAuthFromEnv(env = process.env) {
-  const explicitApiKey = normalize(env.ANTHROPIC_API_KEY);
-  if (explicitApiKey) return { apiKey: explicitApiKey };
-  const explicitAuthToken = normalize(env.ANTHROPIC_AUTH_TOKEN);
-  if (explicitAuthToken) return { authToken: explicitAuthToken };
-  const fallback = readClaudeSettingsEnv(env);
-  if (fallback?.ANTHROPIC_API_KEY) return { apiKey: fallback.ANTHROPIC_API_KEY };
-  if (fallback?.ANTHROPIC_AUTH_TOKEN) return { authToken: fallback.ANTHROPIC_AUTH_TOKEN };
-  return {};
-}
-function resolveAnthropicModelFromEnv(env = process.env) {
-  const explicitModel = env.LLMWIKI_MODEL;
-  if (explicitModel !== void 0) return explicitModel;
-  return tryReadClaudeSettingsEnv(env)?.ANTHROPIC_MODEL;
-}
-function resolveAnthropicBaseURLFromEnv(env = process.env) {
-  const explicitBaseURL = normalize(env.ANTHROPIC_BASE_URL);
-  if (explicitBaseURL) return validateAnthropicBaseURL(explicitBaseURL);
-  const fallbackBaseURL = tryReadClaudeSettingsEnv(env)?.ANTHROPIC_BASE_URL;
-  if (!fallbackBaseURL) return void 0;
-  return validateAnthropicBaseURL(fallbackBaseURL);
-}
 // src/utils/provider.ts
 var SUPPORTED_PROVIDERS = /* @__PURE__ */ new Set(["anthropic", "openai", "ollama", "minimax"]);
 function getProvider() {
@@ -891,8 +1161,8 @@ async function callClaude(options) {
 }
 // src/utils/lock.ts
-import { open, readFile as readFile5, unlink, mkdir as mkdir4 } from "fs/promises";
-import path8 from "path";
+import { open, readFile as readFile9, unlink, mkdir as mkdir4 } from "fs/promises";
+import path11 from "path";
 var RECLAIM_SUFFIX = ".reclaim";
 var MAX_ACQUIRE_ATTEMPTS = 2;
 function isProcessAlive(pid) {
@@ -904,8 +1174,8 @@ function isProcessAlive(pid) {
   }
 }
 async function acquireLock(root) {
-  const lockPath = path8.join(root, LOCK_FILE);
-  await mkdir4(path8.join(root, LLMWIKI_DIR), { recursive: true });
+  const lockPath = path11.join(root, LOCK_FILE);
+  await mkdir4(path11.join(root, LLMWIKI_DIR), { recursive: true });
   for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
     const created = await tryCreateLock(lockPath);
     if (created) return true;
@@ -968,7 +1238,7 @@ async function tryCreateLock(lockPath) {
 }
 async function isLockStale(lockPath) {
   try {
-    const content = await readFile5(lockPath, "utf-8");
+    const content = await readFile9(lockPath, "utf-8");
     const pid = parseInt(content.trim(), 10);
     if (isNaN(pid)) return true;
     return !isProcessAlive(pid);
@@ -977,7 +1247,7 @@ async function isLockStale(lockPath) {
   }
 }
 async function releaseLock(root) {
-  const lockPath = path8.join(root, LOCK_FILE);
+  const lockPath = path11.join(root, LOCK_FILE);
   try {
     await unlink(lockPath);
   } catch {
@@ -1220,8 +1490,8 @@ function buildDefaultSchema() {
 // src/schema/loader.ts
 import { existsSync as existsSync2 } from "fs";
-import { readFile as readFile6 } from "fs/promises";
-import path9 from "path";
+import { readFile as readFile10 } from "fs/promises";
+import path12 from "path";
 import yaml2 from "js-yaml";
 var SCHEMA_CANDIDATE_PATHS = [
   ".llmwiki/schema.json",
@@ -1232,7 +1502,7 @@ var SCHEMA_CANDIDATE_PATHS = [
 ];
 function findSchemaPath(root) {
   for (const candidate of SCHEMA_CANDIDATE_PATHS) {
-    const absolute = path9.join(root, candidate);
+    const absolute = path12.join(root, candidate);
     if (existsSync2(absolute)) return absolute;
   }
   return null;
@@ -1285,12 +1555,12 @@ async function loadSchema(root) {
   const defaults = buildDefaultSchema();
   const schemaPath = findSchemaPath(root);
   if (!schemaPath) return defaults;
-  const raw = await readFile6(schemaPath, "utf-8");
+  const raw = await readFile10(schemaPath, "utf-8");
   const parsed = parseSchemaFile(schemaPath, raw);
   return applyOverrides(defaults, parsed, schemaPath);
 }
 function defaultSchemaInitPath(root) {
-  return path9.join(root, SCHEMA_CANDIDATE_PATHS[0]);
+  return path12.join(root, SCHEMA_CANDIDATE_PATHS[0]);
 }
 // src/schema/helpers.ts
@@ -1462,7 +1732,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
 }
 // src/compiler/orphan.ts
-import path10 from "path";
+import path13 from "path";
 async function markOrphaned(root, sourceFile, state) {
   const sourceEntry = state.sources[sourceFile];
   if (!sourceEntry) return;
@@ -1488,7 +1758,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
   }
 }
 async function orphanPage(root, slug, reason) {
-  const pagePath = path10.join(root, CONCEPTS_DIR, `${slug}.md`);
+  const pagePath = path13.join(root, CONCEPTS_DIR, `${slug}.md`);
   const content = await safeReadFile(pagePath);
   if (!content) return;
   const { meta } = parseFrontmatter(content);
@@ -1499,18 +1769,18 @@ async function orphanPage(root, slug, reason) {
 }
 // src/compiler/resolver.ts
-import { readdir as readdir2, readFile as readFile7 } from "fs/promises";
-import path11 from "path";
+import { readdir as readdir2, readFile as readFile11 } from "fs/promises";
+import path14 from "path";
 import { existsSync as existsSync3 } from "fs";
 async function buildTitleIndex(root) {
-  const conceptsDir = path11.join(root, CONCEPTS_DIR);
+  const conceptsDir = path14.join(root, CONCEPTS_DIR);
   if (!existsSync3(conceptsDir)) return [];
   const files = await readdir2(conceptsDir);
   const pages = [];
   for (const file of files) {
     if (!file.endsWith(".md")) continue;
-    const filePath = path11.join(conceptsDir, file);
-    const content = await readFile7(filePath, "utf-8");
+    const filePath = path14.join(conceptsDir, file);
+    const content = await readFile11(filePath, "utf-8");
     const { meta } = parseFrontmatter(content);
     if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
       pages.push({
@@ -1596,7 +1866,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
   let count = 0;
   for (const page of titleIndex) {
     if (newSlugs.includes(page.slug)) continue;
-    const content = await readFile7(page.filePath, "utf-8");
+    const content = await readFile11(page.filePath, "utf-8");
     const { body } = parseFrontmatter(content);
     const linked = addWikilinks(body, newTitles, page.title);
     if (linked !== body) {
@@ -1608,7 +1878,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
   return count;
 }
 async function linkPage(page, titleIndex) {
-  const content = await readFile7(page.filePath, "utf-8");
+  const content = await readFile11(page.filePath, "utf-8");
   const { body } = parseFrontmatter(content);
   const linked = addWikilinks(body, titleIndex, page.title);
   if (linked === body) return false;
@@ -1619,17 +1889,17 @@ async function linkPage(page, titleIndex) {
 // src/compiler/indexgen.ts
 import { readdir as readdir3 } from "fs/promises";
-import path12 from "path";
+import path15 from "path";
 async function generateIndex(root) {
   status("*", info("Generating index..."));
-  const conceptsPath = path12.join(root, CONCEPTS_DIR);
-  const queriesPath = path12.join(root, QUERIES_DIR);
+  const conceptsPath = path15.join(root, CONCEPTS_DIR);
+  const queriesPath = path15.join(root, QUERIES_DIR);
   const concepts = await collectPageSummaries(conceptsPath);
   const queries = await collectPageSummaries(queriesPath);
   concepts.sort((a, b) => a.title.localeCompare(b.title));
   queries.sort((a, b) => a.title.localeCompare(b.title));
   const indexContent = buildIndexContent(concepts, queries);
-  const indexPath = path12.join(root, INDEX_FILE);
+  const indexPath = path15.join(root, INDEX_FILE);
   await atomicWrite(indexPath, indexContent);
   const total = concepts.length + queries.length;
   status("+", success(`Index updated with ${total} pages.`));
@@ -1643,7 +1913,7 @@ async function scanWikiPages(dirPath) {
   }
   const scanned = [];
   for (const file of files.filter((f) => f.endsWith(".md"))) {
-    const content = await safeReadFile(path12.join(dirPath, file));
+    const content = await safeReadFile(path15.join(dirPath, file));
     const { meta } = parseFrontmatter(content);
     scanned.push({ slug: file.replace(/\.md$/, ""), meta });
   }
@@ -1680,7 +1950,7 @@ function buildIndexContent(concepts, queries) {
 // src/compiler/obsidian.ts
 import { readdir as readdir4 } from "fs/promises";
-import path13 from "path";
+import path16 from "path";
 var ABBREVIATION_MIN_WORDS = 3;
 var SWAP_CONJUNCTIONS = [" and ", " or "];
 function addObsidianMeta(frontmatter, conceptTitle, tags) {
@@ -1722,11 +1992,11 @@ function generateAbbreviation(title) {
   return abbreviation;
 }
 async function generateMOC(root) {
-  const conceptsPath = path13.join(root, CONCEPTS_DIR);
+  const conceptsPath = path16.join(root, CONCEPTS_DIR);
   const pages = await loadConceptPages(conceptsPath);
   const tagGroups = groupPagesByTag(pages);
   const content = buildMOCContent(tagGroups);
-  await atomicWrite(path13.join(root, MOC_FILE), content);
+  await atomicWrite(path16.join(root, MOC_FILE), content);
 }
 async function loadConceptPages(conceptsPath) {
   let files;
@@ -1738,7 +2008,7 @@ async function loadConceptPages(conceptsPath) {
   const pages = [];
   for (const file of files) {
     if (!file.endsWith(".md")) continue;
-    const content = await safeReadFile(path13.join(conceptsPath, file));
+    const content = await safeReadFile(path16.join(conceptsPath, file));
     if (!content) continue;
     const { meta } = parseFrontmatter(content);
     if (meta.orphaned) continue;
@@ -1789,9 +2059,143 @@ function buildMOCContent(tagGroups) {
 }
 // src/utils/embeddings.ts
-import { readFile as readFile8, readdir as readdir5 } from "fs/promises";
+import { readFile as readFile12, readdir as readdir5 } from "fs/promises";
 import { existsSync as existsSync4 } from "fs";
-import path14 from "path";
+import path17 from "path";
+// src/utils/retrieval.ts
+import { createHash as createHash2 } from "crypto";
+/**
+ * Compute a short fingerprint for a chunk of text.
+ *
+ * @param {string} text - Chunk text, hashed as UTF-8.
+ * @returns {string} The first 16 hex characters of the SHA-256 digest.
+ */
+function hashChunkText(text) {
+  const hasher = createHash2("sha256");
+  hasher.update(text, "utf8");
+  const hexDigest = hasher.digest("hex");
+  return hexDigest.slice(0, 16);
+}
+/**
+ * Break a page body into chunk strings.
+ *
+ * Paragraphs are extracted, oversized ones are split into smaller pieces,
+ * and the pieces are accumulated through appendParagraph. Whatever is still
+ * buffered at the end becomes the final chunk, after which an undersized
+ * trailing chunk may be merged into its predecessor.
+ *
+ * @param {string} body - Page body text.
+ * @returns {string[]} Chunk texts in document order (empty when no paragraphs).
+ */
+function splitIntoChunks(body) {
+  const pieces = extractParagraphs(body).flatMap((paragraph) => splitOversizedParagraph(paragraph));
+  if (pieces.length === 0) return [];
+  const chunks = [];
+  // appendParagraph pushes finished chunks as a side effect and hands back
+  // the text that is still being accumulated.
+  const pending = pieces.reduce((carry, piece) => appendParagraph(carry, piece, chunks), "");
+  if (pending.length > 0) chunks.push(pending);
+  return mergeTrailingFragment(chunks);
+}
+/**
+ * Add one paragraph to the running buffer, flushing to `chunks` when the
+ * combined text would exceed CHUNK_TARGET_CHARS.
+ *
+ * @param {string} buffer - Text accumulated so far ("" when nothing is pending).
+ * @param {string} paragraph - Paragraph (or piece of one) to add.
+ * @param {string[]} chunks - Output list; finished chunks are pushed onto it.
+ * @returns {string} The new buffer contents.
+ */
+function appendParagraph(buffer, paragraph, chunks) {
+  if (!buffer) {
+    // Nothing pending: the paragraph either starts a new buffer or, when it
+    // is already over the target on its own, becomes a chunk immediately.
+    if (paragraph.length <= CHUNK_TARGET_CHARS) return paragraph;
+    chunks.push(paragraph);
+    return "";
+  }
+  const combined = `${buffer}
+${paragraph}`;
+  if (combined.length <= CHUNK_TARGET_CHARS) return combined;
+  // Too large together: emit what was buffered and restart from this paragraph.
+  chunks.push(buffer);
+  return paragraph;
+}
+/**
+ * Fold an undersized final chunk into the chunk before it.
+ *
+ * The merge happens only when the last chunk is shorter than
+ * CHUNK_MIN_CHARS and the combined length (plus 2) stays within
+ * CHUNK_MAX_CHARS; otherwise the input array is returned untouched.
+ *
+ * @param {string[]} chunks - Chunk texts in order.
+ * @returns {string[]} The same array, or a new array with the last two merged.
+ */
+function mergeTrailingFragment(chunks) {
+  const count = chunks.length;
+  if (count < 2) return chunks;
+  const tail = chunks[count - 1];
+  const beforeTail = chunks[count - 2];
+  const tailIsSmall = tail.length < CHUNK_MIN_CHARS;
+  const fitsTogether = beforeTail.length + tail.length + 2 <= CHUNK_MAX_CHARS;
+  if (!tailIsSmall || !fitsTogether) return chunks;
+  return [...chunks.slice(0, count - 2), `${beforeTail}
+${tail}`];
+}
+/**
+ * Split a page body into trimmed, non-empty paragraphs.
+ *
+ * A paragraph break is a line ending followed by one or more blank lines.
+ * Both LF and CRLF line endings are recognized, and a line that contains
+ * only spaces or tabs counts as blank, so bodies saved with Windows line
+ * endings or stray indentation are still separated into paragraphs.
+ *
+ * @param {string} body - Page body text.
+ * @returns {string[]} Paragraphs in document order, each trimmed.
+ */
+function extractParagraphs(body) {
+  const paragraphBreak = /\r?\n(?:[ \t]*\r?\n)+/;
+  return body.split(paragraphBreak).map((p) => p.trim()).filter((p) => p.length > 0);
+}
+/**
+ * Break a paragraph longer than CHUNK_MAX_CHARS into smaller pieces.
+ *
+ * The paragraph is split after sentence-ending punctuation (., !, ?) and the
+ * sentences are greedily regrouped so each group stays within the limit.
+ * Any group that is still too long is passed through hardCut.
+ *
+ * @param {string} paragraph - Paragraph text.
+ * @returns {string[]} The paragraph itself when it fits, otherwise its pieces.
+ */
+function splitOversizedParagraph(paragraph) {
+  if (paragraph.length <= CHUNK_MAX_CHARS) return [paragraph];
+  const groups = [];
+  let current = "";
+  for (const sentence of paragraph.split(/(?<=[.!?])\s+/)) {
+    if (current.length === 0) {
+      current = sentence;
+      continue;
+    }
+    const extended = `${current} ${sentence}`;
+    if (extended.length > CHUNK_MAX_CHARS) {
+      // Adding this sentence would overflow: close the group and start anew.
+      groups.push(current.trim());
+      current = sentence;
+    } else {
+      current = extended;
+    }
+  }
+  if (current.length > 0) groups.push(current.trim());
+  return groups.flatMap((group) => hardCut(group));
+}
+/**
+ * Cut text into consecutive pieces of at most CHUNK_MAX_CHARS UTF-16 units.
+ *
+ * A cut is never placed between the two halves of a surrogate pair: when the
+ * limit would fall there, the piece ends one unit earlier so the whole
+ * character moves to the next piece. This keeps every piece well-formed
+ * text (no lone surrogates) for hashing and embedding.
+ *
+ * @param {string} text - Text to cut.
+ * @returns {string[]} The text itself when it fits, otherwise its pieces in order.
+ */
+function hardCut(text) {
+  if (text.length <= CHUNK_MAX_CHARS) return [text];
+  const pieces = [];
+  let start = 0;
+  while (start < text.length) {
+    let end = Math.min(start + CHUNK_MAX_CHARS, text.length);
+    if (end < text.length && end - start > 1) {
+      const before = text.charCodeAt(end - 1);
+      const after = text.charCodeAt(end);
+      const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
+      // Back off by one unit; the `end - start > 1` guard guarantees progress.
+      if (splitsPair) end -= 1;
+    }
+    pieces.push(text.slice(start, end));
+    start = end;
+  }
+  return pieces;
+}
+/**
+ * Re-score retrieval candidates against the query using BM25.
+ *
+ * @param {string} query - The user's question.
+ * @param {Array<{text: string, baseScore: number}>} candidates - Candidates
+ *   with their text and prior score.
+ * @returns {Array<{candidate: object, score: number}>} Scored candidates.
+ *   When the query yields no terms, candidates keep their input order and
+ *   their baseScore; otherwise they are sorted by the combined score.
+ */
+function rerankWithBm25(query, candidates) {
+  if (candidates.length === 0) return [];
+  const queryTerms = tokenize(query);
+  if (queryTerms.length > 0) {
+    const docs = candidates.map((entry) => tokenize(entry.text));
+    return rankByBm25Score(candidates, docs, queryTerms, buildCorpusStats(docs));
+  }
+  // No usable query terms: fall back to the prior scores unchanged.
+  return candidates.map((candidate) => ({ candidate, score: candidate.baseScore }));
+}
+/**
+ * Score each candidate as its BM25 score plus its weighted baseScore, then
+ * sort by that total in descending order.
+ *
+ * @param {Array<{baseScore: number}>} candidates - Candidates to score.
+ * @param {string[][]} docs - Token list per candidate, index-aligned.
+ * @param {string[]} queryTerms - Tokenized query.
+ * @param {{docFreq: Map<string, number>, avgDocLen: number, totalDocs: number}} stats
+ *   Corpus statistics for the candidate set.
+ * @returns {Array<{candidate: object, score: number}>} Highest score first.
+ */
+function rankByBm25Score(candidates, docs, queryTerms, stats) {
+  const ranked = [];
+  for (const [position, candidate] of candidates.entries()) {
+    const lexical = bm25Score(queryTerms, docs[position], stats);
+    ranked.push({ candidate, score: lexical + candidate.baseScore * BASE_SCORE_WEIGHT });
+  }
+  ranked.sort((left, right) => right.score - left.score);
+  return ranked;
+}
+/**
+ * Lower-case text and split it into word tokens.
+ *
+ * A token is a maximal run of Unicode letters, combining marks and digits,
+ * so accented and non-Latin words are kept whole instead of being dropped
+ * or cut at the first non-ASCII character. Pure ASCII input tokenizes the
+ * same way as a plain [a-z0-9]+ scan.
+ *
+ * @param {string} text - Text to tokenize.
+ * @returns {string[]} Tokens in order of appearance (empty when none).
+ */
+function tokenize(text) {
+  return text.toLowerCase().match(/[\p{L}\p{M}\p{N}]+/gu) ?? [];
+}
+/**
+ * Gather the corpus-level numbers BM25 needs from tokenized documents.
+ *
+ * @param {string[][]} docs - One token list per document.
+ * @returns {{docFreq: Map<string, number>, avgDocLen: number, totalDocs: number}}
+ *   docFreq counts the documents containing each term; avgDocLen is the mean
+ *   token count (0 for an empty corpus); totalDocs is the document count.
+ */
+function buildCorpusStats(docs) {
+  const docFreq = new Map();
+  docs.forEach((tokens) => {
+    // Each term counts once per document regardless of repetitions.
+    new Set(tokens).forEach((term) => {
+      docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
+    });
+  });
+  const totalDocs = docs.length;
+  const totalLen = docs.reduce((sum, tokens) => sum + tokens.length, 0);
+  const avgDocLen = totalDocs > 0 ? totalLen / totalDocs : 0;
+  return { docFreq, avgDocLen, totalDocs };
+}
+// BM25 k1 parameter: scales the term-frequency part of bm25Score.
+var BM25_K1 = 1.5;
+// BM25 b parameter: weight of the document-length ratio in bm25Score.
+var BM25_B = 0.75;
+// Multiplier applied to a candidate's baseScore before it is added to the
+// BM25 score in rankByBm25Score.
+var BASE_SCORE_WEIGHT = 0.5;
+/**
+ * BM25 score of one tokenized document for the given query terms.
+ *
+ * @param {string[]} queryTerms - Tokenized query (repeated terms count again).
+ * @param {string[]} docTokens - Tokens of the document being scored.
+ * @param {{docFreq: Map<string, number>, avgDocLen: number, totalDocs: number}} stats
+ *   Corpus statistics.
+ * @returns {number} The summed per-term score; 0 for an empty document or corpus.
+ */
+function bm25Score(queryTerms, docTokens, stats) {
+  const nothingToScore = docTokens.length === 0 || stats.totalDocs === 0;
+  if (nothingToScore) return 0;
+  const termFreq = countTerms(docTokens);
+  // A zero average length falls back to 1 to avoid dividing by zero.
+  const lengthRatio = docTokens.length / (stats.avgDocLen || 1);
+  return queryTerms.reduce((sum, term) => {
+    const tf = termFreq.get(term) ?? 0;
+    if (tf === 0) return sum;
+    const idf = idfWeight(stats.docFreq.get(term) ?? 0, stats.totalDocs);
+    const numerator = tf * (BM25_K1 + 1);
+    const denominator = tf + BM25_K1 * (1 - BM25_B + BM25_B * lengthRatio);
+    return sum + idf * (numerator / denominator);
+  }, 0);
+}
+/**
+ * Inverse-document-frequency weight for a term.
+ *
+ * @param {number} docFrequency - Number of documents containing the term.
+ * @param {number} totalDocs - Number of documents in the corpus.
+ * @returns {number} log(1 + (totalDocs - docFrequency + 0.5) / (docFrequency + 0.5)).
+ */
+function idfWeight(docFrequency, totalDocs) {
+  return Math.log(1 + (totalDocs - docFrequency + 0.5) / (docFrequency + 0.5));
+}
+/**
+ * Count how many times each token occurs.
+ *
+ * @param {string[]} tokens - Tokens to count.
+ * @returns {Map<string, number>} Occurrences per token, in first-seen order.
+ */
+function countTerms(tokens) {
+  return tokens.reduce(
+    (tally, token) => tally.set(token, (tally.get(token) ?? 0) + 1),
+    new Map()
+  );
+}
+// src/utils/embeddings.ts
+var STORE_VERSION = 2;
 function cosineSimilarity(a, b) {
   if (a.length !== b.length || a.length === 0) return 0;
   let dot = 0;
@@ -1813,24 +2217,27 @@ function findTopK(queryVec, store, k) {
   scored.sort((left, right) => right.score - left.score);
   return scored.slice(0, k).map((item) => item.entry);
 }
+/**
+ * Rank chunks by cosine similarity to the query vector and keep the best k.
+ *
+ * @param {number[]} queryVec - Embedding of the query.
+ * @param {Array<{vector: number[]}>} chunks - Stored chunks with embeddings.
+ * @param {number} k - Maximum number of results.
+ * @returns {Array<{chunk: object, score: number}>} Highest similarity first.
+ */
+function findTopKChunks(queryVec, chunks, k) {
+  const ranked = [];
+  for (const chunk of chunks) {
+    ranked.push({ chunk, score: cosineSimilarity(queryVec, chunk.vector) });
+  }
+  ranked.sort((left, right) => right.score - left.score);
+  return ranked.slice(0, k);
+}
 async function readEmbeddingStore(root) {
-  const filePath = path14.join(root, EMBEDDINGS_FILE);
+  const filePath = path17.join(root, EMBEDDINGS_FILE);
   if (!existsSync4(filePath)) return null;
-  const raw = await readFile8(filePath, "utf-8");
+  const raw = await readFile12(filePath, "utf-8");
   return JSON.parse(raw);
 }
 async function writeEmbeddingStore(root, store) {
-  const filePath = path14.join(root, EMBEDDINGS_FILE);
+  const filePath = path17.join(root, EMBEDDINGS_FILE);
   await atomicWrite(filePath, JSON.stringify(store, null, 2));
 }
 async function findRelevantPages(root, question) {
-  const store = await readEmbeddingStore(root);
-  if (!store || store.entries.length === 0) return [];
-  const activeModel = resolveEmbeddingModel();
-  if (store.model !== activeModel) {
-    warnStaleEmbeddingStore(store.model, activeModel);
-    return [];
-  }
+  const store = await loadActiveStore(root, (s) => s.entries.length > 0);
+  if (!store) return [];
   const queryVec = await getProvider().embed(question);
   return findTopK(queryVec, store, EMBEDDING_TOP_K).map((entry) => ({
     slug: entry.slug,
@@ -1838,10 +2245,26 @@ async function findRelevantPages(root, question) {
     summary: entry.summary
   }));
 }
+/**
+ * Find the stored chunks most similar to a question.
+ *
+ * @param {string} root - Project root directory.
+ * @param {string} question - The user's question; embedded via the active provider.
+ * @param {number} k - Maximum number of chunks to return.
+ * @returns {Promise<Array<{chunk: object, score: number}>>} Ranked chunks, or
+ *   an empty list when no usable store with chunks is available.
+ */
+async function findRelevantChunks(root, question, k) {
+  const hasChunks = (candidate) => (candidate.chunks?.length ?? 0) > 0;
+  const store = await loadActiveStore(root, hasChunks);
+  if (!store) return [];
+  const provider = getProvider();
+  const queryVec = await provider.embed(question);
+  return findTopKChunks(queryVec, store.chunks ?? [], k);
+}
+/**
+ * Load the embedding store if it is usable for the current configuration.
+ *
+ * @param {string} root - Project root directory.
+ * @param {(store: object) => boolean} hasContent - Predicate deciding whether
+ *   the store holds the data the caller needs.
+ * @returns {Promise<object|null>} The store, or null when it is missing, fails
+ *   the predicate, or was built with a different embedding model (in which
+ *   case a stale-store warning is emitted).
+ */
+async function loadActiveStore(root, hasContent) {
+  const store = await readEmbeddingStore(root);
+  if (!store) return null;
+  if (!hasContent(store)) return null;
+  const activeModel = resolveEmbeddingModel();
+  if (store.model === activeModel) return store;
+  warnStaleEmbeddingStore(store.model, activeModel);
+  return null;
+}
 async function collectPageRecords(root) {
   const records = [];
   for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
-    const absDir = path14.join(root, dir);
+    const absDir = path17.join(root, dir);
     let files;
     try {
       files = await readdir5(absDir);
@@ -1849,18 +2272,23 @@ async function collectPageRecords(root) {
       continue;
     }
     for (const file of files.filter((f) => f.endsWith(".md"))) {
-      const content = await safeReadFile(path14.join(absDir, file));
-      const { meta } = parseFrontmatter(content);
-      if (meta.orphaned || typeof meta.title !== "string") continue;
-      records.push({
-        slug: file.replace(/\.md$/, ""),
-        title: meta.title,
-        summary: typeof meta.summary === "string" ? meta.summary : ""
-      });
+      const record = await readPageRecord(absDir, file);
+      if (record) records.push(record);
     }
   }
   return records;
 }
+/**
+ * Read one markdown page and turn it into an embedding record.
+ *
+ * @param {string} absDir - Directory containing the page.
+ * @param {string} file - File name of the page (ending in ".md").
+ * @returns {Promise<{slug: string, title: string, summary: string, body: string}|null>}
+ *   The record, or null when the page is marked orphaned or has no string title.
+ */
+async function readPageRecord(absDir, file) {
+  const pagePath = path17.join(absDir, file);
+  const { meta, body } = parseFrontmatter(await safeReadFile(pagePath));
+  const isUsable = !meta.orphaned && typeof meta.title === "string";
+  if (!isUsable) return null;
+  const slug = file.replace(/\.md$/, "");
+  // A non-string summary is treated as absent.
+  const summary = typeof meta.summary === "string" ? meta.summary : "";
+  return { slug, title: meta.title, summary, body };
+}
 function buildEmbeddingText(record) {
   return record.summary ? `${record.title}
@@ -1913,6 +2341,56 @@ function mergeEntries(existing, fresh, liveSlugs) {
   }
   return Array.from(bySlug.values());
 }
+/**
+ * Rebuild the chunk list for every live page, reusing stored chunks where
+ * possible.
+ *
+ * @param {Array<{slug: string, title: string, body: string}>} records - Live pages.
+ * @param {Array<object>} existing - Previously stored chunks; those belonging
+ *   to slugs no longer present in `records` are ignored.
+ * @param {boolean} forceAll - When true, no stored chunk is reused.
+ * @returns {Promise<Array<object>>} Chunks for all records, in record order.
+ */
+async function refreshChunkEmbeddings(records, existing, forceAll) {
+  const liveSlugs = new Set();
+  for (const { slug } of records) liveSlugs.add(slug);
+  const survivors = existing.filter((chunk) => liveSlugs.has(chunk.slug));
+  const existingByKey = indexChunksByKey(survivors);
+  const now = new Date().toISOString();
+  const perPage = [];
+  // Pages are processed one at a time, in order.
+  for (const record of records) {
+    perPage.push(await embedRecordChunks(record, existingByKey, forceAll, now));
+  }
+  return perPage.flat();
+}
+/**
+ * Produce the chunk entries for a single page.
+ *
+ * The page body is split into chunks; a stored chunk at the same position
+ * with the same content hash is reused (with the title refreshed), and
+ * every other chunk is embedded through the active provider.
+ *
+ * @param {{slug: string, title: string, body: string}} record - The page.
+ * @param {Map<string, object>} existingByKey - Stored chunks keyed by slug and index.
+ * @param {boolean} forceAll - When true, every chunk is re-embedded.
+ * @param {string} now - Timestamp recorded as updatedAt on new chunks.
+ * @returns {Promise<Array<object>>} Chunk entries in body order.
+ */
+async function embedRecordChunks(record, existingByKey, forceAll, now) {
+  const provider = getProvider();
+  const { slug, title } = record;
+  const out = [];
+  for (const [chunkIndex, text] of splitIntoChunks(record.body).entries()) {
+    const contentHash = hashChunkText(text);
+    const cached = pickReusableChunk(existingByKey, slug, chunkIndex, contentHash, forceAll);
+    if (cached) {
+      out.push({ ...cached, title });
+    } else {
+      const vector = await provider.embed(text);
+      out.push({ slug, title, chunkIndex, contentHash, text, vector, updatedAt: now });
+    }
+  }
+  return out;
+}
+/**
+ * Index chunks by their slug-and-position key.
+ *
+ * @param {Array<{slug: string, chunkIndex: number}>} chunks - Chunks to index.
+ * @returns {Map<string, object>} Key → chunk; a later duplicate key replaces
+ *   the earlier value.
+ */
+function indexChunksByKey(chunks) {
+  return new Map(chunks.map((chunk) => [chunkKey(chunk.slug, chunk.chunkIndex), chunk]));
+}
+/**
+ * Build the lookup key for a chunk: the slug, a "#", then the chunk index.
+ *
+ * @param {string} slug - Page slug.
+ * @param {number} chunkIndex - Position of the chunk within the page.
+ * @returns {string} The combined key.
+ */
+function chunkKey(slug, chunkIndex) {
+  return "".concat(slug, "#", chunkIndex);
+}
+/**
+ * Look up a stored chunk that can stand in for a freshly split one.
+ *
+ * @param {Map<string, object>} byKey - Stored chunks keyed by slug and index.
+ * @param {string} slug - Page slug.
+ * @param {number} chunkIndex - Position of the chunk within the page.
+ * @param {string} contentHash - Hash of the current chunk text.
+ * @param {boolean} forceAll - When true, nothing is ever reused.
+ * @returns {object|null} The stored chunk when one exists at that key with
+ *   the same content hash; otherwise null.
+ */
+function pickReusableChunk(byKey, slug, chunkIndex, contentHash, forceAll) {
+  if (forceAll) return null;
+  const cached = byKey.get(chunkKey(slug, chunkIndex));
+  return cached && cached.contentHash === contentHash ? cached : null;
+}
 async function updateEmbeddings(root, changedSlugs) {
   const records = await collectPageRecords(root);
   const liveSlugs = new Set(records.map((r) => r.slug));
@@ -1921,29 +2399,51 @@ async function updateEmbeddings(root, changedSlugs) {
   const modelChanged = Boolean(existingStore && existingStore.model !== embeddingModel);
   const toEmbed = new Set(changedSlugs.filter((slug) => liveSlugs.has(slug)));
   const previousEntries = modelChanged ? [] : existingStore?.entries ?? [];
-  if (!existingStore || modelChanged) {
+  const previousChunks = modelChanged ? [] : existingStore?.chunks ?? [];
+  const isEmptyStore = isStoreEmpty(existingStore);
+  if (!existingStore || modelChanged || isEmptyStore && liveSlugs.size > 0) {
     for (const record of records) toEmbed.add(record.slug);
   }
-  if (!modelChanged && toEmbed.size === 0 && previousEntries.every((e) => liveSlugs.has(e.slug))) {
+  if (!shouldRunEmbedding(modelChanged, toEmbed, previousEntries, previousChunks, liveSlugs)) {
     return;
   }
   const freshEntries = await embedPages(records, toEmbed);
   const mergedEntries = mergeEntries(previousEntries, freshEntries, liveSlugs);
-  const dimensions = mergedEntries[0]?.vector.length ?? 0;
+  const mergedChunks = await refreshChunkEmbeddings(records, previousChunks, modelChanged);
+  await persistRefreshedStore(root, embeddingModel, mergedEntries, mergedChunks);
+}
+async function persistRefreshedStore(root, embeddingModel, entries, chunks) {
+  const dimensions = entries[0]?.vector.length ?? chunks[0]?.vector.length ?? 0;
   const store = {
-    version: 1,
+    version: STORE_VERSION,
     model: embeddingModel,
     dimensions,
-    entries: mergedEntries
+    entries,
+    chunks
   };
   await writeEmbeddingStore(root, store);
-  status("*", dim(`Embeddings updated (${mergedEntries.length} pages).`));
+  status(
+    "*",
+    dim(`Embeddings updated (${entries.length} pages, ${chunks.length} chunks).`)
+  );
+}
+/**
+ * Tell whether an existing store holds neither page entries nor chunks.
+ *
+ * @param {{entries: Array, chunks?: Array}|null|undefined} store - Loaded store.
+ * @returns {boolean} True only for a present store with no entries and no
+ *   chunks; a missing store yields false.
+ */
+function isStoreEmpty(store) {
+  if (!store) return false;
+  const hasNoEntries = store.entries.length === 0;
+  const hasNoChunks = !store.chunks || store.chunks.length === 0;
+  return hasNoEntries && hasNoChunks;
+}
+/**
+ * Decide whether the embedding store needs to be rebuilt.
+ *
+ * @param {boolean} modelChanged - The stored model differs from the active one.
+ * @param {Set<string>} toEmbed - Slugs queued for (re-)embedding.
+ * @param {Array<{slug: string}>} previousEntries - Stored page entries.
+ * @param {Array<{slug: string}>} previousChunks - Stored chunks.
+ * @param {Set<string>} liveSlugs - Slugs of pages that currently exist.
+ * @returns {boolean} True when the model changed, something is queued, a
+ *   stored entry or chunk refers to a page that no longer exists, or page
+ *   entries exist without any chunks while live pages are present.
+ */
+function shouldRunEmbedding(modelChanged, toEmbed, previousEntries, previousChunks, liveSlugs) {
+  const isLive = (item) => liveSlugs.has(item.slug);
+  const hasStaleEntry = () => !previousEntries.every(isLive);
+  const hasStaleChunk = () => !previousChunks.every(isLive);
+  const lacksChunks = () => previousEntries.length > 0 && previousChunks.length === 0 && liveSlugs.size > 0;
+  return Boolean(modelChanged) || toEmbed.size > 0 || hasStaleEntry() || hasStaleChunk() || lacksChunks();
 }
 // src/compiler/candidates.ts
 import { readdir as readdir6, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
 import { existsSync as existsSync5 } from "fs";
-import path15 from "path";
+import path18 from "path";
 import { randomBytes } from "crypto";
 var ID_SUFFIX_BYTES = 4;
 var CANDIDATE_EXT = ".json";
@@ -1952,10 +2452,10 @@ function buildCandidateId(slug) {
   return `${slug}-${suffix}`;
 }
 function candidatePath(root, id) {
-  return path15.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
+  return path18.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
 }
 function archivePath(root, id) {
-  return path15.join(root, CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_EXT}`);
+  return path18.join(root, CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_EXT}`);
 }
 async function writeCandidate(root, draft) {
   const candidate = {
@@ -2006,7 +2506,7 @@ function isValidCandidate(value) {
   return typeof candidate.id === "string" && typeof candidate.title === "string" && typeof candidate.slug === "string" && typeof candidate.body === "string" && Array.isArray(candidate.sources);
 }
 async function listCandidates(root) {
-  const dir = path15.join(root, CANDIDATES_DIR);
+  const dir = path18.join(root, CANDIDATES_DIR);
   if (!existsSync5(dir)) return [];
   const entries = await readdir6(dir, { withFileTypes: true });
   const candidates = [];
@@ -2033,7 +2533,7 @@ async function archiveCandidate(root, id) {
   const sourcePath = candidatePath(root, id);
   if (!existsSync5(sourcePath)) return false;
   const target = archivePath(root, id);
-  await mkdir5(path15.dirname(target), { recursive: true });
+  await mkdir5(path18.dirname(target), { recursive: true });
   try {
     await rename3(sourcePath, target);
   } catch {
@@ -2045,9 +2545,9 @@ async function archiveCandidate(root, id) {
 }
 // src/linter/rules.ts
-import { readdir as readdir7, readFile as readFile9 } from "fs/promises";
+import { readdir as readdir7, readFile as readFile13 } from "fs/promises";
 import { existsSync as existsSync6 } from "fs";
-import path16 from "path";
+import path19 from "path";
 var MIN_BODY_LENGTH = 50;
 var WIKILINK_PATTERN2 = /\[\[([^\]]+)\]\]/g;
 var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
@@ -2068,22 +2568,22 @@ async function readMarkdownFiles(dirPath) {
   const mdFiles = entries.filter((f) => f.endsWith(".md"));
   const results = await Promise.all(
     mdFiles.map(async (fileName) => {
-      const filePath = path16.join(dirPath, fileName);
-      const content = await readFile9(filePath, "utf-8");
+      const filePath = path19.join(dirPath, fileName);
+      const content = await readFile13(filePath, "utf-8");
       return { filePath, content };
     })
   );
   return results;
 }
 async function collectAllPages(root) {
-  const conceptPages = await readMarkdownFiles(path16.join(root, CONCEPTS_DIR));
-  const queryPages = await readMarkdownFiles(path16.join(root, QUERIES_DIR));
+  const conceptPages = await readMarkdownFiles(path19.join(root, CONCEPTS_DIR));
+  const queryPages = await readMarkdownFiles(path19.join(root, QUERIES_DIR));
   return [...conceptPages, ...queryPages];
 }
 function buildPageSlugSet(pages) {
   const slugs = /* @__PURE__ */ new Set();
   for (const page of pages) {
-    const baseName = path16.basename(page.filePath, ".md");
+    const baseName = path19.basename(page.filePath, ".md");
     slugs.add(baseName.toLowerCase());
   }
   return slugs;
@@ -2318,7 +2818,7 @@ function countLines(content) {
 }
 async function checkBrokenCitations(root) {
   const pages = await collectAllPages(root);
-  const sourcesDir = path16.join(root, SOURCES_DIR);
+  const sourcesDir = path19.join(root, SOURCES_DIR);
   const results = [];
   const lineCountCache = /* @__PURE__ */ new Map();
   for (const page of pages) {
@@ -2333,7 +2833,7 @@ async function collectBrokenForMarker(captured, line, pageFile, sourcesDir, line
     const trimmed = part.trim();
     if (trimmed.length === 0) continue;
     const filename = stripSpanSuffix(trimmed);
-    const citedPath = path16.join(sourcesDir, filename);
+    const citedPath = path19.join(sourcesDir, filename);
     if (!existsSync6(citedPath)) {
       out.push({
         rule: "broken-citation",
@@ -2387,7 +2887,7 @@ async function checkMalformedClaimCitations(root) {
 // src/compiler/page-renderer.ts
 import { readdir as readdir8 } from "fs/promises";
-import path17 from "path";
+import path20 from "path";
 // src/compiler/provenance.ts
 function addProvenanceMeta(fields, concept) {
@@ -2417,7 +2917,7 @@ function reportContradictionWarnings(conceptTitle, concept) {
 // src/compiler/page-renderer.ts
 var RELATED_PAGE_CONTEXT_LIMIT = 5;
 async function renderMergedPageContent(root, entry, schema) {
-  const pagePath = path17.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
+  const pagePath = path20.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
   const existingPage = await safeReadFile(pagePath);
   const relatedPages = await loadRelatedPages(root, entry.slug);
   const system = buildPagePrompt(
@@ -2456,7 +2956,7 @@ function buildMergedFrontmatter(entry, existingPage, schema) {
   return buildFrontmatter(frontmatterFields);
 }
 async function loadRelatedPages(root, excludeSlug) {
-  const conceptsPath = path17.join(root, CONCEPTS_DIR);
+  const conceptsPath = path20.join(root, CONCEPTS_DIR);
   let files;
   try {
     files = await readdir8(conceptsPath);
@@ -2466,7 +2966,7 @@ async function loadRelatedPages(root, excludeSlug) {
   const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, RELATED_PAGE_CONTEXT_LIMIT);
   const contents = [];
   for (const f of related) {
-    const content = await safeReadFile(path17.join(conceptsPath, f));
+    const content = await safeReadFile(path20.join(conceptsPath, f));
     if (!content) continue;
     const { meta } = parseFrontmatter(content);
     if (meta.orphaned) continue;
@@ -2667,9 +3167,9 @@ function printChangesSummary(changes) {
 }
 async function extractForSource(root, sourceFile) {
   status("*", info(`Extracting: ${sourceFile}`));
-  const sourcePath = path18.join(root, SOURCES_DIR, sourceFile);
-  const sourceContent = await readFile10(sourcePath, "utf-8");
-  const existingIndex = await safeReadFile(path18.join(root, INDEX_FILE));
+  const sourcePath = path21.join(root, SOURCES_DIR, sourceFile);
+  const sourceContent = await readFile14(sourcePath, "utf-8");
+  const existingIndex = await safeReadFile(path21.join(root, INDEX_FILE));
   const concepts = await extractConcepts(sourceContent, existingIndex);
   if (concepts.length > 0) {
     const names = concepts.map((c) => c.concept).join(", ");
@@ -2732,7 +3232,7 @@ async function generateMergedPage(root, entry, schema, options, sourceStates) {
   if (options.review) {
     return await persistReviewCandidate(root, entry, fullPage, sourceStates, schema);
   }
-  const pagePath = path18.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
+  const pagePath = path21.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
   const error2 = await writePageIfValid(pagePath, fullPage, entry.concept.concept);
   return { error: error2 ?? void 0 };
 }
@@ -2760,7 +3260,7 @@ async function generateSeedPages(root, schema, generation) {
 }
 async function generateSingleSeedPage(root, schema, seed) {
   const slug = slugify(seed.title);
-  const pagePath = path18.join(root, CONCEPTS_DIR, `${slug}.md`);
+  const pagePath = path21.join(root, CONCEPTS_DIR, `${slug}.md`);
   const relatedContent = await loadSeedRelatedPages(root, seed.relatedSlugs ?? []);
   const rule = schema.kinds[seed.kind];
   const system = buildSeedPagePrompt(seed, rule, relatedContent);
@@ -2792,7 +3292,7 @@ async function loadSeedRelatedPages(root, slugs) {
   if (slugs.length === 0) return "";
   const contents = [];
   for (const slug of slugs) {
-    const pagePath = path18.join(root, CONCEPTS_DIR, `${slug}.md`);
+    const pagePath = path21.join(root, CONCEPTS_DIR, `${slug}.md`);
     const content = await safeReadFile(pagePath);
     if (content) contents.push(content);
   }
@@ -2847,7 +3347,7 @@ async function compileCommand(options = {}) {
 // src/commands/query.ts
 import { existsSync as existsSync8 } from "fs";
-import path19 from "path";
+import path22 from "path";
 var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
 var PAGE_SELECTION_TOOL = {
   name: "select_pages",
@@ -2895,16 +3395,92 @@ ${indexContent}`;
 function buildFilteredIndex(candidates) { // Renders candidates as a markdown bullet list: bold slug, title, em dash, summary; one line per entry.
   return candidates.map((entry) => `- **${entry.slug}**: ${entry.title} \u2014 ${entry.summary}`).join("\n");
 }
-async function selectRelevantPages(root, question) {
+async function selectRelevantPages(root, question, debug) { // Selects pages for a question: chunk retrieval first, then the page pre-filter, then the full index file.
+  const chunkSelection = await trySelectViaChunks(root, question, debug); // null when chunk retrieval produced nothing
+  if (chunkSelection) return chunkSelection;
   const candidates = await tryFindRelevantPages(root, question);
   if (candidates.length > 0) {
     const filteredIndex = buildFilteredIndex(candidates);
     const { pages: rawPages2, reasoning: reasoning2 } = await selectPages(question, filteredIndex);
-    return { pages: rawPages2, rawPages: rawPages2, reasoning: reasoning2 };
+    return { pages: rawPages2, rawPages: rawPages2, reasoning: reasoning2, chunks: [] }; // non-chunk paths report an empty chunks array and no debug field
   }
-  const indexContent = await safeReadFile(path19.join(root, INDEX_FILE));
+  const indexContent = await safeReadFile(path22.join(root, INDEX_FILE));
   const { pages: rawPages, reasoning } = await selectPages(question, indexContent);
-  return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning };
+  return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning, chunks: [] }; // only this full-index path slugifies the selected names
+}
+async function trySelectViaChunks(root, question, debug) { // Chunk-level selection path; resolves to null when no chunks come back so the caller can fall back.
+  const ranked = await tryFindRelevantChunks(root, question); // array of { chunk, score } entries; empty when retrieval failed
+  if (ranked.length === 0) return null;
+  const reranked = rerankWithBm25( // presumably a BM25 rerank (helper not visible here); results expose .candidate and .score
+    question,
+    ranked.map(({ chunk, score }) => ({ text: chunk.text, baseScore: score, chunk }))
+  );
+  const kept = reranked.slice(0, CHUNK_RERANK_KEEP); // keep only the leading CHUNK_RERANK_KEEP reranked entries
+  const reorderingHappened = wasReordered(ranked, kept.map((k) => k.candidate.chunk)); // did reranking change the leading order
+  const chunkCitations = toChunkCitations(kept);
+  const pageSlugs = collapseToPages(chunkCitations, QUERY_PAGE_LIMIT); // distinct slugs in chunk order, capped by QUERY_PAGE_LIMIT
+  const reasoning = buildChunkReasoning(chunkCitations, pageSlugs);
+  return {
+    pages: pageSlugs,
+    rawPages: pageSlugs, // same array as pages on this path
+    reasoning,
+    chunks: chunkCitations,
+    debug: debug ? buildDebug(chunkCitations, pageSlugs, reorderingHappened) : void 0 // debug payload is built only when the flag is truthy
+  };
+}
+function wasReordered(before, after) { // True when the two sequences differ at some shared position; before holds { chunk } wrappers, after holds bare chunks.
+  const limit = Math.min(before.length, after.length); // only the overlapping prefix is compared, so a length difference alone is not a reorder
+  for (let i = 0; i < limit; i++) {
+    if (before[i].chunk !== after[i]) return true; // identity comparison of chunk objects
+  }
+  return false;
+}
+function toChunkCitations(ranked) { // Flattens { candidate, score } entries into plain citation records.
+  return ranked.map(({ candidate, score }) => ({
+    slug: candidate.chunk.slug,
+    title: candidate.chunk.title,
+    chunkIndex: candidate.chunk.chunkIndex,
+    score, // the entry-level score, not candidate.baseScore
+    text: candidate.chunk.text
+  }));
+}
+function collapseToPages(chunks, limit) { // Returns distinct chunk slugs in first-seen order, stopping once limit slugs are collected.
+  const slugs = [];
+  const seen = /* @__PURE__ */ new Set();
+  for (const chunk of chunks) {
+    if (seen.has(chunk.slug)) continue; // later chunks of an already-listed page are skipped
+    seen.add(chunk.slug);
+    slugs.push(chunk.slug);
+    if (slugs.length >= limit) break; // NOTE(review): checked after the push, so a limit of 0 or less still yields one slug
+  }
+  return slugs;
+}
+function buildChunkReasoning(chunks, pages) { // Builds the reasoning string: page count, chunk count, and a list of leading chunks as slug#index (score).
+  const top = chunks.slice(0, pages.length); // NOTE(review): the first pages.length chunks may repeat a slug rather than show one chunk per page
+  const summary = top.map((c) => `${c.slug}#${c.chunkIndex} (${c.score.toFixed(3)})`).join(", "); // scores shown with 3 decimals
+  return `Selected ${pages.length} page(s) from ${chunks.length} reranked chunks: ${summary}`;
+}
+function buildDebug(chunks, pageSlugs, reranked) { // Assembles the debug snapshot: best chunk score per page, the chunk citations, and two flags.
+  const bestPerPage = /* @__PURE__ */ new Map(); // slug -> highest chunk score seen
+  for (const c of chunks) {
+    const prev = bestPerPage.get(c.slug);
+    if (prev === void 0 || c.score > prev) bestPerPage.set(c.slug, c.score);
+  }
+  return {
+    pages: pageSlugs.map((slug) => ({ slug, score: bestPerPage.get(slug) ?? 0 })), // slugs with no chunk in the map get score 0
+    chunks,
+    usedChunks: true, // hard-coded; the only caller visible in this chunk is the chunk-level path
+    reranked // the reordering flag supplied by the caller
+  };
+}
+async function tryFindRelevantChunks(root, question) { // Wraps findRelevantChunks so any failure degrades to an empty array instead of throwing.
+  try {
+    return await findRelevantChunks(root, question, CHUNK_TOP_K); // third argument is presumably the top-k limit (helper not visible here)
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err); // non-Error throwables are stringified
+    status("!", dim(`Chunk pre-filter unavailable (${message}); falling back.`));
+    return []; // an empty result makes trySelectViaChunks return null
+  }
 }
 async function tryFindRelevantPages(root, question) {
   try {
@@ -2920,7 +3496,7 @@ async function loadSelectedPages(root, slugs) {
   for (const slug of slugs) {
     let content = "";
     for (const dir of PAGE_DIRS) {
-      const candidate = await safeReadFile(path19.join(root, dir, `${slug}.md`));
+      const candidate = await safeReadFile(path22.join(root, dir, `${slug}.md`));
       if (!candidate) continue;
       const { meta } = parseFrontmatter(candidate);
       if (meta.orphaned) continue;
@@ -2937,11 +3513,12 @@ ${content}`);
   return sections.join("\n\n");
 }
 var ANSWER_SYSTEM_PROMPT = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so."; // passed as the system prompt by callAnswerLLM
-async function callAnswerLLM(question, pagesContent, onToken) {
+async function callAnswerLLM(question, pagesContent, chunks, onToken) {
+  const provenance = chunks.length > 0 ? buildChunkProvenance(chunks) : "";
   const userMessage = `Question: ${question}
 Relevant wiki pages:
-${pagesContent}`;
+${pagesContent}${provenance}`;
   return callClaude({
     system: ANSWER_SYSTEM_PROMPT,
     messages: [{ role: "user", content: userMessage }],
@@ -2949,6 +3526,16 @@ ${pagesContent}`;
     onToken
   });
 }
+function buildChunkProvenance(chunks) { // Formats chunk citations as an excerpt section: a header line with slug and chunk index, then the chunk text.
+  const sections = chunks.map( // one header-plus-text entry per chunk, later joined by blank lines
+    (chunk) => `--- ${chunk.slug} (chunk ${chunk.chunkIndex}) ---
+${chunk.text}`
+  );
+  return `
+Most relevant excerpts (from chunk-level retrieval):
+${sections.join("\n\n")}`;
+}
 function summarizeAnswer(answer) {
   const firstLine = answer.trim().split(/\n/)[0] ?? "";
   const firstSentence = firstLine.split(/(?<=[.!?])\s/)[0] ?? firstLine;
@@ -2956,7 +3543,7 @@ function summarizeAnswer(answer) {
 }
 async function saveQueryPage(root, question, answer) {
   const slug = slugify(question);
-  const filePath = path19.join(root, QUERIES_DIR, `${slug}.md`);
+  const filePath = path22.join(root, QUERIES_DIR, `${slug}.md`);
   const frontmatter = buildFrontmatter({
     title: question,
     summary: summarizeAnswer(answer),
@@ -2982,30 +3569,42 @@ ${answer}
   return slug;
 }
 async function generateAnswer(root, question, options = {}) { // Selects pages, loads them, asks the LLM, and optionally saves the answer; throws when the index file is missing.
-  if (!existsSync8(path19.join(root, INDEX_FILE))) {
+  if (!existsSync8(path22.join(root, INDEX_FILE))) {
     throw new Error("Wiki index not found. Run `llmwiki compile` first.");
   }
-  const { pages, reasoning } = await selectRelevantPages(root, question);
-  options.onPageSelection?.(pages, reasoning);
-  const pagesContent = await loadSelectedPages(root, pages);
+  const selection = await selectRelevantPages(root, question, Boolean(options.debug)); // debug is coerced to a boolean before being passed down
+  options.onPageSelection?.(selection.pages, selection.reasoning); // optional callback, invoked before the pages are loaded
+  const pagesContent = await loadSelectedPages(root, selection.pages);
   if (!pagesContent) {
-    return { answer: "", selectedPages: pages, reasoning };
-  }
-  const answer = await callAnswerLLM(question, pagesContent, options.onToken);
-  let saved;
-  if (options.save) {
-    saved = await saveQueryPage(root, question, answer);
+    return buildEmptyResult(selection); // nothing loadable: return an empty answer without calling the LLM
   }
-  return { answer, selectedPages: pages, reasoning, saved };
+  const answer = await callAnswerLLM(question, pagesContent, selection.chunks, options.onToken); // selection.chunks is an empty array on the non-chunk paths
+  const saved = options.save ? await saveQueryPage(root, question, answer) : void 0; // saveQueryPage result, or undefined when save is off
+  return {
+    answer,
+    selectedPages: selection.pages,
+    reasoning: selection.reasoning,
+    saved,
+    debug: selection.debug // undefined unless the chunk path ran with debug enabled
+  };
+}
+function buildEmptyResult(selection) { // Result returned when no page content could be loaded: empty answer and no saved field.
+  return {
+    answer: "", // callers in this file treat an empty answer as no matching pages
+    selectedPages: selection.pages,
+    reasoning: selection.reasoning,
+    debug: selection.debug
+  };
 }
 async function queryCommand(root, question, options) {
-  if (!existsSync8(path19.join(root, INDEX_FILE))) {
+  if (!existsSync8(path22.join(root, INDEX_FILE))) {
     status("!", error("Wiki index not found. Run `llmwiki compile` first."));
     return;
   }
   header("Selecting relevant pages");
   const result = await generateAnswer(root, question, {
     save: options.save,
+    debug: options.debug,
     onToken: (text) => process.stdout.write(text),
     onPageSelection: (pages, reasoning) => {
       status("i", dim(`Reasoning: ${reasoning}`));
@@ -3014,6 +3613,7 @@ async function queryCommand(root, question, options) {
     }
   });
   process.stdout.write("\n");
+  if (result.debug) printDebugSnapshot(result.debug);
   if (!result.answer) {
     status("!", error("No matching pages found. Try refining your question."));
     return;
@@ -3024,14 +3624,34 @@ async function queryCommand(root, question, options) {
     status("\u2192", dim("Tip: use --save to add this answer to your wiki"));
   }
 }
+function printDebugSnapshot(debug) { // Prints the retrieval debug snapshot: source and rerank flags, then one line per page and one per chunk.
+  header("Retrieval debug");
+  status(
+    "i",
+    dim(
+      `Source: ${debug.usedChunks ? "chunk-level" : "page-level"}; reranked: ${debug.reranked ? "yes" : "no"}`
+    )
+  );
+  for (const page of debug.pages) {
+    status("\u2022", `${page.slug} (best chunk score ${page.score.toFixed(3)})`); // score shown with 3 decimals
+  }
+  for (const chunk of debug.chunks) {
+    const preview = chunk.text.slice(0, DEBUG_CHUNK_PREVIEW_CHARS).replace(/\s+/g, " ").trim(); // NOTE(review): sliced before whitespace is collapsed, so the preview can end up shorter than the cap
+    status(
+      "\xB7",
+      dim(`${chunk.slug}#${chunk.chunkIndex} score=${chunk.score.toFixed(3)} :: ${preview}\u2026`) // NOTE(review): the ellipsis is appended even when the text was not truncated
+    );
+  }
+}
+var DEBUG_CHUNK_PREVIEW_CHARS = 120; // cap on chunk text characters per debug line; declared below the function that reads it
 // src/commands/watch.ts
 import { watch as chokidarWatch } from "chokidar";
 import { existsSync as existsSync9 } from "fs";
-import path20 from "path";
+import path23 from "path";
 var DEBOUNCE_MS = 500;
 async function watchCommand() {
-  const sourcesPath = path20.resolve(SOURCES_DIR);
+  const sourcesPath = path23.resolve(SOURCES_DIR);
   if (!existsSync9(sourcesPath)) {
     status(
       "!",
@@ -3066,7 +3686,7 @@ async function watchCommand() {
   const scheduleCompile = (eventPath, event) => {
     status(
       "~",
-      dim(`${event}: ${path20.basename(eventPath)}`)
+      dim(`${event}: ${path23.basename(eventPath)}`)
     );
     if (debounceTimer) clearTimeout(debounceTimer);
     debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
@@ -3153,7 +3773,7 @@ async function lintCommand() {
 // src/commands/schema.ts
 import { existsSync as existsSync10 } from "fs";
 import { mkdir as mkdir6, writeFile as writeFile5 } from "fs/promises";
-import path21 from "path";
+import path24 from "path";
 async function schemaInitCommand() {
   const root = process.cwd();
   const defaults = buildDefaultSchema();
@@ -3162,7 +3782,7 @@ async function schemaInitCommand() {
     status("!", warn(`Schema file already exists at ${targetPath}`));
     return;
   }
-  await mkdir6(path21.dirname(targetPath), { recursive: true });
+  await mkdir6(path24.dirname(targetPath), { recursive: true });
   const serializable = {
     version: defaults.version,
     defaultKind: defaults.defaultKind,
@@ -3221,7 +3841,7 @@ async function reviewShowCommand(id) {
 }
 // src/commands/review-approve.ts
-import path22 from "path";
+import path25 from "path";
 // src/commands/review-helpers.ts
 async function runReviewUnderLock(id, underLock) {
@@ -3253,7 +3873,7 @@ async function approveUnderLock(root, id) {
     process.exitCode = 1;
     return;
   }
-  const pagePath = path22.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
+  const pagePath = path25.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
   await atomicWrite(pagePath, candidate.body);
   status("+", success(`Approved \u2192 ${source(pagePath)}`));
   await persistCandidateSourceStates(root, candidate);
@@ -3313,7 +3933,7 @@ import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 // src/mcp/tools.ts
-import path23 from "path";
+import path26 from "path";
 import { z } from "zod";
 // src/mcp/provider-check.ts
@@ -3406,15 +4026,16 @@ function registerQueryTool(server, root) {
     "query_wiki",
     {
       title: "Query Wiki",
-      description: "Ask a natural-language question. Selects relevant pages with the LLM, loads them, and returns a grounded answer with citations. Set save=true to persist the answer as a wiki page. Requires an LLM provider.",
+      description: "Ask a natural-language question. Selects relevant pages with the LLM, loads them, and returns a grounded answer with citations. Set save=true to persist the answer as a wiki page. Set debug=true to include the selected chunks and their scores. Requires an LLM provider.",
       inputSchema: {
         question: z.string().describe("The natural-language question to answer."),
-        save: z.boolean().optional().describe("Persist the answer as a wiki/queries/ page when true.")
+        save: z.boolean().optional().describe("Persist the answer as a wiki/queries/ page when true."),
+        debug: z.boolean().optional().describe("Include retrieval debug info (selected chunks/pages + scores).")
       }
     },
-    async ({ question, save }) => {
+    async ({ question, save, debug }) => {
       ensureProviderAvailable();
-      const result = await generateAnswer(root, question, { save });
+      const result = await generateAnswer(root, question, { save, debug });
       return jsonResult(result);
     }
   );
@@ -3438,15 +4059,30 @@ function registerSearchTool(server, root) {
   );
 }
 async function pickSearchSlugs(root, question) { // Picks slugs for a search: chunk retrieval, then page retrieval, then LLM selection over the full index.
+  try {
+    const chunks = await findRelevantChunks(root, question, CHUNK_TOP_K);
+    if (chunks.length > 0) return dedupePreservingOrder(chunks.map((c) => c.chunk.slug)); // NOTE(review): retrieval order is used as-is; no rerank step here, unlike trySelectViaChunks
+  } catch { // NOTE(review): errors are swallowed with no message so the next strategy runs; confirm this is intentional
+  }
   try {
     const candidates = await findRelevantPages(root, question);
     if (candidates.length > 0) return candidates.map((c) => c.slug);
   } catch {
   }
-  const indexContent = await safeReadFile(path23.join(root, INDEX_FILE));
+  const indexContent = await safeReadFile(path26.join(root, INDEX_FILE));
   const { pages } = await selectPages(question, indexContent); // last resort: let selectPages choose from the whole index
   return pages;
 }
+function dedupePreservingOrder(slugs) { // Returns the slugs with duplicates dropped, keeping first-occurrence order.
+  const seen = /* @__PURE__ */ new Set();
+  const out = [];
+  for (const slug of slugs) {
+    if (seen.has(slug)) continue; // already emitted
+    seen.add(slug);
+    out.push(slug);
+  }
+  return out;
+}
 function registerReadTool(server, root) {
   server.registerTool(
     "read_page",
@@ -3492,8 +4128,8 @@ function registerStatusTool(server, root) {
   );
 }
 async function collectStatus(root) {
-  const concepts = await collectPageSummaries(path23.join(root, CONCEPTS_DIR));
-  const queries = await collectPageSummaries(path23.join(root, QUERIES_DIR));
+  const concepts = await collectPageSummaries(path26.join(root, CONCEPTS_DIR));
+  const queries = await collectPageSummaries(path26.join(root, QUERIES_DIR));
   const state = await readState(root);
   const changes = await detectChanges(root, state);
   const orphans = await findOrphanedSlugs(root);
@@ -3510,7 +4146,7 @@ async function collectStatus(root) {
   };
 }
 async function findOrphanedSlugs(root) { // Lists slugs of scanned concept pages whose meta.orphaned is truthy.
-  const scanned = await scanWikiPages(path23.join(root, CONCEPTS_DIR));
+  const scanned = await scanWikiPages(path26.join(root, CONCEPTS_DIR)); // only CONCEPTS_DIR is scanned here
   return scanned.filter(({ meta }) => meta.orphaned).map(({ slug }) => slug);
 }
 async function loadPageRecords(root, slugs) {
@@ -3523,7 +4159,7 @@ async function loadPageRecords(root, slugs) {
 }
 async function readPage(root, slug) {
   for (const dir of PAGE_DIRS2) {
-    const content = await safeReadFile(path23.join(root, dir, `${slug}.md`));
+    const content = await safeReadFile(path26.join(root, dir, `${slug}.md`));
     if (!content) continue;
     const { meta, body } = parseFrontmatter(content);
     if (meta.orphaned) continue;
@@ -3538,7 +4174,7 @@ async function readPage(root, slug) {
 }
 // src/mcp/resources.ts
-import path24 from "path";
+import path27 from "path";
 import { readdir as readdir9 } from "fs/promises";
 import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
 function jsonContent(uri, payload) {
@@ -3572,7 +4208,7 @@ function registerIndexResource(server, root) {
       mimeType: "text/markdown"
     },
     async (uri) => {
-      const content = await safeReadFile(path24.join(root, INDEX_FILE));
+      const content = await safeReadFile(path27.join(root, INDEX_FILE));
       return { contents: [markdownContent(uri, content)] };
     }
   );
@@ -3639,7 +4275,7 @@ function registerQueryResource(server, root) {
   );
 }
 async function listSources(root) {
-  const sourcesPath = path24.join(root, SOURCES_DIR);
+  const sourcesPath = path27.join(root, SOURCES_DIR);
   let files;
   try {
     files = await readdir9(sourcesPath);
@@ -3648,14 +4284,14 @@ async function listSources(root) {
   }
   const records = [];
   for (const file of files.filter((f) => f.endsWith(".md"))) {
-    const content = await safeReadFile(path24.join(sourcesPath, file));
+    const content = await safeReadFile(path27.join(sourcesPath, file));
     const { meta } = parseFrontmatter(content);
     records.push({ filename: file, ...meta });
   }
   return records;
 }
 async function loadPageWithMeta(root, dir, slug) {
-  const filePath = path24.join(root, dir, `${slug}.md`);
+  const filePath = path27.join(root, dir, `${slug}.md`);
   const content = await safeReadFile(filePath);
   if (!content) {
     throw new Error(`Page not found: ${dir}/${slug}.md`);
@@ -3664,7 +4300,7 @@ async function loadPageWithMeta(root, dir, slug) {
   return { slug, meta, body: body.trim() };
 }
 async function listPagesUnder(root, dir, scheme) {
-  const pagesPath = path24.join(root, dir);
+  const pagesPath = path27.join(root, dir);
   let files;
   try {
     files = await readdir9(pagesPath);
@@ -3748,7 +4384,7 @@ reviewCommand.command("reject <id>").description("Reject a candidate and archive
     process.exit(1);
   }
 });
-program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").action(async (question, options) => {
+program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").option("--debug", "Print which pages and chunks were selected and their scores").action(async (question, options) => {
   try {
     requireProvider();
     await queryCommand(process.cwd(), question, options);