npm - llm-wiki-compiler - Versions diffs - 0.3.0 → 0.5.0 - Mend

llm-wiki-compiler 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/cli.js CHANGED Viewed

@@ -6,13 +6,15 @@ import { createRequire } from "module";
 import { Command } from "commander";
 // src/commands/ingest.ts
-import path3 from "path";
-import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
+import path7 from "path";
+import { mkdir as mkdir2, readFile as readFile6, writeFile as writeFile2 } from "fs/promises";
 // src/utils/markdown.ts
 import { writeFile, rename, readFile, mkdir } from "fs/promises";
 import path from "path";
 import yaml from "js-yaml";
+var SPAN_SUFFIX_PATTERN = /^(?<file>[^:#]+)(?:(?::(?<colonStart>\d+)(?:-(?<colonEnd>\d+))?)|(?:#L(?<hashStart>\d+)(?:-L(?<hashEnd>\d+))?))?$/;
+var MIN_LINE_NUMBER = 1;
 var VALID_PROVENANCE_STATES = /* @__PURE__ */ new Set([
   "extracted",
   "merged",
@@ -49,6 +51,23 @@ async function atomicWrite(filePath, content) {
   await writeFile(tmpPath, content, "utf-8");
   await rename(tmpPath, filePath);
 }
+function isValidLineRange(start, end) {
+  return start >= MIN_LINE_NUMBER && end >= start;
+}
+function isMalformedCitationEntry(entry) {
+  const trimmed = entry.trim();
+  if (trimmed.length === 0) return true;
+  if (!trimmed.includes(":") && !trimmed.includes("#")) return false;
+  const match = SPAN_SUFFIX_PATTERN.exec(trimmed);
+  if (!match || !match.groups) return true;
+  const { colonStart, colonEnd, hashStart, hashEnd } = match.groups;
+  const start = colonStart ?? hashStart;
+  const end = colonEnd ?? hashEnd;
+  if (start === void 0) return false;
+  const startLine = Number(start);
+  const endLine = end === void 0 ? startLine : Number(end);
+  return !isValidLineRange(startLine, endLine);
+}
 async function safeReadFile(filePath) {
   try {
     return await readFile(filePath, "utf-8");
@@ -120,6 +139,8 @@ var PROVIDER_MODELS = {
   minimax: "MiniMax-M2.7"
 };
 var OLLAMA_DEFAULT_HOST = "http://localhost:11434/v1";
+var OPENAI_DEFAULT_TIMEOUT_MS = 10 * 60 * 1e3;
+var OLLAMA_DEFAULT_TIMEOUT_MS = 30 * 60 * 1e3;
 var SOURCES_DIR = "sources";
 var CONCEPTS_DIR = "wiki/concepts";
 var QUERIES_DIR = "wiki/queries";
@@ -129,9 +150,17 @@ var LOCK_FILE = ".llmwiki/lock";
 var INDEX_FILE = "wiki/index.md";
 var MOC_FILE = "wiki/MOC.md";
 var EMBEDDINGS_FILE = ".llmwiki/embeddings.json";
+var IMAGE_EXTENSIONS = /* @__PURE__ */ new Set([".jpg", ".jpeg", ".png", ".gif", ".webp"]);
+var TRANSCRIPT_EXTENSIONS = /* @__PURE__ */ new Set([".vtt", ".srt"]);
+var IMAGE_DESCRIBE_MAX_TOKENS = 2048;
 var CANDIDATES_DIR = ".llmwiki/candidates";
 var CANDIDATES_ARCHIVE_DIR = ".llmwiki/candidates/archive";
 var EMBEDDING_TOP_K = 15;
+var CHUNK_TOP_K = 30;
+var CHUNK_RERANK_KEEP = 12;
+var CHUNK_TARGET_CHARS = 800;
+var CHUNK_MAX_CHARS = 1400;
+var CHUNK_MIN_CHARS = 200;
 var LOW_CONFIDENCE_THRESHOLD = 0.5;
 var MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS = 2;
 var EMBEDDING_MODELS = {
@@ -216,19 +245,24 @@ async function ingestWeb(url) {
 // src/ingest/file.ts
 import { readFile as readFile2 } from "fs/promises";
+import path3 from "path";
+// src/ingest/shared.ts
 import path2 from "path";
-var SUPPORTED_EXTENSIONS = /* @__PURE__ */ new Set([".md", ".txt"]);
 function titleFromFilename(filePath) {
   const basename = path2.basename(filePath, path2.extname(filePath));
   return basename.replace(/[-_]+/g, " ").trim();
 }
+// src/ingest/file.ts
+var SUPPORTED_EXTENSIONS = /* @__PURE__ */ new Set([".md", ".txt"]);
 function wrapPlainText(text) {
   return `\`\`\`
 ${text}
 \`\`\``;
 }
 async function ingestFile(filePath) {
-  const ext = path2.extname(filePath).toLowerCase();
+  const ext = path3.extname(filePath).toLowerCase();
   if (!SUPPORTED_EXTENSIONS.has(ext)) {
     throw new Error(
       `Unsupported file type "${ext}". Only .md and .txt files are supported.`
@@ -240,208 +274,36 @@ async function ingestFile(filePath) {
   return { title, content };
 }
-// src/commands/ingest.ts
-function isUrl(source2) {
-  return source2.startsWith("http://") || source2.startsWith("https://");
-}
-function enforceCharLimit(content) {
-  if (content.length <= MAX_SOURCE_CHARS) {
-    return { content, truncated: false, originalChars: content.length };
-  }
-  status(
-    "!",
-    warn(
-      `Content truncated from ${content.length.toLocaleString()} to ${MAX_SOURCE_CHARS.toLocaleString()} characters.`
-    )
-  );
-  return {
-    content: content.slice(0, MAX_SOURCE_CHARS),
-    truncated: true,
-    originalChars: content.length
-  };
-}
-function enforceMinContent(content) {
-  const length = content.trim().length;
-  if (length === 0) {
-    throw new Error(
-      "No readable content could be extracted from the source."
-    );
-  }
-  if (length < MIN_SOURCE_CHARS) {
-    status(
-      "!",
-      warn(
-        `Content seems very short (${length} chars, minimum recommended is ${MIN_SOURCE_CHARS}).`
-      )
-    );
-  }
-}
-function buildDocument(title, source2, result) {
-  const meta = {
-    title,
-    source: source2,
-    ingestedAt: (/* @__PURE__ */ new Date()).toISOString()
-  };
-  if (result.truncated) {
-    meta.truncated = true;
-    meta.originalChars = result.originalChars;
+// src/ingest/pdf.ts
+import { readFile as readFile3 } from "fs/promises";
+function resolveTitle(filePath, info2) {
+  if (info2 && typeof info2 === "object") {
+    const titleField = info2["Title"];
+    if (typeof titleField === "string" && titleField.trim().length > 0) {
+      return titleField.trim();
+    }
   }
-  const frontmatter = buildFrontmatter(meta);
-  return `${frontmatter}
-${result.content}
-`;
-}
-async function saveSource(title, document) {
-  const filename = `${slugify(title)}.md`;
-  const destPath = path3.join(SOURCES_DIR, filename);
-  await mkdir2(SOURCES_DIR, { recursive: true });
-  await writeFile2(destPath, document, "utf-8");
-  return destPath;
-}
-async function ingestSource(source2) {
-  status("*", info(`Ingesting: ${source2}`));
-  const { title, content } = isUrl(source2) ? await ingestWeb(source2) : await ingestFile(source2);
-  const result = enforceCharLimit(content);
-  enforceMinContent(result.content);
-  const document = buildDocument(title, source2, result);
-  const savedPath = await saveSource(title, document);
-  return {
-    filename: path3.basename(savedPath),
-    charCount: result.content.length,
-    truncated: result.truncated,
-    source: source2
-  };
-}
-async function ingest(source2) {
-  const result = await ingestSource(source2);
-  const savedPath = path3.join(SOURCES_DIR, result.filename);
-  status(
-    "+",
-    success(`Saved ${bold(result.filename)} \u2192 ${source(savedPath)}`)
-  );
-  status("\u2192", dim("Next: llmwiki compile"));
-}
-// src/commands/compile.ts
-import { existsSync as existsSync5 } from "fs";
-// src/compiler/index.ts
-import { readFile as readFile8 } from "fs/promises";
-import path16 from "path";
-// src/utils/state.ts
-import { readFile as readFile3, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
-import { existsSync } from "fs";
-import path4 from "path";
-function emptyState() {
-  return { version: 1, indexHash: "", sources: {} };
+  return titleFromFilename(filePath);
 }
-async function readState(root) {
-  const filePath = path4.join(root, STATE_FILE);
-  if (!existsSync(filePath)) {
-    return emptyState();
-  }
+async function ingestPdf(filePath) {
+  const { PDFParse } = await import("pdf-parse");
+  const buffer = await readFile3(filePath);
+  const parser = new PDFParse({ data: new Uint8Array(buffer) });
   try {
-    const raw = await readFile3(filePath, "utf-8");
-    return JSON.parse(raw);
-  } catch {
-    const bakPath = filePath + ".bak";
-    console.warn(`\u26A0 Corrupt state.json \u2014 backed up to ${bakPath}, starting fresh.`);
-    await copyFile(filePath, bakPath);
-    return emptyState();
+    const textResult = await parser.getText();
+    const infoResult = await parser.getInfo();
+    const title = resolveTitle(filePath, infoResult.info);
+    const content = textResult.text.trim();
+    return { title, content };
+  } finally {
+    await parser.destroy();
   }
 }
-async function writeState(root, state) {
-  const dir = path4.join(root, LLMWIKI_DIR);
-  await mkdir3(dir, { recursive: true });
-  const filePath = path4.join(root, STATE_FILE);
-  const tmpPath = filePath + ".tmp";
-  await writeFile3(tmpPath, JSON.stringify(state, null, 2), "utf-8");
-  await rename2(tmpPath, filePath);
-}
-async function updateSourceState(root, sourceFile, entry) {
-  const state = await readState(root);
-  state.sources[sourceFile] = entry;
-  await writeState(root, state);
-}
-async function removeSourceState(root, sourceFile) {
-  const state = await readState(root);
-  delete state.sources[sourceFile];
-  await writeState(root, state);
-}
-// src/compiler/source-state.ts
-import path6 from "path";
-// src/compiler/hasher.ts
-import { createHash } from "crypto";
-import { readFile as readFile4, readdir } from "fs/promises";
+// src/ingest/image.ts
+import { readFile as readFile4 } from "fs/promises";
 import path5 from "path";
-async function hashFile(filePath) {
-  const content = await readFile4(filePath, "utf-8");
-  return createHash("sha256").update(content).digest("hex");
-}
-async function detectChanges(root, prevState) {
-  const sourcesPath = path5.join(root, SOURCES_DIR);
-  const currentFiles = await listSourceFiles(sourcesPath);
-  const changes = [];
-  for (const file of currentFiles) {
-    const status2 = await classifyFile(root, file, prevState);
-    changes.push({ file, status: status2 });
-  }
-  const deletedChanges = findDeletedFiles(currentFiles, prevState);
-  changes.push(...deletedChanges);
-  return changes;
-}
-async function listSourceFiles(sourcesPath) {
-  try {
-    const entries = await readdir(sourcesPath);
-    return entries.filter((f) => f.endsWith(".md"));
-  } catch {
-    return [];
-  }
-}
-async function classifyFile(root, file, prevState) {
-  const filePath = path5.join(root, SOURCES_DIR, file);
-  const hash = await hashFile(filePath);
-  const prev = prevState.sources[file];
-  if (!prev) return "new";
-  if (prev.hash !== hash) return "changed";
-  return "unchanged";
-}
-function findDeletedFiles(currentFiles, prevState) {
-  const currentSet = new Set(currentFiles);
-  return Object.keys(prevState.sources).filter((file) => !currentSet.has(file)).map((file) => ({ file, status: "deleted" }));
-}
-// src/compiler/source-state.ts
-async function buildExtractionSourceStates(root, extractions) {
-  const snapshot = {};
-  const compiledAt = (/* @__PURE__ */ new Date()).toISOString();
-  for (const result of extractions) {
-    if (result.concepts.length === 0) continue;
-    snapshot[result.sourceFile] = await buildEntry(root, result, compiledAt);
-  }
-  return snapshot;
-}
-async function buildEntry(root, result, compiledAt) {
-  const filePath = path6.join(root, SOURCES_DIR, result.sourceFile);
-  const hash = await hashFile(filePath);
-  return {
-    hash,
-    concepts: result.concepts.map((concept) => slugify(concept.concept)),
-    compiledAt
-  };
-}
-function pickStatesForSources(allStates, sourceFiles) {
-  const picked = {};
-  for (const file of sourceFiles) {
-    const entry = allStates[file];
-    if (entry) picked[file] = entry;
-  }
-  return picked;
-}
+import Anthropic2 from "@anthropic-ai/sdk";
 // src/providers/anthropic.ts
 import Anthropic from "@anthropic-ai/sdk";
@@ -554,160 +416,46 @@ var AnthropicProvider = class {
   }
 };
-// src/providers/openai.ts
-import OpenAI from "openai";
-function translateToolToOpenAI(tool) {
-  return {
-    type: "function",
-    function: {
-      name: tool.name,
-      description: tool.description,
-      parameters: tool.input_schema
+// src/utils/claude-settings.ts
+import { readFileSync } from "fs";
+import { homedir } from "os";
+import path4 from "path";
+var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
+function isRecord(value) {
+  return typeof value === "object" && value !== null;
+}
+function normalize(value) {
+  if (typeof value !== "string") return void 0;
+  const trimmed = value.trim();
+  return trimmed.length > 0 ? trimmed : void 0;
+}
+function resolveClaudeSettingsPath(env) {
+  return env[CLAUDE_SETTINGS_PATH_ENV] ?? path4.join(homedir(), ".claude", "settings.json");
+}
+function readClaudeSettingsFile(settingsPath) {
+  try {
+    return readFileSync(settingsPath, "utf8");
+  } catch (err) {
+    if (isRecord(err) && err.code === "ENOENT") {
+      return void 0;
     }
-  };
+    const message = err instanceof Error ? err.message : String(err);
+    throw new Error(`Failed to read Claude settings at "${settingsPath}": ${message}`);
+  }
 }
-var OpenAIProvider = class {
-  client;
-  embeddingsClient;
-  model;
-  configuredEmbeddingModel;
-  constructor(model, options = {}) {
-    this.model = model;
-    this.configuredEmbeddingModel = options.embeddingModel;
-    const resolvedKey = options.apiKey ?? process.env.OPENAI_API_KEY ?? "";
-    this.client = new OpenAI({
-      apiKey: resolvedKey,
-      baseURL: options.baseURL ?? null
-    });
-    this.embeddingsClient = options.embeddingsBaseURL ? new OpenAI({ apiKey: resolvedKey, baseURL: options.embeddingsBaseURL }) : this.client;
+function readClaudeSettingsEnv(env = process.env) {
+  const settingsPath = resolveClaudeSettingsPath(env);
+  const raw = readClaudeSettingsFile(settingsPath);
+  if (!raw) return void 0;
+  let parsed;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    throw new Error(`Failed to parse Claude settings at "${settingsPath}": ${message}`);
   }
-  /** Send a single non-streaming completion request. */
-  async complete(system, messages, maxTokens) {
-    const response = await this.client.chat.completions.create({
-      model: this.model,
-      max_tokens: maxTokens,
-      messages: [{ role: "system", content: system }, ...messages]
-    });
-    return response.choices[0]?.message?.content ?? "";
-  }
-  /** Stream a completion, invoking onToken for each text chunk. */
-  async stream(system, messages, maxTokens, onToken) {
-    const stream = await this.client.chat.completions.create({
-      model: this.model,
-      max_tokens: maxTokens,
-      messages: [{ role: "system", content: system }, ...messages],
-      stream: true
-    });
-    let fullText = "";
-    for await (const chunk of stream) {
-      const delta = chunk.choices[0]?.delta?.content;
-      if (delta) {
-        fullText += delta;
-        onToken?.(delta);
-      }
-    }
-    return fullText;
-  }
-  /** Call the model with tool definitions and return the parsed tool input as JSON. */
-  async toolCall(system, messages, tools, maxTokens) {
-    const openaiTools = tools.map(translateToolToOpenAI);
-    const response = await this.client.chat.completions.create({
-      model: this.model,
-      max_tokens: maxTokens,
-      messages: [{ role: "system", content: system }, ...messages],
-      tools: openaiTools
-    });
-    const toolCalls = response.choices[0]?.message?.tool_calls;
-    if (toolCalls && toolCalls.length > 0) {
-      return toolCalls[0].function.arguments;
-    }
-    return response.choices[0]?.message?.content ?? "";
-  }
-  /**
-   * Produce a single embedding vector via the OpenAI embeddings API.
-   * Subclasses (e.g. Ollama) override embeddingModel() to pick a different model.
-   */
-  async embed(text) {
-    const response = await this.embeddingsClient.embeddings.create({
-      model: this.embeddingModel(),
-      input: text
-    });
-    const vector = response.data[0]?.embedding;
-    if (!Array.isArray(vector)) {
-      throw new Error("OpenAI embeddings response did not include a vector.");
-    }
-    return vector;
-  }
-  /** Default embedding model for this provider. Subclasses may override. */
-  embeddingModel() {
-    return this.configuredEmbeddingModel ?? EMBEDDING_MODELS.openai;
-  }
-};
-// src/providers/ollama.ts
-var OllamaProvider = class extends OpenAIProvider {
-  constructor(model, options) {
-    super(model, {
-      baseURL: options.baseURL,
-      apiKey: "ollama",
-      embeddingsBaseURL: options.embeddingsBaseURL,
-      embeddingModel: options.embeddingModel
-    });
-  }
-  /** Ollama ships a dedicated embedding model (nomic-embed-text). */
-  embeddingModel() {
-    return this.configuredEmbeddingModel ?? EMBEDDING_MODELS.ollama;
-  }
-};
-// src/providers/minimax.ts
-var MINIMAX_BASE_URL = "https://api.minimax.io/v1";
-var MiniMaxProvider = class extends OpenAIProvider {
-  constructor(model, apiKey) {
-    super(model, { baseURL: MINIMAX_BASE_URL, apiKey });
-  }
-};
-// src/utils/claude-settings.ts
-import { readFileSync } from "fs";
-import { homedir } from "os";
-import path7 from "path";
-var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
-function isRecord(value) {
-  return typeof value === "object" && value !== null;
-}
-function normalize(value) {
-  if (typeof value !== "string") return void 0;
-  const trimmed = value.trim();
-  return trimmed.length > 0 ? trimmed : void 0;
-}
-function resolveClaudeSettingsPath(env) {
-  return env[CLAUDE_SETTINGS_PATH_ENV] ?? path7.join(homedir(), ".claude", "settings.json");
-}
-function readClaudeSettingsFile(settingsPath) {
-  try {
-    return readFileSync(settingsPath, "utf8");
-  } catch (err) {
-    if (isRecord(err) && err.code === "ENOENT") {
-      return void 0;
-    }
-    const message = err instanceof Error ? err.message : String(err);
-    throw new Error(`Failed to read Claude settings at "${settingsPath}": ${message}`);
-  }
-}
-function readClaudeSettingsEnv(env = process.env) {
-  const settingsPath = resolveClaudeSettingsPath(env);
-  const raw = readClaudeSettingsFile(settingsPath);
-  if (!raw) return void 0;
-  let parsed;
-  try {
-    parsed = JSON.parse(raw);
-  } catch (err) {
-    const message = err instanceof Error ? err.message : String(err);
-    throw new Error(`Failed to parse Claude settings at "${settingsPath}": ${message}`);
-  }
-  if (!isRecord(parsed) || !isRecord(parsed.env)) {
-    return void 0;
+  if (!isRecord(parsed) || !isRecord(parsed.env)) {
+    return void 0;
   }
   const values = {
     ANTHROPIC_API_KEY: normalize(parsed.env.ANTHROPIC_API_KEY),
@@ -763,312 +511,878 @@ function resolveAnthropicBaseURLFromEnv(env = process.env) {
   return validateAnthropicBaseURL(fallbackBaseURL);
 }
-// src/utils/provider.ts
-var SUPPORTED_PROVIDERS = /* @__PURE__ */ new Set(["anthropic", "openai", "ollama", "minimax"]);
-function getProvider() {
-  const providerName = getProviderName();
-  switch (providerName) {
-    case "anthropic":
-      return getAnthropicProvider();
-    case "openai":
-      return new OpenAIProvider(getModelForProvider("openai"), {
-        baseURL: readOptionalEnv("OPENAI_BASE_URL"),
-        embeddingsBaseURL: readOptionalEnv("OPENAI_EMBEDDINGS_BASE_URL"),
-        embeddingModel: readOptionalEnv("LLMWIKI_EMBEDDING_MODEL")
-      });
-    case "ollama":
-      return new OllamaProvider(getModelForProvider("ollama"), {
-        baseURL: readOptionalEnv("OLLAMA_HOST") ?? OLLAMA_DEFAULT_HOST,
-        embeddingsBaseURL: readOptionalEnv("OLLAMA_EMBEDDINGS_HOST"),
-        embeddingModel: readOptionalEnv("LLMWIKI_EMBEDDING_MODEL")
-      });
-    case "minimax":
-      return getMiniMaxProvider();
-    default:
-      throw new Error(`Unhandled provider: ${providerName}`);
-  }
-}
-function readOptionalEnv(name) {
-  const value = process.env[name]?.trim();
-  return value ? value : void 0;
-}
-function getModelForProvider(providerName) {
-  return process.env.LLMWIKI_MODEL ?? PROVIDER_MODELS[providerName];
-}
-function getMiniMaxProvider() {
-  const apiKey = process.env.MINIMAX_API_KEY;
-  if (!apiKey) {
+// src/ingest/image.ts
+var EXTENSION_TO_MIME = {
+  ".jpg": "image/jpeg",
+  ".jpeg": "image/jpeg",
+  ".png": "image/png",
+  ".gif": "image/gif",
+  ".webp": "image/webp"
+};
+function mimeTypeForExtension(ext) {
+  const mimeType = EXTENSION_TO_MIME[ext.toLowerCase()];
+  if (!mimeType) {
     throw new Error(
-      "MiniMax provider requires MINIMAX_API_KEY environment variable.\n  Set it with: export MINIMAX_API_KEY=your_key"
+      `Unsupported image extension "${ext}". Supported: ${Object.keys(EXTENSION_TO_MIME).join(", ")}`
     );
   }
-  return new MiniMaxProvider(getModelForProvider("minimax"), apiKey);
+  return mimeType;
 }
-function getAnthropicProvider() {
-  const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
+function buildClient() {
   const baseURL = resolveAnthropicBaseURLFromEnv();
   const auth = resolveAnthropicAuthFromEnv();
-  return new AnthropicProvider(model, {
-    baseURL,
-    ...auth
+  return new Anthropic2(buildAnthropicClientOptions({ baseURL, ...auth }));
+}
+async function describeImageWithVision(client, model, imageData, mimeType) {
+  const response = await client.messages.create({
+    model,
+    max_tokens: IMAGE_DESCRIBE_MAX_TOKENS,
+    messages: [
+      {
+        role: "user",
+        content: [
+          {
+            type: "image",
+            source: { type: "base64", media_type: mimeType, data: imageData }
+          },
+          {
+            type: "text",
+            text: "Extract and transcribe all text visible in this image. Then provide a detailed description of any non-text visual content. Format your response as markdown."
+          }
+        ]
+      }
+    ]
   });
+  const textBlock = response.content.find((block) => block.type === "text");
+  return textBlock?.type === "text" ? textBlock.text : "";
 }
-function getProviderName() {
-  const providerName = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;
-  if (!SUPPORTED_PROVIDERS.has(providerName)) {
+async function ingestImage(filePath) {
+  const providerName = process.env.LLMWIKI_PROVIDER ?? "anthropic";
+  if (providerName !== "anthropic") {
     throw new Error(
-      `Unknown provider "${providerName}". Supported: ${[...SUPPORTED_PROVIDERS].join(", ")}`
+      `Image ingest requires the Anthropic provider (vision). Current provider: "${providerName}". Set LLMWIKI_PROVIDER=anthropic and ANTHROPIC_API_KEY to use image ingest.`
     );
   }
-  return providerName;
-}
-function getActiveProviderName() {
-  return getProviderName();
+  const ext = path5.extname(filePath).toLowerCase();
+  const mimeType = mimeTypeForExtension(ext);
+  const imageBuffer = await readFile4(filePath);
+  const imageData = imageBuffer.toString("base64");
+  const client = buildClient();
+  const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
+  const content = await describeImageWithVision(client, model, imageData, mimeType);
+  const title = titleFromFilename(filePath);
+  return { title, content };
 }
-// src/utils/llm.ts
-function sleep(ms) {
-  return new Promise((resolve) => setTimeout(resolve, ms));
-}
-async function callClaude(options) {
-  const { system, messages, tools, maxTokens = 4096, stream = false, onToken } = options;
-  const provider = getProvider();
-  for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) {
-    try {
-      if (stream) {
-        return await provider.stream(system, messages, maxTokens, onToken);
-      }
-      if (tools && tools.length > 0) {
-        return await provider.toolCall(system, messages, tools, maxTokens);
-      }
-      return await provider.complete(system, messages, maxTokens);
-    } catch (error2) {
-      if (attempt === RETRY_COUNT) throw error2;
-      const delayMs = RETRY_BASE_MS * Math.pow(RETRY_MULTIPLIER, attempt);
-      const errMsg = error2 instanceof Error ? error2.message : String(error2);
-      console.warn(`\u26A0 API call failed (attempt ${attempt + 1}/${RETRY_COUNT + 1}): ${errMsg}`);
-      console.warn(`  Retrying in ${delayMs / 1e3}s...`);
-      await sleep(delayMs);
-    }
+// src/ingest/transcript.ts
+import { readFile as readFile5 } from "fs/promises";
+import path6 from "path";
+import { YoutubeTranscript as YoutubeTranscriptUntyped } from "youtube-transcript/dist/youtube-transcript.esm.js";
+var YoutubeTranscript = YoutubeTranscriptUntyped;
+var YOUTUBE_URL_PATTERN = /^https?:\/\/(www\.)?(youtube\.com\/watch|youtu\.be\/)/;
+var SRT_SEQUENCE_PATTERN = /^\d+$/;
+var TIMESTAMP_PATTERN = /\d{2}:\d{2}[:.]\d{2}/;
+var MS_PER_MINUTE = 6e4;
+var MS_PER_SECOND = 1e3;
+function isYoutubeUrl(source2) {
+  return YOUTUBE_URL_PATTERN.test(source2);
+}
+function extractVideoId(url) {
+  const match = url.match(/(?:v=|youtu\.be\/)([^&?/]+)/);
+  if (!match) {
+    throw new Error(`Could not extract video ID from YouTube URL: ${url}`);
   }
-  throw new Error("Unreachable");
+  return match[1];
 }
-// src/utils/lock.ts
-import { open, readFile as readFile5, unlink, mkdir as mkdir4 } from "fs/promises";
-import path8 from "path";
-var RECLAIM_SUFFIX = ".reclaim";
-var MAX_ACQUIRE_ATTEMPTS = 2;
-function isProcessAlive(pid) {
-  try {
-    process.kill(pid, 0);
-    return true;
-  } catch {
-    return false;
+function formatOffset(offsetMs) {
+  const minutes = Math.floor(offsetMs / MS_PER_MINUTE);
+  const seconds = Math.floor(offsetMs % MS_PER_MINUTE / MS_PER_SECOND);
+  return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}`;
+}
+async function fetchYoutubeTranscript(url) {
+  const videoId = extractVideoId(url);
+  const segments = await YoutubeTranscript.fetchTranscript(videoId);
+  if (!segments || segments.length === 0) {
+    throw new Error(`No transcript available for YouTube video: ${url}`);
   }
+  const lines = segments.map((seg) => `[${formatOffset(seg.offset)}] ${seg.text}`);
+  return {
+    title: `YouTube Transcript ${videoId}`,
+    content: lines.join("\n")
+  };
 }
-async function acquireLock(root) {
-  const lockPath = path8.join(root, LOCK_FILE);
-  await mkdir4(path8.join(root, LLMWIKI_DIR), { recursive: true });
-  for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
-    const created = await tryCreateLock(lockPath);
-    if (created) return true;
-    const stale = await isLockStale(lockPath);
-    if (!stale) {
-      status("!", warn("Another compilation is running."));
-      return false;
+function isCueTimestamp(trimmed) {
+  return TIMESTAMP_PATTERN.test(trimmed) && trimmed.includes("-->");
+}
+function parseVtt(raw, filePath) {
+  const lines = raw.split("\n");
+  const output = [];
+  let inCue = false;
+  for (const line of lines) {
+    const trimmed = line.trim();
+    if (trimmed === "WEBVTT" || trimmed === "") {
+      inCue = false;
+      continue;
+    }
+    if (isCueTimestamp(trimmed)) {
+      output.push(`
+**[${trimmed}]**`);
+      inCue = true;
+      continue;
+    }
+    if (inCue && trimmed.length > 0) {
+      output.push(trimmed);
     }
-    const reclaimed = await reclaimStaleLock(root, lockPath);
-    if (reclaimed) return true;
   }
-  status("!", warn("Could not acquire lock after retrying."));
-  return false;
+  return { title: titleFromFilename(filePath), content: output.join("\n").trim() };
 }
-async function reclaimStaleLock(root, lockPath) {
-  const reclaimPath = lockPath + RECLAIM_SUFFIX;
-  const gotReclaimLock = await acquireReclaimLock(reclaimPath);
-  if (!gotReclaimLock) return false;
-  try {
-    if (!await isLockStale(lockPath)) {
-      return false;
-    }
-    try {
-      await unlink(lockPath);
-    } catch {
+function parseSrt(raw, filePath) {
+  const lines = raw.split("\n");
+  const output = [];
+  for (const line of lines) {
+    const trimmed = line.trim();
+    if (trimmed === "" || SRT_SEQUENCE_PATTERN.test(trimmed)) {
+      continue;
     }
-    const acquired = await tryCreateLock(lockPath);
-    if (acquired) {
-      status("i", dim("Reclaimed stale lock from dead process."));
+    if (isCueTimestamp(trimmed)) {
+      output.push(`
+**[${trimmed}]**`);
+      continue;
     }
-    return acquired;
-  } finally {
-    try {
-      await unlink(reclaimPath);
-    } catch {
+    if (trimmed.length > 0) {
+      output.push(trimmed);
     }
   }
+  return { title: titleFromFilename(filePath), content: output.join("\n").trim() };
 }
-async function acquireReclaimLock(reclaimPath) {
-  if (await tryCreateLock(reclaimPath)) return true;
-  if (!await isLockStale(reclaimPath)) return false;
-  try {
-    await unlink(reclaimPath);
-  } catch {
+function parsePlainTranscript(raw, filePath) {
+  return { title: titleFromFilename(filePath), content: raw.trim() };
+}
+async function ingestTranscript(source2) {
+  if (isYoutubeUrl(source2)) {
+    return fetchYoutubeTranscript(source2);
   }
-  return false;
+  const ext = path6.extname(source2).toLowerCase();
+  const raw = await readFile5(source2, "utf-8");
+  if (ext === ".vtt") return parseVtt(raw, source2);
+  if (ext === ".srt") return parseSrt(raw, source2);
+  if (ext === ".txt") return parsePlainTranscript(raw, source2);
+  throw new Error(
+    `Unsupported transcript file type "${ext}". Supported: .vtt, .srt, .txt`
+  );
 }
-async function tryCreateLock(lockPath) {
-  try {
-    const fd = await open(lockPath, "wx");
-    await fd.writeFile(String(process.pid), "utf-8");
-    await fd.close();
-    return true;
-  } catch (err) {
-    if (err instanceof Error && "code" in err && err.code === "EEXIST") {
-      return false;
-    }
-    throw err;
+// src/commands/ingest.ts
+function isUrl(source2) {
+  return source2.startsWith("http://") || source2.startsWith("https://");
+}
+var TXT_SNIFF_BYTES = 2048;
+var SPEAKER_TAG_PATTERN = /^([A-Z][a-zA-Z .'-]{0,40}):\s/gm;
+var TIMESTAMP_PATTERN2 = /^\s*\d{1,2}:\d{2}(:\d{2})?/;
+var MIN_TIMESTAMP_MATCHES = 3;
+var MIN_SPEAKER_REPEAT_COUNT = 2;
+var MIN_DISTINCT_SPEAKERS = 2;
+function countSpeakerOccurrences(sample) {
+  const counts = /* @__PURE__ */ new Map();
+  SPEAKER_TAG_PATTERN.lastIndex = 0;
+  let match;
+  while ((match = SPEAKER_TAG_PATTERN.exec(sample)) !== null) {
+    const name = match[1].trim();
+    counts.set(name, (counts.get(name) ?? 0) + 1);
+  }
+  return counts;
+}
+function hasSpeakerDialoguePattern(sample) {
+  const counts = countSpeakerOccurrences(sample);
+  const distinctSpeakers = counts.size;
+  const hasEnoughSpeakers = distinctSpeakers >= MIN_DISTINCT_SPEAKERS;
+  const hasRepeatedSpeaker = [...counts.values()].some(
+    (n) => n >= MIN_SPEAKER_REPEAT_COUNT
+  );
+  return hasEnoughSpeakers && hasRepeatedSpeaker;
+}
+async function looksLikeTxtTranscript(filePath) {
+  const raw = await readFile6(filePath, "utf-8");
+  const sample = raw.slice(0, TXT_SNIFF_BYTES);
+  if (hasSpeakerDialoguePattern(sample)) return true;
+  const timestampMatches = sample.match(new RegExp(TIMESTAMP_PATTERN2.source, "gm"));
+  return (timestampMatches?.length ?? 0) >= MIN_TIMESTAMP_MATCHES;
+}
+function enforceCharLimit(content) {
+  if (content.length <= MAX_SOURCE_CHARS) {
+    return { content, truncated: false, originalChars: content.length };
   }
+  status(
+    "!",
+    warn(
+      `Content truncated from ${content.length.toLocaleString()} to ${MAX_SOURCE_CHARS.toLocaleString()} characters.`
+    )
+  );
+  return {
+    content: content.slice(0, MAX_SOURCE_CHARS),
+    truncated: true,
+    originalChars: content.length
+  };
 }
-async function isLockStale(lockPath) {
-  try {
-    const content = await readFile5(lockPath, "utf-8");
-    const pid = parseInt(content.trim(), 10);
-    if (isNaN(pid)) return true;
-    return !isProcessAlive(pid);
-  } catch {
-    return true;
+function enforceMinContent(content) {
+  const length = content.trim().length;
+  if (length === 0) {
+    throw new Error(
+      "No readable content could be extracted from the source."
+    );
+  }
+  if (length < MIN_SOURCE_CHARS) {
+    status(
+      "!",
+      warn(
+        `Content seems very short (${length} chars, minimum recommended is ${MIN_SOURCE_CHARS}).`
+      )
+    );
   }
 }
-async function releaseLock(root) {
-  const lockPath = path8.join(root, LOCK_FILE);
-  try {
-    await unlink(lockPath);
-  } catch {
+async function detectSourceType(source2) {
+  if (!isUrl(source2)) {
+    const ext = path7.extname(source2).toLowerCase();
+    if (ext === ".pdf") return "pdf";
+    if (IMAGE_EXTENSIONS.has(ext)) return "image";
+    if (TRANSCRIPT_EXTENSIONS.has(ext)) return "transcript";
+    if (ext === ".txt") {
+      const isTranscript = await looksLikeTxtTranscript(source2);
+      return isTranscript ? "transcript" : "file";
+    }
+    return "file";
   }
+  if (isYoutubeUrl(source2)) return "transcript";
+  return "web";
 }
-// src/compiler/prompts.ts
-var PROVENANCE_STATE_VALUES = [
-  "extracted",
-  "merged",
-  "inferred",
-  "ambiguous"
-];
-var CONCEPT_EXTRACTION_TOOL = {
-  name: "extract_concepts",
-  description: "Extract knowledge concepts from a source document",
-  input_schema: {
-    type: "object",
-    properties: {
-      concepts: {
-        type: "array",
-        items: {
-          type: "object",
-          properties: {
-            concept: {
-              type: "string",
-              description: "Human-readable concept title"
-            },
-            summary: {
-              type: "string",
-              description: "One-line description"
-            },
-            is_new: {
-              type: "boolean",
-              description: "True if this is a new concept not in existing wiki"
-            },
-            tags: {
-              type: "array",
-              items: { type: "string" },
-              description: "2-4 categorical tags for organizing this concept (e.g., 'machine-learning', 'optimization')"
-            },
-            confidence: {
-              type: "number",
-              description: "Confidence in this concept on a 0..1 scale (1 = directly stated, 0 = highly speculative)."
-            },
-            provenance_state: {
-              type: "string",
-              enum: PROVENANCE_STATE_VALUES,
-              description: "How this concept was produced: 'extracted' (direct from source), 'merged' (synthesised across sources), 'inferred' (model deduction), or 'ambiguous' (sources disagree)."
-            },
-            contradicted_by: {
-              type: "array",
-              items: {
-                type: "object",
-                properties: {
-                  slug: { type: "string", description: "Slug of the contradicting concept." },
-                  reason: { type: "string", description: "Brief reason for the contradiction." }
-                },
-                required: ["slug"]
-              },
-              description: "Slugs of other concepts whose evidence contradicts this one."
-            },
-            inferred_paragraphs: {
-              type: "integer",
-              description: "Estimated number of paragraphs in the page that will be inferred rather than directly cited."
-            }
-          },
-          required: ["concept", "summary", "is_new"]
-        }
-      }
-    },
-    required: ["concepts"]
+function buildDocument(title, source2, result, sourceType) {
+  const meta = {
+    title,
+    source: source2,
+    ingestedAt: (/* @__PURE__ */ new Date()).toISOString()
+  };
+  if (sourceType !== void 0) {
+    meta.sourceType = sourceType;
   }
-};
-function buildExtractionPrompt(sourceContent, existingIndex) {
-  const indexSection = existingIndex ? `
-Here is the existing wiki index \u2014 avoid duplicating concepts already covered:
+  if (result.truncated) {
+    meta.truncated = true;
+    meta.originalChars = result.originalChars;
+  }
+  const frontmatter = buildFrontmatter(meta);
+  return `${frontmatter}
-${existingIndex}` : "\n\nNo existing wiki pages yet.";
-  return [
-    "You are a knowledge extraction engine. Analyze the following source document",
-    "and identify 3-8 distinct, meaningful concepts worth documenting as wiki pages.",
-    "Each concept should be a standalone topic that someone might look up.",
-    "Focus on key ideas, techniques, patterns, or entities \u2014 not trivial details.",
-    "Use the extract_concepts tool to return your findings.",
-    "",
-    "For every concept, emit provenance metadata so downstream tools can reason",
-    "about reliability:",
-    "  - confidence: 0..1 \u2014 how certain you are the source supports this concept.",
-    "  - provenance_state: 'extracted' if directly stated, 'merged' if synthesised",
-    "    from multiple parts of the source, 'inferred' if reasoned from context,",
-    "    or 'ambiguous' if the source is contradictory or unclear.",
-    "  - contradicted_by: slugs of other concepts (in this batch or the index)",
-    "    whose evidence conflicts with this one.",
-    "  - inferred_paragraphs: estimated number of paragraphs in the resulting",
-    "    page that will be inferred rather than directly citable.",
-    indexSection,
-    "\n\n--- SOURCE DOCUMENT ---\n\n",
-    sourceContent
-  ].join("\n");
+${result.content}
+`;
+}
+async function fetchContent(source2, sourceType) {
+  switch (sourceType) {
+    case "web":
+      return ingestWeb(source2);
+    case "pdf":
+      return ingestPdf(source2);
+    case "image":
+      return ingestImage(source2);
+    case "transcript":
+      return ingestTranscript(source2);
+    case "file":
+      return ingestFile(source2);
+  }
+}
+async function saveSource(title, document) {
+  const filename = `${slugify(title)}.md`;
+  const destPath = path7.join(SOURCES_DIR, filename);
+  await mkdir2(SOURCES_DIR, { recursive: true });
+  await writeFile2(destPath, document, "utf-8");
+  return destPath;
+}
+async function ingestSource(source2) {
+  const sourceType = await detectSourceType(source2);
+  status("*", info(`Ingesting [${sourceType}]: ${source2}`));
+  const { title, content } = await fetchContent(source2, sourceType);
+  const result = enforceCharLimit(content);
+  enforceMinContent(result.content);
+  const document = buildDocument(title, source2, result, sourceType);
+  const savedPath = await saveSource(title, document);
+  return {
+    filename: path7.basename(savedPath),
+    charCount: result.content.length,
+    truncated: result.truncated,
+    source: source2,
+    sourceType
+  };
+}
+async function ingest(source2) {
+  const result = await ingestSource(source2);
+  const savedPath = path7.join(SOURCES_DIR, result.filename);
+  status(
+    "+",
+    success(`Saved ${bold(result.filename)} \u2192 ${source(savedPath)}`)
+  );
+  status("\u2192", dim("Next: llmwiki compile"));
 }
-function buildPagePrompt(concept, sourceContent, existingPage, relatedPages) {
-  const existingSection = existingPage ? `
-Existing page to update:
+// src/commands/compile.ts
+import { existsSync as existsSync7 } from "fs";
-${existingPage}` : "";
-  const relatedSection = relatedPages ? `
+// src/compiler/index.ts
+import { readFile as readFile14 } from "fs/promises";
+import path21 from "path";
-Related wiki pages for cross-referencing:
+// src/utils/state.ts
+import { readFile as readFile7, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
+import { existsSync } from "fs";
+import path8 from "path";
+function emptyState() {
+  return { version: 1, indexHash: "", sources: {} };
+}
+async function readState(root) {
+  const filePath = path8.join(root, STATE_FILE);
+  if (!existsSync(filePath)) {
+    return emptyState();
+  }
+  try {
+    const raw = await readFile7(filePath, "utf-8");
+    return JSON.parse(raw);
+  } catch {
+    const bakPath = filePath + ".bak";
+    console.warn(`\u26A0 Corrupt state.json \u2014 backed up to ${bakPath}, starting fresh.`);
+    await copyFile(filePath, bakPath);
+    return emptyState();
+  }
+}
+async function writeState(root, state) {
+  const dir = path8.join(root, LLMWIKI_DIR);
+  await mkdir3(dir, { recursive: true });
+  const filePath = path8.join(root, STATE_FILE);
+  const tmpPath = filePath + ".tmp";
+  await writeFile3(tmpPath, JSON.stringify(state, null, 2), "utf-8");
+  await rename2(tmpPath, filePath);
+}
+async function updateSourceState(root, sourceFile, entry) {
+  const state = await readState(root);
+  state.sources[sourceFile] = entry;
+  await writeState(root, state);
+}
+async function removeSourceState(root, sourceFile) {
+  const state = await readState(root);
+  delete state.sources[sourceFile];
+  await writeState(root, state);
+}
-${relatedPages}` : "";
-  return [
-    `You are a wiki author. Write a clear, well-structured markdown page about "${concept}".`,
-    "Draw facts only from the provided source material.",
-    "Include a ## Sources section at the end listing the source document.",
-    "Suggest [[wikilinks]] to related concepts where appropriate.",
-    "Write in a neutral, informative tone. Be concise but thorough.",
-    "",
-    "Source attribution: at the end of each prose paragraph, append a citation",
-    "marker showing which source file(s) the paragraph drew from.",
-    "Format: ^[filename.md] for single-source, ^[source-a.md, source-b.md] for multi-source.",
-    "Place citations only at the end of prose paragraphs \u2014 not on headings, list items, or code blocks.",
-    "Source filenames are visible as `--- SOURCE: filename.md ---` headers in the content below.",
-    "",
-    "If a paragraph is your inference rather than a direct extraction, leave it",
+// src/compiler/source-state.ts
+import path10 from "path";
+// src/compiler/hasher.ts
+import { createHash } from "crypto";
+import { readFile as readFile8, readdir } from "fs/promises";
+import path9 from "path";
+async function hashFile(filePath) {
+  const content = await readFile8(filePath, "utf-8");
+  return createHash("sha256").update(content).digest("hex");
+}
+async function detectChanges(root, prevState) {
+  const sourcesPath = path9.join(root, SOURCES_DIR);
+  const currentFiles = await listSourceFiles(sourcesPath);
+  const changes = [];
+  for (const file of currentFiles) {
+    const status2 = await classifyFile(root, file, prevState);
+    changes.push({ file, status: status2 });
+  }
+  const deletedChanges = findDeletedFiles(currentFiles, prevState);
+  changes.push(...deletedChanges);
+  return changes;
+}
+async function listSourceFiles(sourcesPath) {
+  try {
+    const entries = await readdir(sourcesPath);
+    return entries.filter((f) => f.endsWith(".md"));
+  } catch {
+    return [];
+  }
+}
+async function classifyFile(root, file, prevState) {
+  const filePath = path9.join(root, SOURCES_DIR, file);
+  const hash = await hashFile(filePath);
+  const prev = prevState.sources[file];
+  if (!prev) return "new";
+  if (prev.hash !== hash) return "changed";
+  return "unchanged";
+}
+function findDeletedFiles(currentFiles, prevState) {
+  const currentSet = new Set(currentFiles);
+  return Object.keys(prevState.sources).filter((file) => !currentSet.has(file)).map((file) => ({ file, status: "deleted" }));
+}
+// src/compiler/source-state.ts
+async function buildExtractionSourceStates(root, extractions) {
+  const snapshot = {};
+  const compiledAt = (/* @__PURE__ */ new Date()).toISOString();
+  for (const result of extractions) {
+    if (result.concepts.length === 0) continue;
+    snapshot[result.sourceFile] = await buildEntry(root, result, compiledAt);
+  }
+  return snapshot;
+}
+async function buildEntry(root, result, compiledAt) {
+  const filePath = path10.join(root, SOURCES_DIR, result.sourceFile);
+  const hash = await hashFile(filePath);
+  return {
+    hash,
+    concepts: result.concepts.map((concept) => slugify(concept.concept)),
+    compiledAt
+  };
+}
+function pickStatesForSources(allStates, sourceFiles) {
+  const picked = {};
+  for (const file of sourceFiles) {
+    const entry = allStates[file];
+    if (entry) picked[file] = entry;
+  }
+  return picked;
+}
+// src/providers/openai.ts
+import OpenAI from "openai";
+function readTimeoutEnv(name) {
+  const raw = process.env[name]?.trim();
+  if (!raw) return void 0;
+  const parsed = Number(raw);
+  return Number.isFinite(parsed) && parsed > 0 ? parsed : void 0;
+}
+function resolveOpenAITimeoutMs() {
+  return readTimeoutEnv("LLMWIKI_REQUEST_TIMEOUT_MS");
+}
+function translateToolToOpenAI(tool) {
+  return {
+    type: "function",
+    function: {
+      name: tool.name,
+      description: tool.description,
+      parameters: tool.input_schema
+    }
+  };
+}
+var OpenAIProvider = class {
+  client;
+  embeddingsClient;
+  model;
+  configuredEmbeddingModel;
+  constructor(model, options = {}) {
+    this.model = model;
+    this.configuredEmbeddingModel = options.embeddingModel;
+    const resolvedKey = options.apiKey ?? process.env.OPENAI_API_KEY ?? "";
+    const timeout = options.timeoutMs ?? resolveOpenAITimeoutMs() ?? OPENAI_DEFAULT_TIMEOUT_MS;
+    this.client = new OpenAI({
+      apiKey: resolvedKey,
+      baseURL: options.baseURL ?? null,
+      timeout
+    });
+    this.embeddingsClient = options.embeddingsBaseURL ? new OpenAI({ apiKey: resolvedKey, baseURL: options.embeddingsBaseURL, timeout }) : this.client;
+  }
+  /** Send a single non-streaming completion request. */
+  async complete(system, messages, maxTokens) {
+    const response = await this.client.chat.completions.create({
+      model: this.model,
+      max_tokens: maxTokens,
+      messages: [{ role: "system", content: system }, ...messages]
+    });
+    return response.choices[0]?.message?.content ?? "";
+  }
+  /** Stream a completion, invoking onToken for each text chunk. */
+  async stream(system, messages, maxTokens, onToken) {
+    const stream = await this.client.chat.completions.create({
+      model: this.model,
+      max_tokens: maxTokens,
+      messages: [{ role: "system", content: system }, ...messages],
+      stream: true
+    });
+    let fullText = "";
+    for await (const chunk of stream) {
+      const delta = chunk.choices[0]?.delta?.content;
+      if (delta) {
+        fullText += delta;
+        onToken?.(delta);
+      }
+    }
+    return fullText;
+  }
+  /** Call the model with tool definitions and return the parsed tool input as JSON. */
+  async toolCall(system, messages, tools, maxTokens) {
+    const openaiTools = tools.map(translateToolToOpenAI);
+    const response = await this.client.chat.completions.create({
+      model: this.model,
+      max_tokens: maxTokens,
+      messages: [{ role: "system", content: system }, ...messages],
+      tools: openaiTools
+    });
+    const toolCalls = response.choices[0]?.message?.tool_calls;
+    if (toolCalls && toolCalls.length > 0) {
+      return toolCalls[0].function.arguments;
+    }
+    return response.choices[0]?.message?.content ?? "";
+  }
+  /**
+   * Produce a single embedding vector via the OpenAI embeddings API.
+   * Subclasses (e.g. Ollama) override embeddingModel() to pick a different model.
+   */
+  async embed(text) {
+    const response = await this.embeddingsClient.embeddings.create({
+      model: this.embeddingModel(),
+      input: text
+    });
+    const vector = response.data[0]?.embedding;
+    if (!Array.isArray(vector)) {
+      throw new Error("OpenAI embeddings response did not include a vector.");
+    }
+    return vector;
+  }
+  /** Default embedding model for this provider. Subclasses may override. */
+  embeddingModel() {
+    return this.configuredEmbeddingModel ?? EMBEDDING_MODELS.openai;
+  }
+};
+// src/providers/ollama.ts
+function resolveOllamaTimeoutMs(explicit) {
+  return explicit ?? readTimeoutEnv("OLLAMA_TIMEOUT_MS") ?? readTimeoutEnv("LLMWIKI_REQUEST_TIMEOUT_MS") ?? OLLAMA_DEFAULT_TIMEOUT_MS;
+}
+var OllamaProvider = class extends OpenAIProvider {
+  constructor(model, options) {
+    super(model, {
+      baseURL: options.baseURL,
+      apiKey: "ollama",
+      embeddingsBaseURL: options.embeddingsBaseURL,
+      embeddingModel: options.embeddingModel,
+      timeoutMs: resolveOllamaTimeoutMs(options.timeoutMs)
+    });
+  }
+  /** Ollama ships a dedicated embedding model (nomic-embed-text). */
+  embeddingModel() {
+    return this.configuredEmbeddingModel ?? EMBEDDING_MODELS.ollama;
+  }
+};
+// src/providers/minimax.ts
+var MINIMAX_BASE_URL = "https://api.minimax.io/v1";
+var MiniMaxProvider = class extends OpenAIProvider {
+  constructor(model, apiKey) {
+    super(model, { baseURL: MINIMAX_BASE_URL, apiKey });
+  }
+};
+// src/utils/provider.ts
+var SUPPORTED_PROVIDERS = /* @__PURE__ */ new Set(["anthropic", "openai", "ollama", "minimax"]);
+function getProvider() {
+  const providerName = getProviderName();
+  switch (providerName) {
+    case "anthropic":
+      return getAnthropicProvider();
+    case "openai":
+      return new OpenAIProvider(getModelForProvider("openai"), {
+        baseURL: readOptionalEnv("OPENAI_BASE_URL"),
+        embeddingsBaseURL: readOptionalEnv("OPENAI_EMBEDDINGS_BASE_URL"),
+        embeddingModel: readOptionalEnv("LLMWIKI_EMBEDDING_MODEL")
+      });
+    case "ollama":
+      return new OllamaProvider(getModelForProvider("ollama"), {
+        baseURL: readOptionalEnv("OLLAMA_HOST") ?? OLLAMA_DEFAULT_HOST,
+        embeddingsBaseURL: readOptionalEnv("OLLAMA_EMBEDDINGS_HOST"),
+        embeddingModel: readOptionalEnv("LLMWIKI_EMBEDDING_MODEL")
+      });
+    case "minimax":
+      return getMiniMaxProvider();
+    default:
+      throw new Error(`Unhandled provider: ${providerName}`);
+  }
+}
+function readOptionalEnv(name) {
+  const value = process.env[name]?.trim();
+  return value ? value : void 0;
+}
+function getModelForProvider(providerName) {
+  return process.env.LLMWIKI_MODEL ?? PROVIDER_MODELS[providerName];
+}
+function getMiniMaxProvider() {
+  const apiKey = process.env.MINIMAX_API_KEY;
+  if (!apiKey) {
+    throw new Error(
+      "MiniMax provider requires MINIMAX_API_KEY environment variable.\n  Set it with: export MINIMAX_API_KEY=your_key"
+    );
+  }
+  return new MiniMaxProvider(getModelForProvider("minimax"), apiKey);
+}
+function getAnthropicProvider() {
+  const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
+  const baseURL = resolveAnthropicBaseURLFromEnv();
+  const auth = resolveAnthropicAuthFromEnv();
+  return new AnthropicProvider(model, {
+    baseURL,
+    ...auth
+  });
+}
+function getProviderName() {
+  const providerName = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;
+  if (!SUPPORTED_PROVIDERS.has(providerName)) {
+    throw new Error(
+      `Unknown provider "${providerName}". Supported: ${[...SUPPORTED_PROVIDERS].join(", ")}`
+    );
+  }
+  return providerName;
+}
+function getActiveProviderName() {
+  return getProviderName();
+}
+// src/utils/llm.ts
+function sleep(ms) {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+async function callClaude(options) {
+  const { system, messages, tools, maxTokens = 4096, stream = false, onToken } = options;
+  const provider = getProvider();
+  for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) {
+    try {
+      if (stream) {
+        return await provider.stream(system, messages, maxTokens, onToken);
+      }
+      if (tools && tools.length > 0) {
+        return await provider.toolCall(system, messages, tools, maxTokens);
+      }
+      return await provider.complete(system, messages, maxTokens);
+    } catch (error2) {
+      if (attempt === RETRY_COUNT) throw error2;
+      const delayMs = RETRY_BASE_MS * Math.pow(RETRY_MULTIPLIER, attempt);
+      const errMsg = error2 instanceof Error ? error2.message : String(error2);
+      console.warn(`\u26A0 API call failed (attempt ${attempt + 1}/${RETRY_COUNT + 1}): ${errMsg}`);
+      console.warn(`  Retrying in ${delayMs / 1e3}s...`);
+      await sleep(delayMs);
+    }
+  }
+  throw new Error("Unreachable");
+}
+// src/utils/lock.ts
+import { open, readFile as readFile9, unlink, mkdir as mkdir4 } from "fs/promises";
+import path11 from "path";
+var RECLAIM_SUFFIX = ".reclaim";
+var MAX_ACQUIRE_ATTEMPTS = 2;
+function isProcessAlive(pid) {
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch {
+    return false;
+  }
+}
+async function acquireLock(root) {
+  const lockPath = path11.join(root, LOCK_FILE);
+  await mkdir4(path11.join(root, LLMWIKI_DIR), { recursive: true });
+  for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
+    const created = await tryCreateLock(lockPath);
+    if (created) return true;
+    const stale = await isLockStale(lockPath);
+    if (!stale) {
+      status("!", warn("Another compilation is running."));
+      return false;
+    }
+    const reclaimed = await reclaimStaleLock(root, lockPath);
+    if (reclaimed) return true;
+  }
+  status("!", warn("Could not acquire lock after retrying."));
+  return false;
+}
+async function reclaimStaleLock(root, lockPath) {
+  const reclaimPath = lockPath + RECLAIM_SUFFIX;
+  const gotReclaimLock = await acquireReclaimLock(reclaimPath);
+  if (!gotReclaimLock) return false;
+  try {
+    if (!await isLockStale(lockPath)) {
+      return false;
+    }
+    try {
+      await unlink(lockPath);
+    } catch {
+    }
+    const acquired = await tryCreateLock(lockPath);
+    if (acquired) {
+      status("i", dim("Reclaimed stale lock from dead process."));
+    }
+    return acquired;
+  } finally {
+    try {
+      await unlink(reclaimPath);
+    } catch {
+    }
+  }
+}
+async function acquireReclaimLock(reclaimPath) {
+  if (await tryCreateLock(reclaimPath)) return true;
+  if (!await isLockStale(reclaimPath)) return false;
+  try {
+    await unlink(reclaimPath);
+  } catch {
+  }
+  return false;
+}
+async function tryCreateLock(lockPath) {
+  try {
+    const fd = await open(lockPath, "wx");
+    await fd.writeFile(String(process.pid), "utf-8");
+    await fd.close();
+    return true;
+  } catch (err) {
+    if (err instanceof Error && "code" in err && err.code === "EEXIST") {
+      return false;
+    }
+    throw err;
+  }
+}
+async function isLockStale(lockPath) {
+  try {
+    const content = await readFile9(lockPath, "utf-8");
+    const pid = parseInt(content.trim(), 10);
+    if (isNaN(pid)) return true;
+    return !isProcessAlive(pid);
+  } catch {
+    return true;
+  }
+}
+async function releaseLock(root) {
+  const lockPath = path11.join(root, LOCK_FILE);
+  try {
+    await unlink(lockPath);
+  } catch {
+  }
+}
+// src/compiler/prompts.ts
+var PROVENANCE_STATE_VALUES = [
+  "extracted",
+  "merged",
+  "inferred",
+  "ambiguous"
+];
+var CONCEPT_EXTRACTION_TOOL = {
+  name: "extract_concepts",
+  description: "Extract knowledge concepts from a source document",
+  input_schema: {
+    type: "object",
+    properties: {
+      concepts: {
+        type: "array",
+        items: {
+          type: "object",
+          properties: {
+            concept: {
+              type: "string",
+              description: "Human-readable concept title"
+            },
+            summary: {
+              type: "string",
+              description: "One-line description"
+            },
+            is_new: {
+              type: "boolean",
+              description: "True if this is a new concept not in existing wiki"
+            },
+            tags: {
+              type: "array",
+              items: { type: "string" },
+              description: "2-4 categorical tags for organizing this concept (e.g., 'machine-learning', 'optimization')"
+            },
+            confidence: {
+              type: "number",
+              description: "Confidence in this concept on a 0..1 scale (1 = directly stated, 0 = highly speculative)."
+            },
+            provenance_state: {
+              type: "string",
+              enum: PROVENANCE_STATE_VALUES,
+              description: "How this concept was produced: 'extracted' (direct from source), 'merged' (synthesised across sources), 'inferred' (model deduction), or 'ambiguous' (sources disagree)."
+            },
+            contradicted_by: {
+              type: "array",
+              items: {
+                type: "object",
+                properties: {
+                  slug: { type: "string", description: "Slug of the contradicting concept." },
+                  reason: { type: "string", description: "Brief reason for the contradiction." }
+                },
+                required: ["slug"]
+              },
+              description: "Slugs of other concepts whose evidence contradicts this one."
+            },
+            inferred_paragraphs: {
+              type: "integer",
+              description: "Estimated number of paragraphs in the page that will be inferred rather than directly cited."
+            }
+          },
+          required: ["concept", "summary", "is_new"]
+        }
+      }
+    },
+    required: ["concepts"]
+  }
+};
+function buildExtractionPrompt(sourceContent, existingIndex) {
+  const indexSection = existingIndex ? `
+Here is the existing wiki index \u2014 avoid duplicating concepts already covered:
+${existingIndex}` : "\n\nNo existing wiki pages yet.";
+  return [
+    "You are a knowledge extraction engine. Analyze the following source document",
+    "and identify 3-8 distinct, meaningful concepts worth documenting as wiki pages.",
+    "Each concept should be a standalone topic that someone might look up.",
+    "Focus on key ideas, techniques, patterns, or entities \u2014 not trivial details.",
+    "Use the extract_concepts tool to return your findings.",
+    "",
+    "For every concept, emit provenance metadata so downstream tools can reason",
+    "about reliability:",
+    "  - confidence: 0..1 \u2014 how certain you are the source supports this concept.",
+    "  - provenance_state: 'extracted' if directly stated, 'merged' if synthesised",
+    "    from multiple parts of the source, 'inferred' if reasoned from context,",
+    "    or 'ambiguous' if the source is contradictory or unclear.",
+    "  - contradicted_by: slugs of other concepts (in this batch or the index)",
+    "    whose evidence conflicts with this one.",
+    "  - inferred_paragraphs: estimated number of paragraphs in the resulting",
+    "    page that will be inferred rather than directly citable.",
+    indexSection,
+    "\n\n--- SOURCE DOCUMENT ---\n\n",
+    sourceContent
+  ].join("\n");
+}
+function buildPagePrompt(concept, sourceContent, existingPage, relatedPages) {
+  const existingSection = existingPage ? `
+Existing page to update:
+${existingPage}` : "";
+  const relatedSection = relatedPages ? `
+Related wiki pages for cross-referencing:
+${relatedPages}` : "";
+  return [
+    `You are a wiki author. Write a clear, well-structured markdown page about "${concept}".`,
+    "Draw facts only from the provided source material.",
+    "Include a ## Sources section at the end listing the source document.",
+    "Suggest [[wikilinks]] to related concepts where appropriate.",
+    "Write in a neutral, informative tone. Be concise but thorough.",
+    "",
+    "Source attribution: at the end of each prose paragraph, append a citation",
+    "marker showing which source file(s) the paragraph drew from.",
+    "Format: ^[filename.md] for single-source, ^[source-a.md, source-b.md] for multi-source.",
+    "When a single sentence makes a specific factual claim and you can identify the",
+    "exact line range it came from, you may use the claim-level form",
+    "^[filename.md:START-END] (or ^[filename.md#LSTART-LEND]) at the end of that",
+    "sentence \u2014 START and END are 1-indexed line numbers in the source file.",
+    "Paragraph-level citations remain the default; only switch to claim-level form",
+    "when it materially improves verifiability and the line range is unambiguous.",
+    "Place citations only at the end of prose paragraphs or sentences \u2014 not on",
+    "headings, list items, or code blocks.",
+    "Source filenames are visible as `--- SOURCE: filename.md ---` headers in the content below.",
+    "",
+    "If a paragraph is your inference rather than a direct extraction, leave it",
     "uncited \u2014 downstream lint rules will count uncited paragraphs as 'inferred'",
     "to compute the page's provenance metadata.",
     existingSection,
@@ -1077,774 +1391,1504 @@ ${relatedPages}` : "";
     sourceContent
   ].join("\n");
 }
-function isValidRawConcept(c) {
-  return typeof c.concept === "string" && typeof c.summary === "string" && typeof c.is_new === "boolean" && (c.tags === void 0 || Array.isArray(c.tags));
+function isValidRawConcept(c) {
+  return typeof c.concept === "string" && typeof c.summary === "string" && typeof c.is_new === "boolean" && (c.tags === void 0 || Array.isArray(c.tags));
+}
+function coerceContradictedBy(raw) {
+  if (!Array.isArray(raw)) return void 0;
+  const refs = [];
+  for (const entry of raw) {
+    if (!entry || typeof entry !== "object") continue;
+    const obj = entry;
+    if (typeof obj.slug !== "string" || obj.slug.trim().length === 0) continue;
+    const ref = { slug: obj.slug.trim() };
+    if (typeof obj.reason === "string") ref.reason = obj.reason;
+    refs.push(ref);
+  }
+  return refs.length > 0 ? refs : void 0;
+}
+function mapRawConcept(c) {
+  const provenance = typeof c.provenance_state === "string" && PROVENANCE_STATE_VALUES.includes(c.provenance_state) ? c.provenance_state : void 0;
+  return {
+    concept: c.concept,
+    summary: c.summary,
+    is_new: c.is_new,
+    tags: Array.isArray(c.tags) ? c.tags : void 0,
+    confidence: typeof c.confidence === "number" ? c.confidence : void 0,
+    provenanceState: provenance,
+    contradictedBy: coerceContradictedBy(c.contradicted_by),
+    inferredParagraphs: typeof c.inferred_paragraphs === "number" && Number.isInteger(c.inferred_paragraphs) && c.inferred_paragraphs >= 0 ? c.inferred_paragraphs : void 0
+  };
+}
+function buildSeedPagePrompt(seed, rule, relatedPagesContent) {
+  const minLinks = rule.minWikilinks;
+  const linkExpectation = minLinks > 0 ? `Include at least ${minLinks} [[wikilinks]] to related pages.` : "Use [[wikilinks]] when referencing other pages.";
+  return [
+    `You are a wiki author. Write a ${seed.kind} page titled "${seed.title}".`,
+    `Page-kind guidance: ${rule.description}`,
+    `Summary line for context: ${seed.summary}`,
+    "Draw facts only from the related wiki pages provided below.",
+    linkExpectation,
+    "Write in a neutral, informative tone. Be concise but thorough.",
+    "\n\n--- RELATED PAGES ---\n\n",
+    relatedPagesContent
+  ].join("\n");
+}
+function parseConcepts(toolOutput) {
+  try {
+    const parsed = JSON.parse(toolOutput);
+    const concepts = parsed.concepts ?? [];
+    return concepts.filter(isValidRawConcept).map(mapRawConcept);
+  } catch {
+    return [];
+  }
+}
+// src/schema/types.ts
+var PAGE_KINDS = [
+  "concept",
+  "entity",
+  "comparison",
+  "overview"
+];
+// src/schema/defaults.ts
+var DEFAULT_MIN_LINKS = {
+  concept: 0,
+  entity: 1,
+  comparison: 2,
+  overview: 3
+};
+var DEFAULT_DESCRIPTIONS = {
+  concept: "A standalone idea, technique, or pattern worth documenting.",
+  entity: "A specific thing \u2014 a person, product, organization, or named artifact.",
+  comparison: "A side-by-side analysis weighing two or more concepts or entities.",
+  overview: "A top-down map page that situates several concepts within a domain."
+};
+function buildDefaultKindRules() {
+  return {
+    concept: { minWikilinks: DEFAULT_MIN_LINKS.concept, description: DEFAULT_DESCRIPTIONS.concept },
+    entity: { minWikilinks: DEFAULT_MIN_LINKS.entity, description: DEFAULT_DESCRIPTIONS.entity },
+    comparison: {
+      minWikilinks: DEFAULT_MIN_LINKS.comparison,
+      description: DEFAULT_DESCRIPTIONS.comparison
+    },
+    overview: {
+      minWikilinks: DEFAULT_MIN_LINKS.overview,
+      description: DEFAULT_DESCRIPTIONS.overview
+    }
+  };
+}
+function buildDefaultSchema() {
+  return {
+    version: 1,
+    defaultKind: "concept",
+    kinds: buildDefaultKindRules(),
+    seedPages: [],
+    loadedFrom: null
+  };
+}
+// src/schema/loader.ts
+import { existsSync as existsSync2 } from "fs";
+import { readFile as readFile10 } from "fs/promises";
+import path12 from "path";
+import yaml2 from "js-yaml";
+var SCHEMA_CANDIDATE_PATHS = [
+  ".llmwiki/schema.json",
+  ".llmwiki/schema.yaml",
+  ".llmwiki/schema.yml",
+  "wiki/.schema.yaml",
+  "wiki/.schema.yml"
+];
+function findSchemaPath(root) {
+  for (const candidate of SCHEMA_CANDIDATE_PATHS) {
+    const absolute = path12.join(root, candidate);
+    if (existsSync2(absolute)) return absolute;
+  }
+  return null;
+}
+function parseSchemaFile(filePath, content) {
+  const isJson = filePath.endsWith(".json");
+  const parsed = isJson ? JSON.parse(content) : yaml2.load(content);
+  if (parsed && typeof parsed === "object") return parsed;
+  return {};
+}
+function isPageKind(value) {
+  return typeof value === "string" && PAGE_KINDS.includes(value);
+}
+function mergeKindRule(defaults, override) {
+  if (!override) return defaults;
+  const minWikilinks = typeof override.minWikilinks === "number" ? override.minWikilinks : defaults.minWikilinks;
+  const description = typeof override.description === "string" ? override.description : defaults.description;
+  return { minWikilinks, description };
+}
+function mergeKinds(defaults, overrides) {
+  const merged = { ...defaults };
+  if (!overrides) return merged;
+  for (const kind of PAGE_KINDS) {
+    merged[kind] = mergeKindRule(defaults[kind], overrides[kind]);
+  }
+  return merged;
+}
+function normalizeSeedPage(entry) {
+  if (typeof entry.title !== "string" || entry.title.trim() === "") return null;
+  if (!isPageKind(entry.kind)) return null;
+  const summary = typeof entry.summary === "string" ? entry.summary : "";
+  const relatedSlugs = Array.isArray(entry.relatedSlugs) ? entry.relatedSlugs.filter((slug) => typeof slug === "string") : void 0;
+  return { title: entry.title, kind: entry.kind, summary, relatedSlugs };
+}
+function normalizeSeedPages(entries) {
+  if (!Array.isArray(entries)) return [];
+  return entries.map(normalizeSeedPage).filter((entry) => entry !== null);
+}
+function applyOverrides(defaults, overrides, loadedFrom) {
+  const defaultKind = isPageKind(overrides.defaultKind) ? overrides.defaultKind : defaults.defaultKind;
+  return {
+    version: 1,
+    defaultKind,
+    kinds: mergeKinds(defaults.kinds, overrides.kinds),
+    seedPages: normalizeSeedPages(overrides.seedPages),
+    loadedFrom
+  };
+}
+async function loadSchema(root) {
+  const defaults = buildDefaultSchema();
+  const schemaPath = findSchemaPath(root);
+  if (!schemaPath) return defaults;
+  const raw = await readFile10(schemaPath, "utf-8");
+  const parsed = parseSchemaFile(schemaPath, raw);
+  return applyOverrides(defaults, parsed, schemaPath);
+}
+function defaultSchemaInitPath(root) {
+  return path12.join(root, SCHEMA_CANDIDATE_PATHS[0]);
+}
+// src/schema/helpers.ts
+import yaml3 from "js-yaml";
+var WIKILINK_PATTERN = /\[\[([^\]]+)\]\]/g;
+function resolvePageKind(rawKind, schema) {
+  if (typeof rawKind === "string" && PAGE_KINDS.includes(rawKind)) {
+    return rawKind;
+  }
+  return schema.defaultKind;
+}
+function countWikilinks(body) {
+  const matches = body.match(WIKILINK_PATTERN);
+  return matches ? matches.length : 0;
+}
+function serializeSchemaToYaml(schema) {
+  const serializable = {
+    version: schema.version,
+    defaultKind: schema.defaultKind,
+    kinds: schema.kinds,
+    seedPages: schema.seedPages
+  };
+  return yaml3.dump(serializable, { lineWidth: -1, quotingType: '"' });
+}
+// src/compiler/deps.ts
+function buildConceptToSourcesMap(sources) {
+  const conceptMap = /* @__PURE__ */ new Map();
+  for (const [sourceFile, entry] of Object.entries(sources)) {
+    for (const slug of entry.concepts) {
+      const existing = conceptMap.get(slug);
+      if (existing) {
+        existing.push(sourceFile);
+      } else {
+        conceptMap.set(slug, [sourceFile]);
+      }
+    }
+  }
+  return conceptMap;
+}
+function filesByStatus(changes, ...statuses) {
+  const statusSet = new Set(statuses);
+  return new Set(
+    changes.filter((c) => statusSet.has(c.status)).map((c) => c.file)
+  );
+}
+function collectSharedContributors(sourceFile, state, conceptMap, excludeSets, out) {
+  const sourceEntry = state.sources[sourceFile];
+  if (!sourceEntry) return;
+  for (const slug of sourceEntry.concepts) {
+    const contributors = conceptMap.get(slug);
+    if (!contributors || contributors.length < 2) continue;
+    for (const contributor of contributors) {
+      const isExcluded = excludeSets.some((s) => s.has(contributor));
+      if (!isExcluded) out.add(contributor);
+    }
+  }
+}
+function findAffectedSources(state, directChanges) {
+  const changedFiles = filesByStatus(directChanges, "new", "changed");
+  const deletedFiles = filesByStatus(directChanges, "deleted");
+  const conceptMap = buildConceptToSourcesMap(state.sources);
+  const affected = /* @__PURE__ */ new Set();
+  for (const changedFile of changedFiles) {
+    collectSharedContributors(
+      changedFile,
+      state,
+      conceptMap,
+      [changedFiles, deletedFiles, affected],
+      affected
+    );
+  }
+  return Array.from(affected);
+}
+function findFrozenSlugs(state, changes) {
+  const frozen = new Set(state.frozenSlugs ?? []);
+  const deletedFiles = changes.filter((c) => c.status === "deleted").map((c) => c.file);
+  const conceptMap = buildConceptToSourcesMap(state.sources);
+  for (const file of deletedFiles) {
+    const entry = state.sources[file];
+    if (!entry) continue;
+    for (const slug of entry.concepts) {
+      const contributors = conceptMap.get(slug);
+      if (contributors && contributors.length > 1) {
+        frozen.add(slug);
+      }
+    }
+  }
+  return frozen;
+}
+async function persistFrozenSlugs(root, frozenSlugs, successfulExtractions) {
+  const currentState = await readState(root);
+  const conceptMap = buildConceptToSourcesMap(currentState.sources);
+  const extractedBy = /* @__PURE__ */ new Set();
+  for (const result of successfulExtractions) {
+    if (result.concepts.length === 0) continue;
+    for (const c of result.concepts) {
+      extractedBy.add(slugify(c.concept));
+    }
+  }
+  const compiledFiles = new Set(
+    successfulExtractions.filter((r) => r.concepts.length > 0).map((r) => r.sourceFile)
+  );
+  const remaining = /* @__PURE__ */ new Set();
+  for (const slug of frozenSlugs) {
+    const owners = conceptMap.get(slug) ?? [];
+    const allOwnersCompiled = owners.length > 0 && owners.every((f) => compiledFiles.has(f)) && extractedBy.has(slug);
+    if (!allOwnersCompiled) remaining.add(slug);
+  }
+  const stateToSave = { ...currentState, frozenSlugs: Array.from(remaining) };
+  await writeState(root, stateToSave);
+}
+function collectFreshSlugs(extractions, state) {
+  const freshSlugs = /* @__PURE__ */ new Set();
+  for (const result of extractions) {
+    const oldConcepts = new Set(state.sources[result.sourceFile]?.concepts ?? []);
+    for (const c of result.concepts) {
+      const slug = slugify(c.concept);
+      if (!oldConcepts.has(slug)) freshSlugs.add(slug);
+    }
+  }
+  return freshSlugs;
+}
+function findSlugOwners(slugs, conceptMap, excludeSets) {
+  const affected = /* @__PURE__ */ new Set();
+  for (const slug of slugs) {
+    const owners = conceptMap.get(slug);
+    if (!owners) continue;
+    for (const owner of owners) {
+      const isExcluded = excludeSets.some((s) => s.has(owner));
+      if (!isExcluded) affected.add(owner);
+    }
+  }
+  return Array.from(affected);
+}
+function findLateAffectedSources(extractions, state, allChanges) {
+  const compilingFiles = filesByStatus(allChanges, "new", "changed");
+  const deletedFiles = filesByStatus(allChanges, "deleted");
+  const conceptMap = buildConceptToSourcesMap(state.sources);
+  const freshSlugs = collectFreshSlugs(extractions, state);
+  return findSlugOwners(freshSlugs, conceptMap, [compilingFiles, deletedFiles]);
+}
+function findSharedConcepts(sourceFile, state) {
+  const shared = /* @__PURE__ */ new Set();
+  const sourceEntry = state.sources[sourceFile];
+  if (!sourceEntry) return shared;
+  const conceptMap = buildConceptToSourcesMap(state.sources);
+  for (const slug of sourceEntry.concepts) {
+    const contributors = conceptMap.get(slug);
+    if (contributors && contributors.length > 1) {
+      shared.add(slug);
+    }
+  }
+  return shared;
+}
+async function freezeFailedExtractions(root, results, frozenSlugs) {
+  for (const result of results) {
+    if (result.concepts.length > 0) continue;
+    status("!", warn(`${result.sourceFile}: no concepts \u2014 will retry.`));
+    const currentState = await readState(root);
+    const oldConcepts = currentState.sources[result.sourceFile]?.concepts ?? [];
+    for (const slug of oldConcepts) frozenSlugs.add(slug);
+    await updateSourceState(root, result.sourceFile, {
+      hash: "",
+      concepts: oldConcepts,
+      compiledAt: (/* @__PURE__ */ new Date()).toISOString()
+    });
+  }
+}
+// src/compiler/orphan.ts
+import path13 from "path";
+async function markOrphaned(root, sourceFile, state) {
+  const sourceEntry = state.sources[sourceFile];
+  if (!sourceEntry) return;
+  const sharedSlugs = findSharedConcepts(sourceFile, state);
+  for (const slug of sourceEntry.concepts) {
+    if (sharedSlugs.has(slug)) {
+      status("i", dim(`Kept: ${slug}.md (shared with other sources)`));
+      continue;
+    }
+    await orphanPage(root, slug, "source deleted");
+  }
+  await removeSourceState(root, sourceFile);
+}
+async function orphanUnownedFrozenPages(root, frozenSlugs) {
+  const currentState = await readState(root);
+  const ownedSlugs = /* @__PURE__ */ new Set();
+  for (const entry of Object.values(currentState.sources)) {
+    for (const slug of entry.concepts) ownedSlugs.add(slug);
+  }
+  for (const slug of frozenSlugs) {
+    if (ownedSlugs.has(slug)) continue;
+    await orphanPage(root, slug, "no remaining sources");
+  }
+}
+async function orphanPage(root, slug, reason) {
+  const pagePath = path13.join(root, CONCEPTS_DIR, `${slug}.md`);
+  const content = await safeReadFile(pagePath);
+  if (!content) return;
+  const { meta } = parseFrontmatter(content);
+  if (meta.orphaned === true) return;
+  const updated = content.replace("---\n", "---\norphaned: true\n");
+  await atomicWrite(pagePath, updated);
+  status("\u26A0", warn(`Orphaned: ${slug}.md (${reason})`));
+}
+// src/compiler/resolver.ts
+import { readdir as readdir2, readFile as readFile11 } from "fs/promises";
+import path14 from "path";
+import { existsSync as existsSync3 } from "fs";
+async function buildTitleIndex(root) {
+  const conceptsDir = path14.join(root, CONCEPTS_DIR);
+  if (!existsSync3(conceptsDir)) return [];
+  const files = await readdir2(conceptsDir);
+  const pages = [];
+  for (const file of files) {
+    if (!file.endsWith(".md")) continue;
+    const filePath = path14.join(conceptsDir, file);
+    const content = await readFile11(filePath, "utf-8");
+    const { meta } = parseFrontmatter(content);
+    if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
+      pages.push({
+        slug: file.replace(/\.md$/, ""),
+        title: meta.title,
+        filePath
+      });
+    }
+  }
+  return pages;
+}
+function isInsideWikilink(text, position) {
+  const before = text.lastIndexOf("[[", position);
+  const after = text.indexOf("]]", position);
+  if (before === -1 || after === -1) return false;
+  const closeBefore = text.indexOf("]]", before);
+  return closeBefore >= position;
+}
+function isInsideCitation(text, position) {
+  const before = text.lastIndexOf("^[", position);
+  const after = text.indexOf("]", position);
+  if (before === -1 || after === -1) return false;
+  const closeBefore = text.indexOf("]", before);
+  return closeBefore >= position;
+}
+function isWordBoundary(text, start, end) {
+  const before = start === 0 || /[\s,.:;!?()\[\]{}/"']/.test(text[start - 1]);
+  const after = end >= text.length || /[\s,.:;!?()\[\]{}/"']/.test(text[end]);
+  return before && after;
+}
+function findTitleMatches(text, title) {
+  const escaped = title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+  const regex = new RegExp(escaped, "gi");
+  const matches = [];
+  let match;
+  while ((match = regex.exec(text)) !== null) {
+    matches.push({ start: match.index, end: match.index + match[0].length });
+  }
+  return matches;
+}
+function isLinkablePosition(text, start, end) {
+  if (isInsideWikilink(text, start)) return false;
+  if (isInsideCitation(text, start)) return false;
+  return isWordBoundary(text, start, end);
+}
+function addWikilinks(body, titles, selfTitle) {
+  let result = body;
+  const selfLower = selfTitle.toLowerCase();
+  for (const page of titles) {
+    if (page.title.toLowerCase() === selfLower) continue;
+    const matches = findTitleMatches(result, page.title);
+    for (const m of matches.reverse()) {
+      if (!isLinkablePosition(result, m.start, m.end)) continue;
+      result = result.slice(0, m.start) + `[[${page.slug}|${page.title}]]` + result.slice(m.end);
+    }
+  }
+  return result;
+}
+async function resolveLinks(root, changedSlugs, newSlugs) {
+  const titleIndex = await buildTitleIndex(root);
+  if (titleIndex.length === 0) return 0;
+  let linkCount = 0;
+  linkCount += await resolveOutboundLinks(titleIndex, changedSlugs);
+  linkCount += await resolveInboundLinks(titleIndex, newSlugs);
+  if (linkCount > 0) {
+    status("\u{1F517}", dim(`Resolved links in ${linkCount} page(s)`));
+  }
+  return linkCount;
+}
+async function resolveOutboundLinks(titleIndex, changedSlugs) {
+  let count = 0;
+  for (const page of titleIndex) {
+    if (!changedSlugs.includes(page.slug)) continue;
+    const didLink = await linkPage(page, titleIndex);
+    if (didLink) count++;
+  }
+  return count;
 }
-function coerceContradictedBy(raw) {
-  if (!Array.isArray(raw)) return void 0;
-  const refs = [];
-  for (const entry of raw) {
-    if (!entry || typeof entry !== "object") continue;
-    const obj = entry;
-    if (typeof obj.slug !== "string" || obj.slug.trim().length === 0) continue;
-    const ref = { slug: obj.slug.trim() };
-    if (typeof obj.reason === "string") ref.reason = obj.reason;
-    refs.push(ref);
+async function resolveInboundLinks(titleIndex, newSlugs) {
+  if (newSlugs.length === 0) return 0;
+  const newTitles = titleIndex.filter((p) => newSlugs.includes(p.slug));
+  if (newTitles.length === 0) return 0;
+  let count = 0;
+  for (const page of titleIndex) {
+    if (newSlugs.includes(page.slug)) continue;
+    const content = await readFile11(page.filePath, "utf-8");
+    const { body } = parseFrontmatter(content);
+    const linked = addWikilinks(body, newTitles, page.title);
+    if (linked !== body) {
+      const newContent = content.replace(body, linked);
+      await atomicWrite(page.filePath, newContent);
+      count++;
+    }
   }
-  return refs.length > 0 ? refs : void 0;
+  return count;
 }
-function mapRawConcept(c) {
-  const provenance = typeof c.provenance_state === "string" && PROVENANCE_STATE_VALUES.includes(c.provenance_state) ? c.provenance_state : void 0;
-  return {
-    concept: c.concept,
-    summary: c.summary,
-    is_new: c.is_new,
-    tags: Array.isArray(c.tags) ? c.tags : void 0,
-    confidence: typeof c.confidence === "number" ? c.confidence : void 0,
-    provenanceState: provenance,
-    contradictedBy: coerceContradictedBy(c.contradicted_by),
-    inferredParagraphs: typeof c.inferred_paragraphs === "number" && Number.isInteger(c.inferred_paragraphs) && c.inferred_paragraphs >= 0 ? c.inferred_paragraphs : void 0
-  };
+async function linkPage(page, titleIndex) {
+  const content = await readFile11(page.filePath, "utf-8");
+  const { body } = parseFrontmatter(content);
+  const linked = addWikilinks(body, titleIndex, page.title);
+  if (linked === body) return false;
+  const newContent = content.replace(body, linked);
+  await atomicWrite(page.filePath, newContent);
+  return true;
 }
-function parseConcepts(toolOutput) {
+// src/compiler/indexgen.ts
+import { readdir as readdir3 } from "fs/promises";
+import path15 from "path";
+async function generateIndex(root) {
+  status("*", info("Generating index..."));
+  const conceptsPath = path15.join(root, CONCEPTS_DIR);
+  const queriesPath = path15.join(root, QUERIES_DIR);
+  const concepts = await collectPageSummaries(conceptsPath);
+  const queries = await collectPageSummaries(queriesPath);
+  concepts.sort((a, b) => a.title.localeCompare(b.title));
+  queries.sort((a, b) => a.title.localeCompare(b.title));
+  const indexContent = buildIndexContent(concepts, queries);
+  const indexPath = path15.join(root, INDEX_FILE);
+  await atomicWrite(indexPath, indexContent);
+  const total = concepts.length + queries.length;
+  status("+", success(`Index updated with ${total} pages.`));
+}
+async function scanWikiPages(dirPath) {
+  let files;
   try {
-    const parsed = JSON.parse(toolOutput);
-    const concepts = parsed.concepts ?? [];
-    return concepts.filter(isValidRawConcept).map(mapRawConcept);
+    files = await readdir3(dirPath);
   } catch {
     return [];
   }
-}
-// src/compiler/deps.ts
-function buildConceptToSourcesMap(sources) {
-  const conceptMap = /* @__PURE__ */ new Map();
-  for (const [sourceFile, entry] of Object.entries(sources)) {
-    for (const slug of entry.concepts) {
-      const existing = conceptMap.get(slug);
-      if (existing) {
-        existing.push(sourceFile);
-      } else {
-        conceptMap.set(slug, [sourceFile]);
-      }
-    }
+  const scanned = [];
+  for (const file of files.filter((f) => f.endsWith(".md"))) {
+    const content = await safeReadFile(path15.join(dirPath, file));
+    const { meta } = parseFrontmatter(content);
+    scanned.push({ slug: file.replace(/\.md$/, ""), meta });
   }
-  return conceptMap;
+  return scanned;
 }
-function filesByStatus(changes, ...statuses) {
-  const statusSet = new Set(statuses);
-  return new Set(
-    changes.filter((c) => statusSet.has(c.status)).map((c) => c.file)
-  );
+async function collectPageSummaries(conceptsPath) {
+  const scanned = await scanWikiPages(conceptsPath);
+  return scanned.filter(({ meta }) => meta.title && typeof meta.title === "string" && !meta.orphaned).map(({ slug, meta }) => ({
+    title: meta.title,
+    slug,
+    summary: typeof meta.summary === "string" ? meta.summary : ""
+  }));
 }
-function collectSharedContributors(sourceFile, state, conceptMap, excludeSets, out) {
-  const sourceEntry = state.sources[sourceFile];
-  if (!sourceEntry) return;
-  for (const slug of sourceEntry.concepts) {
-    const contributors = conceptMap.get(slug);
-    if (!contributors || contributors.length < 2) continue;
-    for (const contributor of contributors) {
-      const isExcluded = excludeSets.some((s) => s.has(contributor));
-      if (!isExcluded) out.add(contributor);
-    }
-  }
+function stripWikilinks(text) {
+  return text.replace(/\[\[([^\]]+)\]\]/g, "$1");
 }
-function findAffectedSources(state, directChanges) {
-  const changedFiles = filesByStatus(directChanges, "new", "changed");
-  const deletedFiles = filesByStatus(directChanges, "deleted");
-  const conceptMap = buildConceptToSourcesMap(state.sources);
-  const affected = /* @__PURE__ */ new Set();
-  for (const changedFile of changedFiles) {
-    collectSharedContributors(
-      changedFile,
-      state,
-      conceptMap,
-      [changedFiles, deletedFiles, affected],
-      affected
-    );
+function buildIndexContent(concepts, queries) {
+  const lines = ["# Knowledge Wiki", "", "## Concepts", ""];
+  for (const page of concepts) {
+    lines.push(`- **[[${page.slug}|${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
   }
-  return Array.from(affected);
-}
-function findFrozenSlugs(state, changes) {
-  const frozen = new Set(state.frozenSlugs ?? []);
-  const deletedFiles = changes.filter((c) => c.status === "deleted").map((c) => c.file);
-  const conceptMap = buildConceptToSourcesMap(state.sources);
-  for (const file of deletedFiles) {
-    const entry = state.sources[file];
-    if (!entry) continue;
-    for (const slug of entry.concepts) {
-      const contributors = conceptMap.get(slug);
-      if (contributors && contributors.length > 1) {
-        frozen.add(slug);
-      }
+  if (queries.length > 0) {
+    lines.push("", "## Saved Queries", "");
+    for (const page of queries) {
+      lines.push(`- **[[${page.slug}|${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
     }
   }
-  return frozen;
+  const total = concepts.length + queries.length;
+  lines.push("");
+  lines.push(`_${total} pages | Generated ${(/* @__PURE__ */ new Date()).toISOString()}_`);
+  lines.push("");
+  return lines.join("\n");
 }
-async function persistFrozenSlugs(root, frozenSlugs, successfulExtractions) {
-  const currentState = await readState(root);
-  const conceptMap = buildConceptToSourcesMap(currentState.sources);
-  const extractedBy = /* @__PURE__ */ new Set();
-  for (const result of successfulExtractions) {
-    if (result.concepts.length === 0) continue;
-    for (const c of result.concepts) {
-      extractedBy.add(slugify(c.concept));
-    }
+// src/compiler/obsidian.ts
+import { readdir as readdir4 } from "fs/promises";
+import path16 from "path";
+var ABBREVIATION_MIN_WORDS = 3;
+var SWAP_CONJUNCTIONS = [" and ", " or "];
+function addObsidianMeta(frontmatter, conceptTitle, tags) {
+  frontmatter.tags = tags;
+  frontmatter.aliases = generateAliases(conceptTitle);
+}
+function generateAliases(title) {
+  const aliases = [];
+  const slug = slugify(title);
+  if (slug !== title) {
+    aliases.push(slug);
   }
-  const compiledFiles = new Set(
-    successfulExtractions.filter((r) => r.concepts.length > 0).map((r) => r.sourceFile)
-  );
-  const remaining = /* @__PURE__ */ new Set();
-  for (const slug of frozenSlugs) {
-    const owners = conceptMap.get(slug) ?? [];
-    const allOwnersCompiled = owners.length > 0 && owners.every((f) => compiledFiles.has(f)) && extractedBy.has(slug);
-    if (!allOwnersCompiled) remaining.add(slug);
+  const swapAlias = generateSwapAlias(title);
+  if (swapAlias) {
+    aliases.push(swapAlias);
   }
-  const stateToSave = { ...currentState, frozenSlugs: Array.from(remaining) };
-  await writeState(root, stateToSave);
+  const abbreviation = generateAbbreviation(title);
+  if (abbreviation) {
+    aliases.push(abbreviation);
+  }
+  return aliases;
 }
-function collectFreshSlugs(extractions, state) {
-  const freshSlugs = /* @__PURE__ */ new Set();
-  for (const result of extractions) {
-    const oldConcepts = new Set(state.sources[result.sourceFile]?.concepts ?? []);
-    for (const c of result.concepts) {
-      const slug = slugify(c.concept);
-      if (!oldConcepts.has(slug)) freshSlugs.add(slug);
-    }
+function generateSwapAlias(title) {
+  for (const conjunction of SWAP_CONJUNCTIONS) {
+    const index = title.toLowerCase().indexOf(conjunction);
+    if (index === -1) continue;
+    const before = title.slice(0, index);
+    const after = title.slice(index + conjunction.length);
+    const originalConjunction = title.slice(index, index + conjunction.length);
+    return `${after}${originalConjunction}${before}`;
   }
-  return freshSlugs;
+  return null;
+}
+function generateAbbreviation(title) {
+  const words = title.split(/\s+/);
+  if (words.length < ABBREVIATION_MIN_WORDS) return null;
+  const abbreviation = words.map((w) => w[0].toUpperCase()).join("");
+  if (abbreviation === title) return null;
+  return abbreviation;
+}
+async function generateMOC(root) {
+  const conceptsPath = path16.join(root, CONCEPTS_DIR);
+  const pages = await loadConceptPages(conceptsPath);
+  const tagGroups = groupPagesByTag(pages);
+  const content = buildMOCContent(tagGroups);
+  await atomicWrite(path16.join(root, MOC_FILE), content);
+}
+async function loadConceptPages(conceptsPath) {
+  let files;
+  try {
+    files = await readdir4(conceptsPath);
+  } catch {
+    return [];
+  }
+  const pages = [];
+  for (const file of files) {
+    if (!file.endsWith(".md")) continue;
+    const content = await safeReadFile(path16.join(conceptsPath, file));
+    if (!content) continue;
+    const { meta } = parseFrontmatter(content);
+    if (meta.orphaned) continue;
+    const slug = file.replace(/\.md$/, "");
+    const title = typeof meta.title === "string" ? meta.title : slug;
+    const tags = Array.isArray(meta.tags) ? meta.tags : [];
+    pages.push({ slug, title, tags });
+  }
+  return pages;
 }
-function findSlugOwners(slugs, conceptMap, excludeSets) {
-  const affected = /* @__PURE__ */ new Set();
-  for (const slug of slugs) {
-    const owners = conceptMap.get(slug);
-    if (!owners) continue;
-    for (const owner of owners) {
-      const isExcluded = excludeSets.some((s) => s.has(owner));
-      if (!isExcluded) affected.add(owner);
+function groupPagesByTag(pages) {
+  const groups = /* @__PURE__ */ new Map();
+  for (const page of pages) {
+    if (page.tags.length === 0) {
+      appendToGroup(groups, "Uncategorized", page);
+      continue;
+    }
+    for (const tag of page.tags) {
+      appendToGroup(groups, tag, page);
     }
   }
-  return Array.from(affected);
+  return groups;
 }
-function findLateAffectedSources(extractions, state, allChanges) {
-  const compilingFiles = filesByStatus(allChanges, "new", "changed");
-  const deletedFiles = filesByStatus(allChanges, "deleted");
-  const conceptMap = buildConceptToSourcesMap(state.sources);
-  const freshSlugs = collectFreshSlugs(extractions, state);
-  return findSlugOwners(freshSlugs, conceptMap, [compilingFiles, deletedFiles]);
+function appendToGroup(groups, key, page) {
+  const existing = groups.get(key);
+  if (existing) {
+    existing.push(page);
+  } else {
+    groups.set(key, [page]);
+  }
 }
-function findSharedConcepts(sourceFile, state) {
-  const shared = /* @__PURE__ */ new Set();
-  const sourceEntry = state.sources[sourceFile];
-  if (!sourceEntry) return shared;
-  const conceptMap = buildConceptToSourcesMap(state.sources);
-  for (const slug of sourceEntry.concepts) {
-    const contributors = conceptMap.get(slug);
-    if (contributors && contributors.length > 1) {
-      shared.add(slug);
+function buildMOCContent(tagGroups) {
+  const lines = ["# Map of Content", ""];
+  const sortedTags = [...tagGroups.keys()].sort((a, b) => {
+    if (a === "Uncategorized") return 1;
+    if (b === "Uncategorized") return -1;
+    return a.localeCompare(b);
+  });
+  for (const tag of sortedTags) {
+    const pages = tagGroups.get(tag) ?? [];
+    lines.push(`## ${tag}`, "");
+    for (const page of pages.sort((a, b) => a.title.localeCompare(b.title))) {
+      lines.push(`- [[${page.slug}|${page.title}]]`);
     }
+    lines.push("");
   }
-  return shared;
+  return lines.join("\n");
 }
-async function freezeFailedExtractions(root, results, frozenSlugs) {
-  for (const result of results) {
-    if (result.concepts.length > 0) continue;
-    status("!", warn(`${result.sourceFile}: no concepts \u2014 will retry.`));
-    const currentState = await readState(root);
-    const oldConcepts = currentState.sources[result.sourceFile]?.concepts ?? [];
-    for (const slug of oldConcepts) frozenSlugs.add(slug);
-    await updateSourceState(root, result.sourceFile, {
-      hash: "",
-      concepts: oldConcepts,
-      compiledAt: (/* @__PURE__ */ new Date()).toISOString()
-    });
+// src/utils/embeddings.ts
+import { readFile as readFile12, readdir as readdir5 } from "fs/promises";
+import { existsSync as existsSync4 } from "fs";
+import path17 from "path";
+// src/utils/retrieval.ts
+import { createHash as createHash2 } from "crypto";
+function hashChunkText(text) {
+  return createHash2("sha256").update(text, "utf8").digest("hex").slice(0, 16);
+}
+/**
+ * Splits a page body into chunk strings: paragraphs are extracted, oversized
+ * ones are broken up, the pieces are packed through appendParagraph, and the
+ * result is passed to mergeTrailingFragment. Returns [] when the body has no
+ * paragraphs.
+ */
+function splitIntoChunks(body) {
+  const paragraphs = extractParagraphs(body);
+  if (paragraphs.length === 0) return [];
+  const collected = [];
+  let pending = "";
+  const pieces = paragraphs.flatMap((paragraph) => splitOversizedParagraph(paragraph));
+  for (const piece of pieces) {
+    pending = appendParagraph(pending, piece, collected);
   }
+  if (pending.length > 0) collected.push(pending);
+  return mergeTrailingFragment(collected);
 }
+function appendParagraph(buffer, paragraph, chunks) { // Grows the running buffer with one paragraph; pushes finished text onto chunks and returns the buffer to carry forward.
+  const candidate = buffer ? `${buffer}
-// src/compiler/orphan.ts
-import path9 from "path";
-async function markOrphaned(root, sourceFile, state) {
-  const sourceEntry = state.sources[sourceFile];
-  if (!sourceEntry) return;
-  const sharedSlugs = findSharedConcepts(sourceFile, state);
-  for (const slug of sourceEntry.concepts) {
-    if (sharedSlugs.has(slug)) {
-      status("i", dim(`Kept: ${slug}.md (shared with other sources)`));
-      continue;
+${paragraph}` : paragraph;
+  if (candidate.length <= CHUNK_TARGET_CHARS) return candidate; // combined text still fits the target size: keep accumulating
+  if (buffer.length > 0) {
+    chunks.push(buffer); // flush the text gathered so far
+    return paragraph; // the paragraph that did not fit seeds the next buffer
+  }
+  chunks.push(candidate); // buffer was empty, so this single paragraph is over the target: emit it alone
+  return "";
+}
+/**
+ * Folds a too-short final chunk into the one before it. The input array is
+ * returned unchanged when there are fewer than two chunks, when the last
+ * chunk already reaches CHUNK_MIN_CHARS, or when the merged text would
+ * exceed CHUNK_MAX_CHARS; otherwise a new array is returned.
+ */
+function mergeTrailingFragment(chunks) {
+  const count = chunks.length;
+  if (count < 2) return chunks;
+  const tail = chunks[count - 1];
+  if (tail.length >= CHUNK_MIN_CHARS) return chunks;
+  const beforeTail = chunks[count - 2];
+  if (beforeTail.length + tail.length + 2 > CHUNK_MAX_CHARS) return chunks;
+  return [...chunks.slice(0, -2), `${beforeTail}
+${tail}`];
+}
+/**
+ * Splits text on runs of two or more newlines and returns the trimmed,
+ * non-empty paragraphs in their original order.
+ */
+function extractParagraphs(body) {
+  const paragraphs = [];
+  for (const block of body.split(/\n{2,}/)) {
+    const trimmed = block.trim();
+    if (trimmed.length > 0) paragraphs.push(trimmed);
+  }
+  return paragraphs;
+}
+function splitOversizedParagraph(paragraph) { // Breaks a paragraph longer than CHUNK_MAX_CHARS into sentence-grouped pieces; a shorter paragraph comes back as a one-element array.
+  if (paragraph.length <= CHUNK_MAX_CHARS) return [paragraph];
+  const sentences = paragraph.split(/(?<=[.!?])\s+/);
+  const pieces = [];
+  let buffer = "";
+  for (const sentence of sentences) {
+    if ((buffer + " " + sentence).length > CHUNK_MAX_CHARS && buffer.length > 0) {
+      pieces.push(buffer.trim()); // adding this sentence would overflow: close the current piece
+      buffer = sentence;
+    } else {
+      buffer = buffer ? `${buffer} ${sentence}` : sentence;
     }
-    await orphanPage(root, slug, "source deleted");
   }
-  await removeSourceState(root, sourceFile);
+  if (buffer.length > 0) pieces.push(buffer.trim());
+  return pieces.flatMap(hardCut); // a single sentence can still exceed the limit, so every piece goes through hardCut
 }
-async function orphanUnownedFrozenPages(root, frozenSlugs) {
-  const currentState = await readState(root);
-  const ownedSlugs = /* @__PURE__ */ new Set();
-  for (const entry of Object.values(currentState.sources)) {
-    for (const slug of entry.concepts) ownedSlugs.add(slug);
+function hardCut(text) { // Slices text into consecutive pieces of at most CHUNK_MAX_CHARS characters; text within the limit is returned as a one-element array.
+  if (text.length <= CHUNK_MAX_CHARS) return [text];
+  const pieces = [];
+  for (let start = 0; start < text.length; start += CHUNK_MAX_CHARS) {
+    pieces.push(text.slice(start, start + CHUNK_MAX_CHARS)); // fixed-width cut by string index, regardless of word boundaries
   }
-  for (const slug of frozenSlugs) {
-    if (ownedSlugs.has(slug)) continue;
-    await orphanPage(root, slug, "no remaining sources");
+  return pieces;
+}
+/**
+ * Re-scores retrieval candidates against the query using BM25 over each
+ * candidate's text. Returns [] for no candidates; when the query yields no
+ * tokens, candidates keep their input order and carry their baseScore.
+ */
+function rerankWithBm25(query, candidates) {
+  if (candidates.length === 0) return [];
+  const queryTerms = tokenize(query);
+  if (queryTerms.length === 0) {
+    // Nothing lexical to match on: pass the base scores through untouched.
+    return candidates.map((candidate) => {
+      return { candidate, score: candidate.baseScore };
+    });
   }
+  const tokenizedDocs = candidates.map((c) => tokenize(c.text));
+  const corpusStats = buildCorpusStats(tokenizedDocs);
+  return rankByBm25Score(candidates, tokenizedDocs, queryTerms, corpusStats);
 }
-async function orphanPage(root, slug, reason) {
-  const pagePath = path9.join(root, CONCEPTS_DIR, `${slug}.md`);
-  const content = await safeReadFile(pagePath);
-  if (!content) return;
-  const { meta } = parseFrontmatter(content);
-  if (meta.orphaned === true) return;
-  const updated = content.replace("---\n", "---\norphaned: true\n");
-  await atomicWrite(pagePath, updated);
-  status("\u26A0", warn(`Orphaned: ${slug}.md (${reason})`));
+/**
+ * Scores each candidate as its BM25 score plus baseScore weighted by
+ * BASE_SCORE_WEIGHT, and returns { candidate, score } pairs sorted by
+ * descending score. docs[i] must hold the tokens of candidates[i].
+ */
+function rankByBm25Score(candidates, docs, queryTerms, stats) {
+  const ranked = [];
+  for (const [index, candidate] of candidates.entries()) {
+    const lexical = bm25Score(queryTerms, docs[index], stats);
+    ranked.push({ candidate, score: lexical + candidate.baseScore * BASE_SCORE_WEIGHT });
+  }
+  return ranked.sort((a, b) => b.score - a.score);
+}
+/**
+ * Lowercases text and splits it into word tokens for lexical scoring.
+ *
+ * A token is a run of Unicode letters, numbers and combining marks, so
+ * accented and non-Latin text is tokenized too; for plain ASCII input the
+ * result is the same as matching runs of a-z and 0-9.
+ *
+ * @param {string} text
+ * @returns {string[]} the tokens in order; [] when there are none
+ */
+function tokenize(text) {
+  return text.toLowerCase().match(/[\p{L}\p{N}\p{M}]+/gu) ?? [];
+}
+/**
+ * Summarises a tokenized corpus: per-term document frequency, average
+ * document length in tokens (0 for an empty corpus) and document count.
+ */
+function buildCorpusStats(docs) {
+  const docFreq = /* @__PURE__ */ new Map();
+  const totalDocs = docs.length;
+  let tokenTotal = 0;
+  for (const tokens of docs) {
+    tokenTotal += tokens.length;
+    // Each document counts once per distinct term.
+    new Set(tokens).forEach((term) => {
+      docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
+    });
+  }
+  const avgDocLen = totalDocs > 0 ? tokenTotal / totalDocs : 0;
+  return { docFreq, avgDocLen, totalDocs };
+}
+var BM25_K1 = 1.5; // k1 factor used in the term-frequency numerator and denominator of bm25Score
+var BM25_B = 0.75; // b factor: how strongly bm25Score scales with document length relative to the corpus average
+var BASE_SCORE_WEIGHT = 0.5; // multiplier applied to candidate.baseScore before it is added to the BM25 score
+/**
+ * BM25 score of one tokenized document for the given query terms, using the
+ * corpus statistics from buildCorpusStats. Returns 0 for an empty document
+ * or an empty corpus; query terms absent from the document contribute nothing.
+ */
+function bm25Score(queryTerms, docTokens, stats) {
+  if (docTokens.length === 0 || stats.totalDocs === 0) return 0;
+  const termFreq = countTerms(docTokens);
+  const lengthRatio = docTokens.length / (stats.avgDocLen || 1);
+  // Length normalisation is the same for every term, so compute it once.
+  const lengthNorm = BM25_K1 * (1 - BM25_B + BM25_B * lengthRatio);
+  return queryTerms.reduce((total, term) => {
+    const tf = termFreq.get(term) ?? 0;
+    if (tf === 0) return total;
+    const idf = idfWeight(stats.docFreq.get(term) ?? 0, stats.totalDocs);
+    return total + idf * (tf * (BM25_K1 + 1) / (tf + lengthNorm));
+  }, 0);
+}
+/**
+ * Inverse-document-frequency weight for a term found in docFrequency of
+ * totalDocs documents: ln(1 + (N - df + 0.5) / (df + 0.5)).
+ */
+function idfWeight(docFrequency, totalDocs) {
+  return Math.log(1 + (totalDocs - docFrequency + 0.5) / (docFrequency + 0.5));
+}
+/**
+ * Counts occurrences of each token; returns a Map from token to count.
+ */
+function countTerms(tokens) {
+  const tally = /* @__PURE__ */ new Map();
+  for (const token of tokens) {
+    const seen = tally.get(token) ?? 0;
+    tally.set(token, seen + 1);
+  }
+  return tally;
 }
-// src/compiler/resolver.ts
-import { readdir as readdir2, readFile as readFile6 } from "fs/promises";
-import path10 from "path";
-import { existsSync as existsSync2 } from "fs";
-async function buildTitleIndex(root) {
-  const conceptsDir = path10.join(root, CONCEPTS_DIR);
-  if (!existsSync2(conceptsDir)) return [];
-  const files = await readdir2(conceptsDir);
-  const pages = [];
-  for (const file of files) {
-    if (!file.endsWith(".md")) continue;
-    const filePath = path10.join(conceptsDir, file);
-    const content = await readFile6(filePath, "utf-8");
-    const { meta } = parseFrontmatter(content);
-    if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
-      pages.push({
-        slug: file.replace(/\.md$/, ""),
-        title: meta.title,
-        filePath
-      });
-    }
+// src/utils/embeddings.ts
+var STORE_VERSION = 2; // written as the version field of the store object built in persistRefreshedStore
+function cosineSimilarity(a, b) { // Cosine similarity of two numeric vectors; returns 0 for empty, length-mismatched or zero-magnitude inputs.
+  if (a.length !== b.length || a.length === 0) return 0;
+  let dot = 0;
+  let magA = 0;
+  let magB = 0;
+  for (let i = 0; i < a.length; i++) { // one pass accumulates the dot product and both squared magnitudes
+    dot += a[i] * b[i];
+    magA += a[i] * a[i];
+    magB += b[i] * b[i];
   }
-  return pages;
+  if (magA === 0 || magB === 0) return 0; // avoid dividing by zero for all-zero vectors
+  return dot / (Math.sqrt(magA) * Math.sqrt(magB));
 }
-function isInsideWikilink(text, position) {
-  const before = text.lastIndexOf("[[", position);
-  const after = text.indexOf("]]", position);
-  if (before === -1 || after === -1) return false;
-  const closeBefore = text.indexOf("]]", before);
-  return closeBefore >= position;
+/**
+ * Returns the k store entries whose vectors have the highest cosine
+ * similarity to queryVec, best first.
+ */
+function findTopK(queryVec, store, k) {
+  const ranked = store.entries.map((entry) => {
+    const score = cosineSimilarity(queryVec, entry.vector);
+    return { entry, score };
+  });
+  ranked.sort((left, right) => right.score - left.score);
+  const best = ranked.slice(0, k);
+  return best.map(({ entry }) => entry);
 }
-function isInsideCitation(text, position) {
-  const before = text.lastIndexOf("^[", position);
-  const after = text.indexOf("]", position);
-  if (before === -1 || after === -1) return false;
-  const closeBefore = text.indexOf("]", before);
-  return closeBefore >= position;
+/**
+ * Scores every chunk by cosine similarity to queryVec and returns the top k
+ * as { chunk, score } pairs, best first.
+ */
+function findTopKChunks(queryVec, chunks, k) {
+  const byScoreDesc = (left, right) => right.score - left.score;
+  return chunks
+    .map((chunk) => {
+      const score = cosineSimilarity(queryVec, chunk.vector);
+      return { chunk, score };
+    })
+    .sort(byScoreDesc)
+    .slice(0, k);
 }
-function isWordBoundary(text, start, end) {
-  const before = start === 0 || /[\s,.:;!?()\[\]{}/"']/.test(text[start - 1]);
-  const after = end >= text.length || /[\s,.:;!?()\[\]{}/"']/.test(text[end]);
-  return before && after;
+/**
+ * Loads and JSON-parses the embedding store under root. Resolves to null
+ * when the file does not exist; read or parse failures reject.
+ */
+async function readEmbeddingStore(root) {
+  const storePath = path17.join(root, EMBEDDINGS_FILE);
+  if (existsSync4(storePath)) {
+    const serialized = await readFile12(storePath, "utf-8");
+    return JSON.parse(serialized);
+  }
+  return null;
 }
-function findTitleMatches(text, title) {
-  const escaped = title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-  const regex = new RegExp(escaped, "gi");
-  const matches = [];
-  let match;
-  while ((match = regex.exec(text)) !== null) {
-    matches.push({ start: match.index, end: match.index + match[0].length });
-  }
-  return matches;
+/**
+ * Serialises the store as 2-space-indented JSON and writes it to the
+ * embeddings file under root via atomicWrite.
+ */
+async function writeEmbeddingStore(root, store) {
+  const target = path17.join(root, EMBEDDINGS_FILE);
+  const serialized = JSON.stringify(store, null, 2);
+  await atomicWrite(target, serialized);
 }
-function isLinkablePosition(text, start, end) {
-  if (isInsideWikilink(text, start)) return false;
-  if (isInsideCitation(text, start)) return false;
-  return isWordBoundary(text, start, end);
+/**
+ * Embeds the question and returns { slug, title, summary } for the
+ * EMBEDDING_TOP_K closest page entries. Resolves to [] when no usable store
+ * with entries is available.
+ */
+async function findRelevantPages(root, question) {
+  const hasEntries = (s) => s.entries.length > 0;
+  const store = await loadActiveStore(root, hasEntries);
+  if (!store) return [];
+  const provider = getProvider();
+  const queryVec = await provider.embed(question);
+  const nearest = findTopK(queryVec, store, EMBEDDING_TOP_K);
+  return nearest.map((entry) => {
+    return { slug: entry.slug, title: entry.title, summary: entry.summary };
+  });
 }
-function addWikilinks(body, titles, selfTitle) {
-  let result = body;
-  const selfLower = selfTitle.toLowerCase();
-  for (const page of titles) {
-    if (page.title.toLowerCase() === selfLower) continue;
-    const matches = findTitleMatches(result, page.title);
-    for (const m of matches.reverse()) {
-      if (!isLinkablePosition(result, m.start, m.end)) continue;
-      result = result.slice(0, m.start) + `[[${page.title}]]` + result.slice(m.end);
+/**
+ * Embeds the question and returns the k best-matching chunks as
+ * { chunk, score } pairs. Resolves to [] when no usable store with chunks is
+ * available.
+ */
+async function findRelevantChunks(root, question, k) {
+  const hasChunks = (s) => Boolean(s.chunks && s.chunks.length > 0);
+  const store = await loadActiveStore(root, hasChunks);
+  if (!store) return [];
+  const provider = getProvider();
+  const queryVec = await provider.embed(question);
+  const chunks = store.chunks ?? [];
+  return findTopKChunks(queryVec, chunks, k);
+}
+/**
+ * Reads the embedding store and returns it only if hasContent(store) holds
+ * and it was built with the currently resolved embedding model. A model
+ * mismatch triggers warnStaleEmbeddingStore and resolves to null.
+ */
+async function loadActiveStore(root, hasContent) {
+  const store = await readEmbeddingStore(root);
+  if (!store || !hasContent(store)) return null;
+  const activeModel = resolveEmbeddingModel();
+  if (store.model === activeModel) return store;
+  warnStaleEmbeddingStore(store.model, activeModel);
+  return null;
+}
+async function collectPageRecords(root) { // Reads every .md file under CONCEPTS_DIR and QUERIES_DIR and returns the records that readPageRecord accepts.
+  const records = [];
+  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
+    const absDir = path17.join(root, dir);
+    let files;
+    try {
+      files = await readdir5(absDir);
+    } catch { // directory cannot be listed: skip it instead of failing the whole scan
+      continue;
+    }
+    for (const file of files.filter((f) => f.endsWith(".md"))) {
+      const record = await readPageRecord(absDir, file);
+      if (record) records.push(record); // readPageRecord returns null for pages it rejects
     }
   }
-  return result;
+  return records;
 }
-async function resolveLinks(root, changedSlugs, newSlugs) {
-  const titleIndex = await buildTitleIndex(root);
-  if (titleIndex.length === 0) return 0;
-  let linkCount = 0;
-  linkCount += await resolveOutboundLinks(titleIndex, changedSlugs);
-  linkCount += await resolveInboundLinks(titleIndex, newSlugs);
-  if (linkCount > 0) {
-    status("\u{1F517}", dim(`Resolved links in ${linkCount} page(s)`));
+/**
+ * Reads one markdown page and returns { slug, title, summary, body }, or
+ * null when the page is flagged orphaned or has no string title. The slug is
+ * the file name without its .md extension; a non-string summary becomes "".
+ */
+async function readPageRecord(absDir, file) {
+  const raw = await safeReadFile(path17.join(absDir, file));
+  const { meta, body } = parseFrontmatter(raw);
+  const usable = !meta.orphaned && typeof meta.title === "string";
+  if (!usable) return null;
+  const slug = file.replace(/\.md$/, "");
+  const summary = typeof meta.summary === "string" ? meta.summary : "";
+  return { slug, title: meta.title, summary, body };
+}
+/**
+ * Text sent to the embedding provider for a page: the title, followed on a
+ * new line by the summary when the page has one.
+ */
+function buildEmbeddingText(record) {
+  if (!record.summary) return record.title;
+  return `${record.title}
+${record.summary}`;
+}
+/**
+ * Embeds, one at a time, every record whose slug is in slugsToEmbed and
+ * returns the new store entries, all stamped with the same updatedAt time.
+ */
+async function embedPages(records, slugsToEmbed) {
+  const provider = getProvider();
+  const now = (/* @__PURE__ */ new Date()).toISOString();
+  const selected = records.filter((record) => slugsToEmbed.has(record.slug));
+  const fresh = [];
+  for (const record of selected) {
+    const { slug, title, summary } = record;
+    const vector = await provider.embed(buildEmbeddingText(record));
+    fresh.push({ slug, title, summary, vector, updatedAt: now });
+  }
+  return fresh;
+}
+// Stored/active model pairs that have already been warned about.
+var warnedStaleModels = /* @__PURE__ */ new Set();
+/**
+ * Emits a warning, once per stored/active model pair, that the embedding
+ * store was built with a different model than the active one.
+ */
+function warnStaleEmbeddingStore(storedModel, activeModel) {
+  const key = `${storedModel}\u2192${activeModel}`;
+  if (!warnedStaleModels.has(key)) {
+    warnedStaleModels.add(key);
+    const message = `Embedding store was built with "${storedModel}" but active embedding model is "${activeModel}". Falling back to full-index selection. Run 'llmwiki compile' to rebuild embeddings.`;
+    status("!", warn(message));
+  }
+}
+function resolveEmbeddingModel() { // Picks the embedding model name for the active provider, honouring LLMWIKI_EMBEDDING_MODEL for openai and ollama.
+  const providerName = getActiveProviderName();
+  const configuredModel = process.env.LLMWIKI_EMBEDDING_MODEL?.trim();
+  if (configuredModel && (providerName === "openai" || providerName === "ollama")) { // the override is ignored for every other provider
+    return configuredModel;
   }
-  return linkCount;
+  return EMBEDDING_MODELS[providerName] ?? EMBEDDING_MODELS.anthropic; // no entry for this provider: fall back to the anthropic entry
 }
-async function resolveOutboundLinks(titleIndex, changedSlugs) {
-  let count = 0;
-  for (const page of titleIndex) {
-    if (!changedSlugs.includes(page.slug)) continue;
-    const didLink = await linkPage(page, titleIndex);
-    if (didLink) count++;
+function mergeEntries(existing, fresh, liveSlugs) { // Combines stored and newly embedded page entries into one list keyed by slug.
+  const bySlug = /* @__PURE__ */ new Map();
+  for (const entry of existing) {
+    if (liveSlugs.has(entry.slug)) bySlug.set(entry.slug, entry); // stored entries whose slug is no longer live are dropped
   }
-  return count;
+  for (const entry of fresh) {
+    bySlug.set(entry.slug, entry); // a fresh entry replaces the stored one for the same slug
+  }
+  return Array.from(bySlug.values());
 }
-async function resolveInboundLinks(titleIndex, newSlugs) {
-  if (newSlugs.length === 0) return 0;
-  const newTitles = titleIndex.filter((p) => newSlugs.includes(p.slug));
-  if (newTitles.length === 0) return 0;
-  let count = 0;
-  for (const page of titleIndex) {
-    if (newSlugs.includes(page.slug)) continue;
-    const content = await readFile6(page.filePath, "utf-8");
-    const { body } = parseFrontmatter(content);
-    const linked = addWikilinks(body, newTitles, page.title);
-    if (linked !== body) {
-      const newContent = content.replace(body, linked);
-      await atomicWrite(page.filePath, newContent);
-      count++;
+/**
+ * Rebuilds the chunk list for all records, page by page. Existing chunks
+ * whose page is still live are indexed and offered to embedRecordChunks for
+ * reuse; forceAll is passed through to it.
+ */
+async function refreshChunkEmbeddings(records, existing, forceAll) {
+  const liveSlugs = new Set(records.map((r) => r.slug));
+  const stillLive = existing.filter((c) => liveSlugs.has(c.slug));
+  const existingByKey = indexChunksByKey(stillLive);
+  const now = (/* @__PURE__ */ new Date()).toISOString();
+  const fresh = [];
+  for (const record of records) {
+    fresh.push(...(await embedRecordChunks(record, existingByKey, forceAll, now)));
+  }
+  return fresh;
+}
+async function embedRecordChunks(record, existingByKey, forceAll, now) { // Produces the chunk entries for one page, reusing stored chunks where pickReusableChunk allows and embedding the rest.
+  const provider = getProvider();
+  const chunkTexts = splitIntoChunks(record.body);
+  const out = [];
+  for (let i = 0; i < chunkTexts.length; i++) {
+    const text = chunkTexts[i];
+    const contentHash = hashChunkText(text);
+    const reused = pickReusableChunk(existingByKey, record.slug, i, contentHash, forceAll);
+    if (reused) {
+      out.push({ ...reused, title: record.title }); // keep the stored vector but refresh the title from the current record
+      continue;
     }
+    const vector = await provider.embed(text); // no reusable chunk: embed this text now
+    out.push({
+      slug: record.slug,
+      title: record.title,
+      chunkIndex: i,
+      contentHash,
+      text,
+      vector,
+      updatedAt: now
+    });
   }
-  return count;
+  return out;
 }
-async function linkPage(page, titleIndex) {
-  const content = await readFile6(page.filePath, "utf-8");
-  const { body } = parseFrontmatter(content);
-  const linked = addWikilinks(body, titleIndex, page.title);
-  if (linked === body) return false;
-  const newContent = content.replace(body, linked);
-  await atomicWrite(page.filePath, newContent);
-  return true;
+/**
+ * Builds a Map from chunkKey(slug, chunkIndex) to chunk; when two chunks
+ * share a key the later one wins.
+ */
+function indexChunksByKey(chunks) {
+  const keyed = /* @__PURE__ */ new Map();
+  for (const chunk of chunks) {
+    const key = chunkKey(chunk.slug, chunk.chunkIndex);
+    keyed.set(key, chunk);
+  }
+  return keyed;
 }
-// src/compiler/indexgen.ts
-import { readdir as readdir3 } from "fs/promises";
-import path11 from "path";
-async function generateIndex(root) {
-  status("*", info("Generating index..."));
-  const conceptsPath = path11.join(root, CONCEPTS_DIR);
-  const queriesPath = path11.join(root, QUERIES_DIR);
-  const concepts = await collectPageSummaries(conceptsPath);
-  const queries = await collectPageSummaries(queriesPath);
-  concepts.sort((a, b) => a.title.localeCompare(b.title));
-  queries.sort((a, b) => a.title.localeCompare(b.title));
-  const indexContent = buildIndexContent(concepts, queries);
-  const indexPath = path11.join(root, INDEX_FILE);
-  await atomicWrite(indexPath, indexContent);
-  const total = concepts.length + queries.length;
-  status("+", success(`Index updated with ${total} pages.`));
+/**
+ * Lookup key for a chunk: the page slug and the chunk index joined by "#".
+ */
+function chunkKey(slug, chunkIndex) {
+  const key = `${slug}#${chunkIndex}`;
+  return key;
 }
-async function scanWikiPages(dirPath) {
-  let files;
-  try {
-    files = await readdir3(dirPath);
-  } catch {
-    return [];
+/**
+ * Returns the stored chunk for (slug, chunkIndex) when its contentHash
+ * matches, otherwise null. Always null when forceAll is set.
+ */
+function pickReusableChunk(byKey, slug, chunkIndex, contentHash, forceAll) {
+  if (forceAll) return null;
+  const stored = byKey.get(chunkKey(slug, chunkIndex));
+  if (stored && stored.contentHash === contentHash) return stored;
+  return null;
+}
+async function updateEmbeddings(root, changedSlugs) { // Refreshes the embedding store under root for the changed pages, doing a full re-embed when the store is missing, empty or built with another model.
+  const records = await collectPageRecords(root);
+  const liveSlugs = new Set(records.map((r) => r.slug));
+  const embeddingModel = resolveEmbeddingModel();
+  const existingStore = await readEmbeddingStore(root);
+  const modelChanged = Boolean(existingStore && existingStore.model !== embeddingModel);
+  const toEmbed = new Set(changedSlugs.filter((slug) => liveSlugs.has(slug))); // changed slugs with no live page are ignored
+  const previousEntries = modelChanged ? [] : existingStore?.entries ?? []; // vectors from another model are not carried over
+  const previousChunks = modelChanged ? [] : existingStore?.chunks ?? [];
+  const isEmptyStore = isStoreEmpty(existingStore);
+  if (!existingStore || modelChanged || isEmptyStore && liveSlugs.size > 0) {
+    for (const record of records) toEmbed.add(record.slug); // full rebuild: queue every live page
   }
-  const scanned = [];
-  for (const file of files.filter((f) => f.endsWith(".md"))) {
-    const content = await safeReadFile(path11.join(dirPath, file));
-    const { meta } = parseFrontmatter(content);
-    scanned.push({ slug: file.replace(/\.md$/, ""), meta });
+  if (!shouldRunEmbedding(modelChanged, toEmbed, previousEntries, previousChunks, liveSlugs)) {
+    return; // nothing to embed or prune: leave the store file untouched
   }
-  return scanned;
+  const freshEntries = await embedPages(records, toEmbed);
+  const mergedEntries = mergeEntries(previousEntries, freshEntries, liveSlugs);
+  const mergedChunks = await refreshChunkEmbeddings(records, previousChunks, modelChanged); // modelChanged doubles as the force-all flag for chunks
+  await persistRefreshedStore(root, embeddingModel, mergedEntries, mergedChunks);
 }
-async function collectPageSummaries(conceptsPath) {
-  const scanned = await scanWikiPages(conceptsPath);
-  return scanned.filter(({ meta }) => meta.title && typeof meta.title === "string" && !meta.orphaned).map(({ slug, meta }) => ({
-    title: meta.title,
-    slug,
-    summary: typeof meta.summary === "string" ? meta.summary : ""
-  }));
+/**
+ * Writes a new embedding store (version, model, dimensions, entries, chunks)
+ * and reports the page and chunk counts. dimensions is taken from the first
+ * entry's vector, else the first chunk's vector, else 0.
+ */
+async function persistRefreshedStore(root, embeddingModel, entries, chunks) {
+  const dimensions = entries[0]?.vector.length ?? chunks[0]?.vector.length ?? 0;
+  await writeEmbeddingStore(root, {
+    version: STORE_VERSION,
+    model: embeddingModel,
+    dimensions,
+    entries,
+    chunks
+  });
+  const summary = `Embeddings updated (${entries.length} pages, ${chunks.length} chunks).`;
+  status("*", dim(summary));
 }
-function stripWikilinks(text) {
-  return text.replace(/\[\[([^\]]+)\]\]/g, "$1");
+/**
+ * True when a store exists but holds neither entries nor chunks. A missing
+ * store (null/undefined) is reported as not empty.
+ */
+function isStoreEmpty(store) {
+  if (!store) return false;
+  const hasEntries = store.entries.length !== 0;
+  const hasChunks = Boolean(store.chunks) && store.chunks.length !== 0;
+  return !hasEntries && !hasChunks;
 }
-function buildIndexContent(concepts, queries) {
-  const lines = ["# Knowledge Wiki", "", "## Concepts", ""];
-  for (const page of concepts) {
-    lines.push(`- **[[${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
-  }
-  if (queries.length > 0) {
-    lines.push("", "## Saved Queries", "");
-    for (const page of queries) {
-      lines.push(`- **[[${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
-    }
-  }
-  const total = concepts.length + queries.length;
-  lines.push("");
-  lines.push(`_${total} pages | Generated ${(/* @__PURE__ */ new Date()).toISOString()}_`);
-  lines.push("");
-  return lines.join("\n");
+/**
+ * Decides whether the embedding store needs rewriting: the model changed,
+ * there are pages to embed, a stored entry or chunk belongs to a page that
+ * is no longer live, or entries exist without any chunks while live pages
+ * are present.
+ */
+function shouldRunEmbedding(modelChanged, toEmbed, previousEntries, previousChunks, liveSlugs) {
+  const isStale = (item) => !liveSlugs.has(item.slug);
+  const chunksMissing = previousEntries.length > 0 && previousChunks.length === 0 && liveSlugs.size > 0;
+  return Boolean(modelChanged) || toEmbed.size > 0 || previousEntries.some(isStale) || previousChunks.some(isStale) || chunksMissing;
 }
-// src/compiler/obsidian.ts
-import { readdir as readdir4 } from "fs/promises";
-import path12 from "path";
-var ABBREVIATION_MIN_WORDS = 3;
-var SWAP_CONJUNCTIONS = [" and ", " or "];
-function addObsidianMeta(frontmatter, conceptTitle, tags) {
-  frontmatter.tags = tags;
-  frontmatter.aliases = generateAliases(conceptTitle);
+// src/compiler/candidates.ts
+import { readdir as readdir6, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
+import { existsSync as existsSync5 } from "fs";
+import path18 from "path";
+import { randomBytes } from "crypto";
+// Number of random bytes behind each candidate id suffix (hex-encoded, so twice as many characters).
+var ID_SUFFIX_BYTES = 4;
+// File extension of stored candidate files.
+var CANDIDATE_EXT = ".json";
+/**
+ * Builds a candidate id: the slug, a dash, and a random hex suffix.
+ */
+function buildCandidateId(slug) {
+  const randomHex = randomBytes(ID_SUFFIX_BYTES).toString("hex");
+  const id = `${slug}-${randomHex}`;
+  return id;
 }
-function generateAliases(title) {
-  const aliases = [];
-  const slug = slugify(title);
-  if (slug !== title) {
-    aliases.push(slug);
+/**
+ * Path of the candidate file for id inside CANDIDATES_DIR under root.
+ */
+function candidatePath(root, id) {
+  const fileName = `${id}${CANDIDATE_EXT}`;
+  return path18.join(root, CANDIDATES_DIR, fileName);
+}
+/**
+ * Path of the archived candidate file for id inside CANDIDATES_ARCHIVE_DIR
+ * under root.
+ */
+function archivePath(root, id) {
+  const fileName = `${id}${CANDIDATE_EXT}`;
+  return path18.join(root, CANDIDATES_ARCHIVE_DIR, fileName);
+}
+/**
+ * Turns a draft into a candidate (fresh id, generatedAt timestamp), writes
+ * it as 2-space-indented JSON to its candidate path, and returns it.
+ * sourceStates and schemaViolations are copied only when the draft carries
+ * truthy values for them.
+ */
+async function writeCandidate(root, draft) {
+  const candidate = {
+    id: buildCandidateId(draft.slug),
+    title: draft.title,
+    slug: draft.slug,
+    summary: draft.summary,
+    sources: draft.sources,
+    body: draft.body,
+    generatedAt: (/* @__PURE__ */ new Date()).toISOString()
+  };
+  if (draft.sourceStates) candidate.sourceStates = draft.sourceStates;
+  if (draft.schemaViolations) candidate.schemaViolations = draft.schemaViolations;
+  const target = candidatePath(root, candidate.id);
+  await atomicWrite(target, JSON.stringify(candidate, null, 2));
+  return candidate;
+}
+/**
+ * Reports an error message, sets the process exit code to 1 and returns
+ * null so callers can write `return failWithError(...)`.
+ */
+function failWithError(message) {
+  const formatted = error(message);
+  status("!", formatted);
+  process.exitCode = 1;
+  return null;
+}
+/**
+ * Reads a candidate by id; when it cannot be read, reports
+ * "Candidate not found" through failWithError and resolves to null.
+ */
+async function loadCandidateOrFail(root, id) {
+  const candidate = await readCandidate(root, id);
+  if (candidate) return candidate;
+  return failWithError(`Candidate not found: ${id}`);
+}
+async function loadCandidateUnderLockOrFail(root, id) { // Same lookup as loadCandidateOrFail, but a missing candidate is reported as removed by another process during review.
+  const candidate = await readCandidate(root, id);
+  if (!candidate) {
+    return failWithError(`Candidate ${id} was removed by another process during review.`); // failWithError sets exit code 1 and returns null
   }
-  const swapAlias = generateSwapAlias(title);
-  if (swapAlias) {
-    aliases.push(swapAlias);
+  return candidate;
+}
+async function readCandidate(root, id) { // Loads and validates one candidate file; resolves to null when it is missing, empty, unparsable or fails isValidCandidate.
+  const raw = await safeReadFile(candidatePath(root, id));
+  if (!raw) return null;
+  try {
+    const parsed = JSON.parse(raw);
+    if (!isValidCandidate(parsed)) return null;
+    return parsed;
+  } catch { // invalid JSON is treated the same as a missing candidate
+    return null;
   }
-  const abbreviation = generateAbbreviation(title);
-  if (abbreviation) {
-    aliases.push(abbreviation);
+}
+/**
+ * Shape check for a parsed candidate: a non-null object whose id, title,
+ * slug and body are strings and whose sources is an array.
+ */
+function isValidCandidate(value) {
+  if (!value || typeof value !== "object") return false;
+  const stringFields = ["id", "title", "slug", "body"];
+  const stringsOk = stringFields.every((field) => typeof value[field] === "string");
+  return stringsOk && Array.isArray(value.sources);
+}
+/**
+ * Lists every readable, valid candidate in CANDIDATES_DIR under root,
+ * ordered by generatedAt (oldest first). Resolves to [] when the directory
+ * does not exist; files that readCandidate rejects are skipped.
+ *
+ * @param {string} root
+ * @returns {Promise<object[]>} the candidates in generatedAt order
+ */
+async function listCandidates(root) {
+  const dir = path18.join(root, CANDIDATES_DIR);
+  if (!existsSync5(dir)) return [];
+  const entries = await readdir6(dir, { withFileTypes: true });
+  const candidates = [];
+  for (const entry of entries) {
+    if (!entry.isFile() || !entry.name.endsWith(CANDIDATE_EXT)) continue;
+    const id = entry.name.slice(0, -CANDIDATE_EXT.length);
+    const candidate = await readCandidate(root, id);
+    if (candidate) candidates.push(candidate);
   }
-  return aliases;
+  // isValidCandidate does not require generatedAt, so a candidate file without
+  // it reaches this point; compare against "" instead of throwing on undefined.
+  const stamp = (candidate) => typeof candidate.generatedAt === "string" ? candidate.generatedAt : "";
+  candidates.sort((a, b) => stamp(a).localeCompare(stamp(b)));
+  return candidates;
 }
-function generateSwapAlias(title) {
-  for (const conjunction of SWAP_CONJUNCTIONS) {
-    const index = title.toLowerCase().indexOf(conjunction);
-    if (index === -1) continue;
-    const before = title.slice(0, index);
-    const after = title.slice(index + conjunction.length);
-    const originalConjunction = title.slice(index, index + conjunction.length);
-    return `${after}${originalConjunction}${before}`;
+/**
+ * Number of candidates that listCandidates returns for root.
+ */
+async function countCandidates(root) {
+  return (await listCandidates(root)).length;
+}
+/**
+ * Deletes the candidate file for id. Resolves to true when a file was
+ * removed and false when none existed.
+ */
+async function deleteCandidate(root, id) {
+  const target = candidatePath(root, id);
+  const exists = existsSync5(target);
+  if (exists) await unlink2(target);
+  return exists;
+}
+async function archiveCandidate(root, id) { // Moves the candidate file for id into the archive directory; resolves to false when no such file exists, true otherwise.
+  const sourcePath = candidatePath(root, id);
+  if (!existsSync5(sourcePath)) return false;
+  const target = archivePath(root, id);
+  await mkdir5(path18.dirname(target), { recursive: true }); // make sure the archive directory exists
+  try {
+    await rename3(sourcePath, target);
+  } catch { // any rename failure falls back to copy-then-delete
+    const raw = await safeReadFile(sourcePath); // NOTE(review): if safeReadFile yields an empty value on failure, an empty archive is written and the source is then deleted - confirm
+    await writeFile4(target, raw, "utf-8");
+    await unlink2(sourcePath);
   }
-  return null;
+  return true;
 }
-function generateAbbreviation(title) {
-  const words = title.split(/\s+/);
-  if (words.length < ABBREVIATION_MIN_WORDS) return null;
-  const abbreviation = words.map((w) => w[0].toUpperCase()).join("");
-  if (abbreviation === title) return null;
-  return abbreviation;
+// src/linter/rules.ts
+import { readdir as readdir7, readFile as readFile13 } from "fs/promises";
+import { existsSync as existsSync6 } from "fs";
+import path19 from "path";
+var MIN_BODY_LENGTH = 50; // not referenced in this part of the file; presumably a minimum page-body length for a lint rule - confirm at its use site
+var WIKILINK_PATTERN2 = /\[\[([^\]]+)\]\]/g; // matches [[...]] and captures the text between the brackets
+var CITATION_PATTERN = /\^\[([^\]]+)\]/g; // matches ^[...] and captures the text between the brackets
+/**
+ * Finds every match of a global regex in content, line by line, and returns
+ * { captured, line } for each: the first capture group and the 1-based line
+ * number, in document order.
+ */
+function findMatchesInContent(content, pattern) {
+  return content.split("\n").flatMap((text, index) =>
+    Array.from(text.matchAll(pattern), (match) => ({ captured: match[1], line: index + 1 }))
+  );
+}
-async function generateMOC(root) {
-  const conceptsPath = path12.join(root, CONCEPTS_DIR);
-  const pages = await loadConceptPages(conceptsPath);
-  const tagGroups = groupPagesByTag(pages);
-  const content = buildMOCContent(tagGroups);
-  await atomicWrite(path12.join(root, MOC_FILE), content);
+/**
+ * Reads all .md files directly inside dirPath concurrently and resolves to
+ * { filePath, content } objects in directory-listing order. Resolves to []
+ * when the directory does not exist.
+ */
+async function readMarkdownFiles(dirPath) {
+  if (!existsSync6(dirPath)) return [];
+  const names = (await readdir7(dirPath)).filter((name) => name.endsWith(".md"));
+  const pending = names.map(async (name) => {
+    const filePath = path19.join(dirPath, name);
+    return { filePath, content: await readFile13(filePath, "utf-8") };
+  });
+  return Promise.all(pending);
 }
-async function loadConceptPages(conceptsPath) {
-  let files;
-  try {
-    files = await readdir4(conceptsPath);
-  } catch {
-    return [];
-  }
-  const pages = [];
-  for (const file of files) {
-    if (!file.endsWith(".md")) continue;
-    const content = await safeReadFile(path12.join(conceptsPath, file));
-    if (!content) continue;
-    const { meta } = parseFrontmatter(content);
-    if (meta.orphaned) continue;
-    const title = typeof meta.title === "string" ? meta.title : file.replace(/\.md$/, "");
-    const tags = Array.isArray(meta.tags) ? meta.tags : [];
-    pages.push({ title, tags });
+/**
+ * Reads the markdown pages of CONCEPTS_DIR and then QUERIES_DIR under root
+ * and returns them as one list, concepts first.
+ */
+async function collectAllPages(root) {
+  const pages = [];
+  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
+    pages.push(...(await readMarkdownFiles(path19.join(root, dir))));
+  }
+  return pages;
+}
+function buildPageSlugSet(pages) { // Collects the lowercased file base names (without .md) of the given pages into a Set.
+  const slugs = /* @__PURE__ */ new Set();
+  for (const page of pages) {
+    const baseName = path19.basename(page.filePath, ".md");
+    slugs.add(baseName.toLowerCase()); // lowercased so membership checks do not depend on file-name casing
   }
-  return pages;
+  return slugs;
 }
-function groupPagesByTag(pages) {
-  const groups = /* @__PURE__ */ new Map();
+async function checkBrokenWikilinks(root) { // Lint rule: reports an error for every [[...]] link whose slugified target has no matching page file.
+  const pages = await collectAllPages(root);
+  const existingSlugs = buildPageSlugSet(pages);
+  const results = [];
   for (const page of pages) {
-    if (page.tags.length === 0) {
-      appendToGroup(groups, "Uncategorized", page.title);
-      continue;
-    }
-    for (const tag of page.tags) {
-      appendToGroup(groups, tag, page.title);
+    for (const { captured, line } of findMatchesInContent(page.content, WIKILINK_PATTERN2)) {
+      const linkSlug = slugify(captured); // NOTE(review): captured is the whole bracket text, so an aliased link such as [[slug|Title]] is slugified with its alias - confirm slugify copes
+      if (!existingSlugs.has(linkSlug)) {
+        results.push({
+          rule: "broken-wikilink",
+          severity: "error",
+          file: page.filePath,
+          message: `Broken wikilink [[${captured}]] \u2014 no matching page found`,
+          line
+        });
+      }
     }
   }
-  return groups;
+  return results;
 }
-function appendToGroup(groups, key, title) {
-  const existing = groups.get(key);
-  if (existing) {
-    existing.push(title);
-  } else {
-    groups.set(key, [title]);
+/**
+ * Lint rule: emits an "orphaned-page" warning for every page whose
+ * frontmatter has orphaned set to exactly true.
+ */
+async function checkOrphanedPages(root) {
+  const findings = [];
+  for (const { filePath, content } of await collectAllPages(root)) {
+    const { meta } = parseFrontmatter(content);
+    if (meta.orphaned !== true) continue;
+    findings.push({
+      rule: "orphaned-page",
+      severity: "warning",
+      file: filePath,
+      message: `Page is marked as orphaned`
+    });
   }
+  return findings;
 }
-function buildMOCContent(tagGroups) {
-  const lines = ["# Map of Content", ""];
-  const sortedTags = [...tagGroups.keys()].sort((a, b) => {
-    if (a === "Uncategorized") return 1;
-    if (b === "Uncategorized") return -1;
-    return a.localeCompare(b);
-  });
-  for (const tag of sortedTags) {
-    const titles = tagGroups.get(tag) ?? [];
-    lines.push(`## ${tag}`, "");
-    for (const title of titles.sort()) {
-      lines.push(`- [[${title}]]`);
+async function checkMissingSummaries(root) { // Lint rule: emits a missing-summary warning for every page without a usable summary in its frontmatter.
+  const pages = await collectAllPages(root);
+  const results = [];
+  for (const page of pages) {
+    const { meta } = parseFrontmatter(page.content);
+    const summary = meta.summary;
+    const isMissing = !summary || typeof summary === "string" && summary.trim() === ""; // absent, falsy or whitespace-only
+    if (isMissing) {
+      results.push({
+        rule: "missing-summary",
+        severity: "warning",
+        file: page.filePath,
+        message: `Page has no summary in frontmatter`
+      });
     }
-    lines.push("");
   }
-  return lines.join("\n");
+  return results;
 }
-// src/utils/embeddings.ts
-import { readFile as readFile7, readdir as readdir5 } from "fs/promises";
-import { existsSync as existsSync3 } from "fs";
-import path13 from "path";
-function cosineSimilarity(a, b) {
-  if (a.length !== b.length || a.length === 0) return 0;
-  let dot = 0;
-  let magA = 0;
-  let magB = 0;
-  for (let i = 0; i < a.length; i++) {
-    dot += a[i] * b[i];
-    magA += a[i] * a[i];
-    magB += b[i] * b[i];
+async function checkDuplicateConcepts(root) {
+  const pages = await collectAllPages(root);
+  const titleMap = /* @__PURE__ */ new Map();
+  for (const page of pages) {
+    const { meta } = parseFrontmatter(page.content);
+    const title = typeof meta.title === "string" ? meta.title : "";
+    if (!title) continue;
+    const normalizedTitle = title.toLowerCase().trim();
+    const existing = titleMap.get(normalizedTitle) ?? [];
+    existing.push(page.filePath);
+    titleMap.set(normalizedTitle, existing);
   }
-  if (magA === 0 || magB === 0) return 0;
-  return dot / (Math.sqrt(magA) * Math.sqrt(magB));
-}
-function findTopK(queryVec, store, k) {
-  const scored = store.entries.map((entry) => ({
-    entry,
-    score: cosineSimilarity(queryVec, entry.vector)
-  }));
-  scored.sort((left, right) => right.score - left.score);
-  return scored.slice(0, k).map((item) => item.entry);
-}
-async function readEmbeddingStore(root) {
-  const filePath = path13.join(root, EMBEDDINGS_FILE);
-  if (!existsSync3(filePath)) return null;
-  const raw = await readFile7(filePath, "utf-8");
-  return JSON.parse(raw);
-}
-async function writeEmbeddingStore(root, store) {
-  const filePath = path13.join(root, EMBEDDINGS_FILE);
-  await atomicWrite(filePath, JSON.stringify(store, null, 2));
-}
-async function findRelevantPages(root, question) {
-  const store = await readEmbeddingStore(root);
-  if (!store || store.entries.length === 0) return [];
-  const activeModel = resolveEmbeddingModel();
-  if (store.model !== activeModel) {
-    warnStaleEmbeddingStore(store.model, activeModel);
-    return [];
+  const results = [];
+  for (const [title, files] of titleMap) {
+    if (files.length <= 1) continue;
+    for (const file of files) {
+      results.push({
+        rule: "duplicate-concept",
+        severity: "error",
+        file,
+        message: `Duplicate title "${title}" \u2014 also in ${files.filter((f) => f !== file).join(", ")}`
+      });
+    }
   }
-  const queryVec = await getProvider().embed(question);
-  return findTopK(queryVec, store, EMBEDDING_TOP_K).map((entry) => ({
-    slug: entry.slug,
-    title: entry.title,
-    summary: entry.summary
-  }));
+  return results;
 }
-async function collectPageRecords(root) {
-  const records = [];
-  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
-    const absDir = path13.join(root, dir);
-    let files;
-    try {
-      files = await readdir5(absDir);
-    } catch {
-      continue;
-    }
-    for (const file of files.filter((f) => f.endsWith(".md"))) {
-      const content = await safeReadFile(path13.join(absDir, file));
-      const { meta } = parseFrontmatter(content);
-      if (meta.orphaned || typeof meta.title !== "string") continue;
-      records.push({
-        slug: file.replace(/\.md$/, ""),
-        title: meta.title,
-        summary: typeof meta.summary === "string" ? meta.summary : ""
+async function checkEmptyPages(root) {
+  const pages = await collectAllPages(root);
+  const results = [];
+  for (const page of pages) {
+    const { meta, body } = parseFrontmatter(page.content);
+    const hasTitle = typeof meta.title === "string" && meta.title.trim() !== "";
+    const isBodyEmpty = body.trim().length < MIN_BODY_LENGTH;
+    if (hasTitle && isBodyEmpty) {
+      results.push({
+        rule: "empty-page",
+        severity: "warning",
+        file: page.filePath,
+        message: `Page body is empty or too short (< ${MIN_BODY_LENGTH} chars)`
       });
     }
   }
-  return records;
+  return results;
 }
-function buildEmbeddingText(record) {
-  return record.summary ? `${record.title}
-${record.summary}` : record.title;
+function stripSpanSuffix(entry) {
+  const colonIdx = entry.indexOf(":");
+  const hashIdx = entry.indexOf("#");
+  const cuts = [colonIdx, hashIdx].filter((i) => i >= 0);
+  if (cuts.length === 0) return entry;
+  return entry.slice(0, Math.min(...cuts));
 }
-async function embedPages(records, slugsToEmbed) {
-  const provider = getProvider();
-  const now = (/* @__PURE__ */ new Date()).toISOString();
-  const fresh = [];
-  for (const record of records) {
-    if (!slugsToEmbed.has(record.slug)) continue;
-    const vector = await provider.embed(buildEmbeddingText(record));
-    fresh.push({
-      slug: record.slug,
-      title: record.title,
-      summary: record.summary,
-      vector,
-      updatedAt: now
+async function checkLowConfidencePages(root) {
+  const pages = await collectAllPages(root);
+  const results = [];
+  for (const page of pages) {
+    const { meta } = parseFrontmatter(page.content);
+    const { confidence } = parseProvenanceMetadata(meta);
+    if (confidence === void 0 || confidence >= LOW_CONFIDENCE_THRESHOLD) continue;
+    results.push({
+      rule: "low-confidence",
+      severity: "warning",
+      file: page.filePath,
+      message: `Page confidence ${confidence.toFixed(2)} is below ${LOW_CONFIDENCE_THRESHOLD}`
+    });
+  }
+  return results;
+}
+async function checkContradictedPages(root) {
+  const pages = await collectAllPages(root);
+  const results = [];
+  for (const page of pages) {
+    const { meta } = parseFrontmatter(page.content);
+    const { contradictedBy } = parseProvenanceMetadata(meta);
+    if (!contradictedBy || contradictedBy.length === 0) continue;
+    const slugs = contradictedBy.map((r) => r.slug).join(", ");
+    results.push({
+      rule: "contradicted-page",
+      severity: "warning",
+      file: page.filePath,
+      message: `Page contradicts: ${slugs}`
+    });
+  }
+  return results;
+}
+async function checkInferredWithoutCitations(root) {
+  const pages = await collectAllPages(root);
+  const results = [];
+  for (const page of pages) {
+    const { meta, body } = parseFrontmatter(page.content);
+    const provenance = parseProvenanceMetadata(meta);
+    const inferred = provenance.inferredParagraphs ?? countUncitedProseParagraphs(body);
+    if (inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS) continue;
+    results.push({
+      rule: "excess-inferred-paragraphs",
+      severity: "warning",
+      file: page.filePath,
+      message: `Page has ${inferred} inferred paragraphs without citations (max ${MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS})`
     });
   }
-  return fresh;
-}
-var warnedStaleModels = /* @__PURE__ */ new Set();
-function warnStaleEmbeddingStore(storedModel, activeModel) {
-  const key = `${storedModel}\u2192${activeModel}`;
-  if (warnedStaleModels.has(key)) return;
-  warnedStaleModels.add(key);
-  status(
-    "!",
-    warn(
-      `Embedding store was built with "${storedModel}" but active embedding model is "${activeModel}". Falling back to full-index selection. Run 'llmwiki compile' to rebuild embeddings.`
-    )
-  );
+  return results;
 }
-function resolveEmbeddingModel() {
-  const providerName = getActiveProviderName();
-  const configuredModel = process.env.LLMWIKI_EMBEDDING_MODEL?.trim();
-  if (configuredModel && (providerName === "openai" || providerName === "ollama")) {
-    return configuredModel;
+var PROSE_PARAGRAPH_LEAD = /^[A-Za-z]/;
+function countUncitedProseParagraphs(body) {
+  const paragraphs = body.split(/\n\s*\n/);
+  let count = 0;
+  for (const block of paragraphs) {
+    const trimmed = block.trim();
+    if (trimmed.length === 0) continue;
+    if (!PROSE_PARAGRAPH_LEAD.test(trimmed)) continue;
+    if (CITATION_PATTERN.test(trimmed)) {
+      CITATION_PATTERN.lastIndex = 0;
+      continue;
+    }
+    CITATION_PATTERN.lastIndex = 0;
+    count += 1;
   }
-  return EMBEDDING_MODELS[providerName] ?? EMBEDDING_MODELS.anthropic;
+  return count;
 }
-function mergeEntries(existing, fresh, liveSlugs) {
-  const bySlug = /* @__PURE__ */ new Map();
-  for (const entry of existing) {
-    if (liveSlugs.has(entry.slug)) bySlug.set(entry.slug, entry);
-  }
-  for (const entry of fresh) {
-    bySlug.set(entry.slug, entry);
+var COLON_SPAN_PATTERN = /^[^:#]+:(\d+)(?:-(\d+))?$/;
+var HASH_SPAN_PATTERN = /^[^:#]+#L(\d+)(?:-L(\d+))?$/;
+async function checkSchemaCrossLinks(root, schema) {
+  const pages = await collectAllPages(root);
+  const results = [];
+  for (const page of pages) {
+    const { meta, body } = parseFrontmatter(page.content);
+    const kind = resolvePageKind(meta.kind, schema);
+    const rule = schema.kinds[kind];
+    if (rule.minWikilinks <= 0) continue;
+    const linkCount = countWikilinks(body);
+    if (linkCount >= rule.minWikilinks) continue;
+    results.push({
+      rule: "schema-cross-link-minimum",
+      severity: "warning",
+      file: page.filePath,
+      message: `Page kind "${kind}" requires at least ${rule.minWikilinks} [[wikilinks]] but only ${linkCount} found.`
+    });
   }
-  return Array.from(bySlug.values());
+  return results;
 }
-async function updateEmbeddings(root, changedSlugs) {
-  const records = await collectPageRecords(root);
-  const liveSlugs = new Set(records.map((r) => r.slug));
-  const embeddingModel = resolveEmbeddingModel();
-  const existingStore = await readEmbeddingStore(root);
-  const modelChanged = Boolean(existingStore && existingStore.model !== embeddingModel);
-  const toEmbed = new Set(changedSlugs.filter((slug) => liveSlugs.has(slug)));
-  const previousEntries = modelChanged ? [] : existingStore?.entries ?? [];
-  if (!existingStore || modelChanged) {
-    for (const record of records) toEmbed.add(record.slug);
+function checkPageCrossLinks(content, filePath, schema) {
+  const { meta, body } = parseFrontmatter(content);
+  const kind = resolvePageKind(meta.kind, schema);
+  const rule = schema.kinds[kind];
+  if (rule.minWikilinks <= 0) return [];
+  const linkCount = countWikilinks(body);
+  if (linkCount >= rule.minWikilinks) return [];
+  return [
+    {
+      rule: "schema-cross-link-minimum",
+      severity: "warning",
+      file: filePath,
+      message: `Page kind "${kind}" requires at least ${rule.minWikilinks} [[wikilinks]] but only ${linkCount} found.`
+    }
+  ];
+}
+function parseLineRange(entry) {
+  const colonMatch = COLON_SPAN_PATTERN.exec(entry);
+  if (colonMatch) {
+    const start = Number(colonMatch[1]);
+    const end = colonMatch[2] !== void 0 ? Number(colonMatch[2]) : start;
+    return { start, end };
   }
-  if (!modelChanged && toEmbed.size === 0 && previousEntries.every((e) => liveSlugs.has(e.slug))) {
-    return;
+  const hashMatch = HASH_SPAN_PATTERN.exec(entry);
+  if (hashMatch) {
+    const start = Number(hashMatch[1]);
+    const end = hashMatch[2] !== void 0 ? Number(hashMatch[2]) : start;
+    return { start, end };
   }
-  const freshEntries = await embedPages(records, toEmbed);
-  const mergedEntries = mergeEntries(previousEntries, freshEntries, liveSlugs);
-  const dimensions = mergedEntries[0]?.vector.length ?? 0;
-  const store = {
-    version: 1,
-    model: embeddingModel,
-    dimensions,
-    entries: mergedEntries
-  };
-  await writeEmbeddingStore(root, store);
-  status("*", dim(`Embeddings updated (${mergedEntries.length} pages).`));
-}
-// src/compiler/candidates.ts
-import { readdir as readdir6, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
-import { existsSync as existsSync4 } from "fs";
-import path14 from "path";
-import { randomBytes } from "crypto";
-var ID_SUFFIX_BYTES = 4;
-var CANDIDATE_EXT = ".json";
-function buildCandidateId(slug) {
-  const suffix = randomBytes(ID_SUFFIX_BYTES).toString("hex");
-  return `${slug}-${suffix}`;
-}
-function candidatePath(root, id) {
-  return path14.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
-}
-function archivePath(root, id) {
-  return path14.join(root, CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_EXT}`);
-}
-async function writeCandidate(root, draft) {
-  const candidate = {
-    id: buildCandidateId(draft.slug),
-    title: draft.title,
-    slug: draft.slug,
-    summary: draft.summary,
-    sources: draft.sources,
-    body: draft.body,
-    generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
-    ...draft.sourceStates ? { sourceStates: draft.sourceStates } : {}
-  };
-  await atomicWrite(candidatePath(root, candidate.id), JSON.stringify(candidate, null, 2));
-  return candidate;
-}
-function failWithError(message) {
-  status("!", error(message));
-  process.exitCode = 1;
   return null;
 }
-async function loadCandidateOrFail(root, id) {
-  const candidate = await readCandidate(root, id);
-  if (!candidate) return failWithError(`Candidate not found: ${id}`);
-  return candidate;
-}
-async function loadCandidateUnderLockOrFail(root, id) {
-  const candidate = await readCandidate(root, id);
-  if (!candidate) {
-    return failWithError(`Candidate ${id} was removed by another process during review.`);
-  }
-  return candidate;
+function countLines(content) {
+  if (content.length === 0) return 0;
+  return content.split("\n").length;
 }
-async function readCandidate(root, id) {
-  const raw = await safeReadFile(candidatePath(root, id));
-  if (!raw) return null;
-  try {
-    const parsed = JSON.parse(raw);
-    if (!isValidCandidate(parsed)) return null;
-    return parsed;
-  } catch {
-    return null;
+async function checkBrokenCitations(root) {
+  const pages = await collectAllPages(root);
+  const sourcesDir = path19.join(root, SOURCES_DIR);
+  const results = [];
+  const lineCountCache = /* @__PURE__ */ new Map();
+  for (const page of pages) {
+    for (const { captured, line } of findMatchesInContent(page.content, CITATION_PATTERN)) {
+      await collectBrokenForMarker(captured, line, page.filePath, sourcesDir, lineCountCache, results);
+    }
   }
+  return results;
 }
-function isValidCandidate(value) {
-  if (!value || typeof value !== "object") return false;
-  const candidate = value;
-  return typeof candidate.id === "string" && typeof candidate.title === "string" && typeof candidate.slug === "string" && typeof candidate.body === "string" && Array.isArray(candidate.sources);
-}
-async function listCandidates(root) {
-  const dir = path14.join(root, CANDIDATES_DIR);
-  if (!existsSync4(dir)) return [];
-  const entries = await readdir6(dir, { withFileTypes: true });
-  const candidates = [];
-  for (const entry of entries) {
-    if (!entry.isFile() || !entry.name.endsWith(CANDIDATE_EXT)) continue;
-    const id = entry.name.slice(0, -CANDIDATE_EXT.length);
-    const candidate = await readCandidate(root, id);
-    if (candidate) candidates.push(candidate);
+async function collectBrokenForMarker(captured, line, pageFile, sourcesDir, lineCountCache, out) {
+  for (const part of captured.split(",")) {
+    const trimmed = part.trim();
+    if (trimmed.length === 0) continue;
+    const filename = stripSpanSuffix(trimmed);
+    const citedPath = path19.join(sourcesDir, filename);
+    if (!existsSync6(citedPath)) {
+      out.push({
+        rule: "broken-citation",
+        severity: "error",
+        file: pageFile,
+        message: `Broken citation ^[${filename}] \u2014 source file not found`,
+        line
+      });
+      continue;
+    }
+    const range = parseLineRange(trimmed);
+    if (range === null) continue;
+    const lineCount = await resolveLineCount(citedPath, filename, lineCountCache);
+    if (range.end <= lineCount) continue;
+    out.push({
+      rule: "broken-citation",
+      severity: "error",
+      file: pageFile,
+      message: `Claim-level span ^[${trimmed}] is out of bounds (source has only ${lineCount} lines)`,
+      line
+    });
   }
-  candidates.sort((a, b) => a.generatedAt.localeCompare(b.generatedAt));
-  return candidates;
-}
-async function countCandidates(root) {
-  const candidates = await listCandidates(root);
-  return candidates.length;
 }
-async function deleteCandidate(root, id) {
-  const filePath = candidatePath(root, id);
-  if (!existsSync4(filePath)) return false;
-  await unlink2(filePath);
-  return true;
+async function resolveLineCount(citedPath, filename, cache) {
+  const cached = cache.get(filename);
+  if (cached !== void 0) return cached;
+  const content = await safeReadFile(citedPath);
+  const lineCount = countLines(content);
+  cache.set(filename, lineCount);
+  return lineCount;
 }
-async function archiveCandidate(root, id) {
-  const sourcePath = candidatePath(root, id);
-  if (!existsSync4(sourcePath)) return false;
-  const target = archivePath(root, id);
-  await mkdir5(path14.dirname(target), { recursive: true });
-  try {
-    await rename3(sourcePath, target);
-  } catch {
-    const raw = await safeReadFile(sourcePath);
-    await writeFile4(target, raw, "utf-8");
-    await unlink2(sourcePath);
+async function checkMalformedClaimCitations(root) {
+  const pages = await collectAllPages(root);
+  const results = [];
+  for (const page of pages) {
+    for (const { captured, line } of findMatchesInContent(page.content, CITATION_PATTERN)) {
+      for (const part of captured.split(",")) {
+        if (!isMalformedCitationEntry(part)) continue;
+        results.push({
+          rule: "malformed-claim-citation",
+          severity: "error",
+          file: page.filePath,
+          message: `Malformed claim citation ^[${captured}] \u2014 expected file.md, file.md:N-N, or file.md#LN-LN`,
+          line
+        });
+      }
+    }
   }
-  return true;
+  return results;
 }
 // src/compiler/page-renderer.ts
-import { readdir as readdir7 } from "fs/promises";
-import path15 from "path";
+import { readdir as readdir8 } from "fs/promises";
+import path20 from "path";
 // src/compiler/provenance.ts
 function addProvenanceMeta(fields, concept) {
@@ -1873,8 +2917,8 @@ function reportContradictionWarnings(conceptTitle, concept) {
 // src/compiler/page-renderer.ts
 var RELATED_PAGE_CONTEXT_LIMIT = 5;
-async function renderMergedPageContent(root, entry) {
-  const pagePath = path15.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
+async function renderMergedPageContent(root, entry, schema) {
+  const pagePath = path20.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
   const existingPage = await safeReadFile(pagePath);
   const relatedPages = await loadRelatedPages(root, entry.slug);
   const system = buildPagePrompt(
@@ -1889,14 +2933,14 @@ async function renderMergedPageContent(root, entry) {
       { role: "user", content: `Write the wiki page for "${entry.concept.concept}".` }
     ]
   });
-  const frontmatter = buildMergedFrontmatter(entry, existingPage);
+  const frontmatter = buildMergedFrontmatter(entry, existingPage, schema);
   reportContradictionWarnings(entry.concept.concept, entry.concept);
   return `${frontmatter}
 ${pageBody}
 `;
 }
-function buildMergedFrontmatter(entry, existingPage) {
+function buildMergedFrontmatter(entry, existingPage, schema) {
   const now = (/* @__PURE__ */ new Date()).toISOString();
   const existing = existingPage ? parseFrontmatter(existingPage) : null;
   const createdAt = existing?.meta.createdAt && typeof existing.meta.createdAt === "string" ? existing.meta.createdAt : now;
@@ -1904,6 +2948,7 @@ function buildMergedFrontmatter(entry, existingPage) {
     title: entry.concept.concept,
     summary: entry.concept.summary,
     sources: entry.sourceFiles,
+    kind: schema.defaultKind,
     createdAt,
     updatedAt: now
   };
@@ -1912,17 +2957,17 @@ function buildMergedFrontmatter(entry, existingPage) {
   return buildFrontmatter(frontmatterFields);
 }
 async function loadRelatedPages(root, excludeSlug) {
-  const conceptsPath = path15.join(root, CONCEPTS_DIR);
+  const conceptsPath = path20.join(root, CONCEPTS_DIR);
   let files;
   try {
-    files = await readdir7(conceptsPath);
+    files = await readdir8(conceptsPath);
   } catch {
     return "";
   }
   const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, RELATED_PAGE_CONTEXT_LIMIT);
   const contents = [];
   for (const f of related) {
-    const content = await safeReadFile(path15.join(conceptsPath, f));
+    const content = await safeReadFile(path20.join(conceptsPath, f));
     if (!content) continue;
     const { meta } = parseFrontmatter(content);
     if (meta.orphaned) continue;
@@ -1962,7 +3007,7 @@ function bucketChanges(changes) {
     unchanged: changes.filter((c) => c.status === "unchanged")
   };
 }
-async function generatePagesPhase(root, extractions, frozenSlugs, options) {
+async function generatePagesPhase(root, extractions, frozenSlugs, schema, options) {
   const merged = mergeExtractions(extractions, frozenSlugs);
   const sourceStates = options.review ? await buildExtractionSourceStates(root, extractions) : {};
   const limit = pLimit(COMPILE_CONCURRENCY);
@@ -1970,7 +3015,7 @@ async function generatePagesPhase(root, extractions, frozenSlugs, options) {
   const candidates = [];
   const pages = await Promise.all(
     merged.map((entry) => limit(async () => {
-      const result = await generateMergedPage(root, entry, options, sourceStates);
+      const result = await generateMergedPage(root, entry, schema, options, sourceStates);
       if (result.error) errors.push(result.error);
       if (result.candidateId) candidates.push(result.candidateId);
       return entry;
@@ -2016,12 +3061,24 @@ function summarizeCompile(buckets, generation, extractions, options) {
   return baseResult;
 }
 async function runCompilePipeline(root, options) {
+  const schema = await loadSchema(root);
+  reportSchemaStatus(schema);
   const state = await readState(root);
   const changes = await detectChanges(root, state);
   augmentWithAffectedSources(changes, findAffectedSources(state, changes));
   const buckets = bucketChanges(changes);
   if (buckets.toCompile.length === 0 && buckets.deleted.length === 0) {
     status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
+    if (!options.review) {
+      const emptyGeneration = { pages: [], errors: [], candidates: [] };
+      await generateSeedPages(root, schema, emptyGeneration);
+      await finalizeWiki(root, emptyGeneration.pages);
+      return {
+        ...emptyCompileResult(),
+        skipped: buckets.unchanged.length,
+        errors: emptyGeneration.errors
+      };
+    }
     return { ...emptyCompileResult(), skipped: buckets.unchanged.length };
   }
   printChangesSummary(changes);
@@ -2034,17 +3091,23 @@ async function runCompilePipeline(root, options) {
   if (!options.review) {
     await freezeFailedExtractions(root, extractions, frozenSlugs);
   }
-  const generation = await generatePagesPhase(root, extractions, frozenSlugs, options);
+  const generation = await generatePagesPhase(root, extractions, frozenSlugs, schema, options);
   if (!options.review) {
     await persistExtractionStates(root, extractions);
     if (frozenSlugs.size > 0) {
       await orphanUnownedFrozenPages(root, frozenSlugs);
     }
     await persistFrozenSlugs(root, frozenSlugs, extractions);
+    await generateSeedPages(root, schema, generation);
     await finalizeWiki(root, generation.pages);
   }
   return summarizeCompile(buckets, generation, extractions, options);
 }
+function reportSchemaStatus(schema) {
+  if (schema.loadedFrom) {
+    status("i", dim(`Schema: ${schema.loadedFrom}`));
+  }
+}
 function augmentWithAffectedSources(changes, affected) {
   for (const file of affected) {
     status("~", info(`${file} [affected by shared concept]`));
@@ -2105,9 +3168,9 @@ function printChangesSummary(changes) {
 }
 async function extractForSource(root, sourceFile) {
   status("*", info(`Extracting: ${sourceFile}`));
-  const sourcePath = path16.join(root, SOURCES_DIR, sourceFile);
-  const sourceContent = await readFile8(sourcePath, "utf-8");
-  const existingIndex = await safeReadFile(path16.join(root, INDEX_FILE));
+  const sourcePath = path21.join(root, SOURCES_DIR, sourceFile);
+  const sourceContent = await readFile14(sourcePath, "utf-8");
+  const existingIndex = await safeReadFile(path21.join(root, INDEX_FILE));
   const concepts = await extractConcepts(sourceContent, existingIndex);
   if (concepts.length > 0) {
     const names = concepts.map((c) => c.concept).join(", ");
@@ -2165,27 +3228,77 @@ ${result.sourceContent}`
   }
   return Array.from(bySlug.values());
 }
-async function generateMergedPage(root, entry, options, sourceStates) {
-  const fullPage = await renderMergedPageContent(root, entry);
+async function generateMergedPage(root, entry, schema, options, sourceStates) {
+  const fullPage = await renderMergedPageContent(root, entry, schema);
   if (options.review) {
-    return await persistReviewCandidate(root, entry, fullPage, sourceStates);
+    return await persistReviewCandidate(root, entry, fullPage, sourceStates, schema);
   }
-  const pagePath = path16.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
+  const pagePath = path21.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
   const error2 = await writePageIfValid(pagePath, fullPage, entry.concept.concept);
   return { error: error2 ?? void 0 };
 }
-async function persistReviewCandidate(root, entry, fullPage, sourceStates) {
+async function persistReviewCandidate(root, entry, fullPage, sourceStates, schema) {
+  const virtualPath = `wiki/concepts/${entry.slug}.md`;
+  const violations = checkPageCrossLinks(fullPage, virtualPath, schema);
   const candidate = await writeCandidate(root, {
     title: entry.concept.concept,
     slug: entry.slug,
     summary: entry.concept.summary,
     sources: entry.sourceFiles,
     body: fullPage,
-    sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles)
+    sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles),
+    schemaViolations: violations.length > 0 ? violations : void 0
   });
   status("?", info(`Candidate ready: ${candidate.id} (${entry.slug})`));
   return { candidateId: candidate.id };
 }
+async function generateSeedPages(root, schema, generation) {
+  if (schema.seedPages.length === 0) return;
+  for (const seed of schema.seedPages) {
+    const error2 = await generateSingleSeedPage(root, schema, seed);
+    if (error2) generation.errors.push(error2);
+  }
+}
+async function generateSingleSeedPage(root, schema, seed) {
+  const slug = slugify(seed.title);
+  const pagePath = path21.join(root, CONCEPTS_DIR, `${slug}.md`);
+  const relatedContent = await loadSeedRelatedPages(root, seed.relatedSlugs ?? []);
+  const rule = schema.kinds[seed.kind];
+  const system = buildSeedPagePrompt(seed, rule, relatedContent);
+  const pageBody = await callClaude({
+    system,
+    messages: [{ role: "user", content: `Write the ${seed.kind} page titled "${seed.title}".` }]
+  });
+  const now = (/* @__PURE__ */ new Date()).toISOString();
+  const existing = await safeReadFile(pagePath);
+  const existingMeta = existing ? parseFrontmatter(existing).meta : null;
+  const createdAt = typeof existingMeta?.createdAt === "string" ? existingMeta.createdAt : now;
+  const typedFields = {
+    title: seed.title,
+    summary: seed.summary,
+    sources: [],
+    kind: seed.kind,
+    createdAt,
+    updatedAt: now
+  };
+  const frontmatterFields = { ...typedFields };
+  addObsidianMeta(frontmatterFields, seed.title, []);
+  const frontmatter = buildFrontmatter(frontmatterFields);
+  return await writePageIfValid(pagePath, `${frontmatter}
+${pageBody}
+`, seed.title);
+}
+async function loadSeedRelatedPages(root, slugs) {
+  if (slugs.length === 0) return "";
+  const contents = [];
+  for (const slug of slugs) {
+    const pagePath = path21.join(root, CONCEPTS_DIR, `${slug}.md`);
+    const content = await safeReadFile(pagePath);
+    if (content) contents.push(content);
+  }
+  return contents.join("\n\n---\n\n");
+}
 async function extractConcepts(sourceContent, existingIndex) {
   const system = buildExtractionPrompt(sourceContent, existingIndex);
   const rawOutput = await callClaude({
@@ -2223,7 +3336,7 @@ async function persistSourceState(root, sourcePath, sourceFile, concepts) {
 // src/commands/compile.ts
 async function compileCommand(options = {}) {
-  if (!existsSync5(SOURCES_DIR)) {
+  if (!existsSync7(SOURCES_DIR)) {
     status(
       "!",
       warn("No sources found. Run `llmwiki ingest <url>` first.")
@@ -2234,8 +3347,8 @@ async function compileCommand(options = {}) {
 }
 // src/commands/query.ts
-import { existsSync as existsSync6 } from "fs";
-import path17 from "path";
+import { existsSync as existsSync8 } from "fs";
+import path22 from "path";
 var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
 var PAGE_SELECTION_TOOL = {
   name: "select_pages",
@@ -2283,16 +3396,92 @@ ${indexContent}`;
 function buildFilteredIndex(candidates) {
   return candidates.map((entry) => `- **${entry.slug}**: ${entry.title} \u2014 ${entry.summary}`).join("\n");
 }
-async function selectRelevantPages(root, question) {
+async function selectRelevantPages(root, question, debug) {
+  const chunkSelection = await trySelectViaChunks(root, question, debug);
+  if (chunkSelection) return chunkSelection;
   const candidates = await tryFindRelevantPages(root, question);
   if (candidates.length > 0) {
     const filteredIndex = buildFilteredIndex(candidates);
     const { pages: rawPages2, reasoning: reasoning2 } = await selectPages(question, filteredIndex);
-    return { pages: rawPages2, rawPages: rawPages2, reasoning: reasoning2 };
+    return { pages: rawPages2, rawPages: rawPages2, reasoning: reasoning2, chunks: [] };
   }
-  const indexContent = await safeReadFile(path17.join(root, INDEX_FILE));
+  const indexContent = await safeReadFile(path22.join(root, INDEX_FILE));
   const { pages: rawPages, reasoning } = await selectPages(question, indexContent);
-  return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning };
+  return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning, chunks: [] };
+}
+async function trySelectViaChunks(root, question, debug) {
+  const ranked = await tryFindRelevantChunks(root, question);
+  if (ranked.length === 0) return null;
+  const reranked = rerankWithBm25(
+    question,
+    ranked.map(({ chunk, score }) => ({ text: chunk.text, baseScore: score, chunk }))
+  );
+  const kept = reranked.slice(0, CHUNK_RERANK_KEEP);
+  const reorderingHappened = wasReordered(ranked, kept.map((k) => k.candidate.chunk));
+  const chunkCitations = toChunkCitations(kept);
+  const pageSlugs = collapseToPages(chunkCitations, QUERY_PAGE_LIMIT);
+  const reasoning = buildChunkReasoning(chunkCitations, pageSlugs);
+  return {
+    pages: pageSlugs,
+    rawPages: pageSlugs,
+    reasoning,
+    chunks: chunkCitations,
+    debug: debug ? buildDebug(chunkCitations, pageSlugs, reorderingHappened) : void 0
+  };
+}
+function wasReordered(before, after) {
+  const limit = Math.min(before.length, after.length);
+  for (let i = 0; i < limit; i++) {
+    if (before[i].chunk !== after[i]) return true;
+  }
+  return false;
+}
+function toChunkCitations(ranked) {
+  return ranked.map(({ candidate, score }) => ({
+    slug: candidate.chunk.slug,
+    title: candidate.chunk.title,
+    chunkIndex: candidate.chunk.chunkIndex,
+    score,
+    text: candidate.chunk.text
+  }));
+}
+function collapseToPages(chunks, limit) {
+  const slugs = [];
+  const seen = /* @__PURE__ */ new Set();
+  for (const chunk of chunks) {
+    if (seen.has(chunk.slug)) continue;
+    seen.add(chunk.slug);
+    slugs.push(chunk.slug);
+    if (slugs.length >= limit) break;
+  }
+  return slugs;
+}
+function buildChunkReasoning(chunks, pages) {
+  const top = chunks.slice(0, pages.length);
+  const summary = top.map((c) => `${c.slug}#${c.chunkIndex} (${c.score.toFixed(3)})`).join(", ");
+  return `Selected ${pages.length} page(s) from ${chunks.length} reranked chunks: ${summary}`;
+}
+function buildDebug(chunks, pageSlugs, reranked) {
+  const bestPerPage = /* @__PURE__ */ new Map();
+  for (const c of chunks) {
+    const prev = bestPerPage.get(c.slug);
+    if (prev === void 0 || c.score > prev) bestPerPage.set(c.slug, c.score);
+  }
+  return {
+    pages: pageSlugs.map((slug) => ({ slug, score: bestPerPage.get(slug) ?? 0 })),
+    chunks,
+    usedChunks: true,
+    reranked
+  };
+}
+async function tryFindRelevantChunks(root, question) {
+  try {
+    return await findRelevantChunks(root, question, CHUNK_TOP_K);
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    status("!", dim(`Chunk pre-filter unavailable (${message}); falling back.`));
+    return [];
+  }
 }
 async function tryFindRelevantPages(root, question) {
   try {
@@ -2308,7 +3497,7 @@ async function loadSelectedPages(root, slugs) {
   for (const slug of slugs) {
     let content = "";
     for (const dir of PAGE_DIRS) {
-      const candidate = await safeReadFile(path17.join(root, dir, `${slug}.md`));
+      const candidate = await safeReadFile(path22.join(root, dir, `${slug}.md`));
       if (!candidate) continue;
       const { meta } = parseFrontmatter(candidate);
       if (meta.orphaned) continue;
@@ -2325,11 +3514,12 @@ ${content}`);
   return sections.join("\n\n");
 }
 var ANSWER_SYSTEM_PROMPT = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";
-async function callAnswerLLM(question, pagesContent, onToken) {
+async function callAnswerLLM(question, pagesContent, chunks, onToken) {
+  const provenance = chunks.length > 0 ? buildChunkProvenance(chunks) : "";
   const userMessage = `Question: ${question}
 Relevant wiki pages:
-${pagesContent}`;
+${pagesContent}${provenance}`;
   return callClaude({
     system: ANSWER_SYSTEM_PROMPT,
     messages: [{ role: "user", content: userMessage }],
@@ -2337,6 +3527,16 @@ ${pagesContent}`;
     onToken
   });
 }
+function buildChunkProvenance(chunks) {
+  const sections = chunks.map(
+    (chunk) => `--- ${chunk.slug} (chunk ${chunk.chunkIndex}) ---
+${chunk.text}`
+  );
+  return `
+Most relevant excerpts (from chunk-level retrieval):
+${sections.join("\n\n")}`;
+}
 function summarizeAnswer(answer) {
   const firstLine = answer.trim().split(/\n/)[0] ?? "";
   const firstSentence = firstLine.split(/(?<=[.!?])\s/)[0] ?? firstLine;
@@ -2344,7 +3544,7 @@ function summarizeAnswer(answer) {
 }
 async function saveQueryPage(root, question, answer) {
   const slug = slugify(question);
-  const filePath = path17.join(root, QUERIES_DIR, `${slug}.md`);
+  const filePath = path22.join(root, QUERIES_DIR, `${slug}.md`);
   const frontmatter = buildFrontmatter({
     title: question,
     summary: summarizeAnswer(answer),
@@ -2370,30 +3570,42 @@ ${answer}
   return slug;
 }
 async function generateAnswer(root, question, options = {}) {
-  if (!existsSync6(path17.join(root, INDEX_FILE))) {
+  if (!existsSync8(path22.join(root, INDEX_FILE))) {
     throw new Error("Wiki index not found. Run `llmwiki compile` first.");
   }
-  const { pages, reasoning } = await selectRelevantPages(root, question);
-  options.onPageSelection?.(pages, reasoning);
-  const pagesContent = await loadSelectedPages(root, pages);
+  const selection = await selectRelevantPages(root, question, Boolean(options.debug));
+  options.onPageSelection?.(selection.pages, selection.reasoning);
+  const pagesContent = await loadSelectedPages(root, selection.pages);
   if (!pagesContent) {
-    return { answer: "", selectedPages: pages, reasoning };
-  }
-  const answer = await callAnswerLLM(question, pagesContent, options.onToken);
-  let saved;
-  if (options.save) {
-    saved = await saveQueryPage(root, question, answer);
+    return buildEmptyResult(selection);
   }
-  return { answer, selectedPages: pages, reasoning, saved };
+  const answer = await callAnswerLLM(question, pagesContent, selection.chunks, options.onToken);
+  const saved = options.save ? await saveQueryPage(root, question, answer) : void 0;
+  return {
+    answer,
+    selectedPages: selection.pages,
+    reasoning: selection.reasoning,
+    saved,
+    debug: selection.debug
+  };
+}
+function buildEmptyResult(selection) {
+  return {
+    answer: "",
+    selectedPages: selection.pages,
+    reasoning: selection.reasoning,
+    debug: selection.debug
+  };
 }
 async function queryCommand(root, question, options) {
-  if (!existsSync6(path17.join(root, INDEX_FILE))) {
+  if (!existsSync8(path22.join(root, INDEX_FILE))) {
     status("!", error("Wiki index not found. Run `llmwiki compile` first."));
     return;
   }
   header("Selecting relevant pages");
   const result = await generateAnswer(root, question, {
     save: options.save,
+    debug: options.debug,
     onToken: (text) => process.stdout.write(text),
     onPageSelection: (pages, reasoning) => {
       status("i", dim(`Reasoning: ${reasoning}`));
@@ -2402,6 +3614,7 @@ async function queryCommand(root, question, options) {
     }
   });
   process.stdout.write("\n");
+  if (result.debug) printDebugSnapshot(result.debug);
   if (!result.answer) {
     status("!", error("No matching pages found. Try refining your question."));
     return;
@@ -2412,15 +3625,35 @@ async function queryCommand(root, question, options) {
     status("\u2192", dim("Tip: use --save to add this answer to your wiki"));
   }
 }
+function printDebugSnapshot(debug) {
+  header("Retrieval debug");
+  status(
+    "i",
+    dim(
+      `Source: ${debug.usedChunks ? "chunk-level" : "page-level"}; reranked: ${debug.reranked ? "yes" : "no"}`
+    )
+  );
+  for (const page of debug.pages) {
+    status("\u2022", `${page.slug} (best chunk score ${page.score.toFixed(3)})`);
+  }
+  for (const chunk of debug.chunks) {
+    const preview = chunk.text.slice(0, DEBUG_CHUNK_PREVIEW_CHARS).replace(/\s+/g, " ").trim();
+    status(
+      "\xB7",
+      dim(`${chunk.slug}#${chunk.chunkIndex} score=${chunk.score.toFixed(3)} :: ${preview}\u2026`)
+    );
+  }
+}
+var DEBUG_CHUNK_PREVIEW_CHARS = 120;
 // src/commands/watch.ts
 import { watch as chokidarWatch } from "chokidar";
-import { existsSync as existsSync7 } from "fs";
-import path18 from "path";
+import { existsSync as existsSync9 } from "fs";
+import path23 from "path";
 var DEBOUNCE_MS = 500;
 async function watchCommand() {
-  const sourcesPath = path18.resolve(SOURCES_DIR);
-  if (!existsSync7(sourcesPath)) {
+  const sourcesPath = path23.resolve(SOURCES_DIR);
+  if (!existsSync9(sourcesPath)) {
     status(
       "!",
       warn("No sources/ directory found. Run `llmwiki ingest <url>` first.")
@@ -2454,7 +3687,7 @@ async function watchCommand() {
   const scheduleCompile = (eventPath, event) => {
     status(
       "~",
-      dim(`${event}: ${path18.basename(eventPath)}`)
+      dim(`${event}: ${path23.basename(eventPath)}`)
     );
     if (debounceTimer) clearTimeout(debounceTimer);
     debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
@@ -2468,261 +3701,30 @@ async function watchCommand() {
   });
 }
-// src/linter/rules.ts
-import { readdir as readdir8, readFile as readFile9 } from "fs/promises";
-import { existsSync as existsSync8 } from "fs";
-import path19 from "path";
-var MIN_BODY_LENGTH = 50;
-var WIKILINK_PATTERN = /\[\[([^\]]+)\]\]/g;
-var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
-function findMatchesInContent(content, pattern) {
-  const results = [];
-  const lines = content.split("\n");
-  for (let i = 0; i < lines.length; i++) {
-    const matches = lines[i].matchAll(pattern);
-    for (const match of matches) {
-      results.push({ captured: match[1], line: i + 1 });
-    }
-  }
-  return results;
-}
-async function readMarkdownFiles(dirPath) {
-  if (!existsSync8(dirPath)) return [];
-  const entries = await readdir8(dirPath);
-  const mdFiles = entries.filter((f) => f.endsWith(".md"));
-  const results = await Promise.all(
-    mdFiles.map(async (fileName) => {
-      const filePath = path19.join(dirPath, fileName);
-      const content = await readFile9(filePath, "utf-8");
-      return { filePath, content };
-    })
-  );
-  return results;
-}
-async function collectAllPages(root) {
-  const conceptPages = await readMarkdownFiles(path19.join(root, CONCEPTS_DIR));
-  const queryPages = await readMarkdownFiles(path19.join(root, QUERIES_DIR));
-  return [...conceptPages, ...queryPages];
-}
-function buildPageSlugSet(pages) {
-  const slugs = /* @__PURE__ */ new Set();
-  for (const page of pages) {
-    const baseName = path19.basename(page.filePath, ".md");
-    slugs.add(baseName.toLowerCase());
-  }
-  return slugs;
-}
-async function checkBrokenWikilinks(root) {
-  const pages = await collectAllPages(root);
-  const existingSlugs = buildPageSlugSet(pages);
-  const results = [];
-  for (const page of pages) {
-    for (const { captured, line } of findMatchesInContent(page.content, WIKILINK_PATTERN)) {
-      const linkSlug = slugify(captured);
-      if (!existingSlugs.has(linkSlug)) {
-        results.push({
-          rule: "broken-wikilink",
-          severity: "error",
-          file: page.filePath,
-          message: `Broken wikilink [[${captured}]] \u2014 no matching page found`,
-          line
-        });
-      }
-    }
-  }
-  return results;
-}
-async function checkOrphanedPages(root) {
-  const pages = await collectAllPages(root);
-  const results = [];
-  for (const page of pages) {
-    const { meta } = parseFrontmatter(page.content);
-    if (meta.orphaned === true) {
-      results.push({
-        rule: "orphaned-page",
-        severity: "warning",
-        file: page.filePath,
-        message: `Page is marked as orphaned`
-      });
-    }
-  }
-  return results;
-}
-async function checkMissingSummaries(root) {
-  const pages = await collectAllPages(root);
-  const results = [];
-  for (const page of pages) {
-    const { meta } = parseFrontmatter(page.content);
-    const summary = meta.summary;
-    const isMissing = !summary || typeof summary === "string" && summary.trim() === "";
-    if (isMissing) {
-      results.push({
-        rule: "missing-summary",
-        severity: "warning",
-        file: page.filePath,
-        message: `Page has no summary in frontmatter`
-      });
-    }
-  }
-  return results;
-}
-async function checkDuplicateConcepts(root) {
-  const pages = await collectAllPages(root);
-  const titleMap = /* @__PURE__ */ new Map();
-  for (const page of pages) {
-    const { meta } = parseFrontmatter(page.content);
-    const title = typeof meta.title === "string" ? meta.title : "";
-    if (!title) continue;
-    const normalizedTitle = title.toLowerCase().trim();
-    const existing = titleMap.get(normalizedTitle) ?? [];
-    existing.push(page.filePath);
-    titleMap.set(normalizedTitle, existing);
-  }
-  const results = [];
-  for (const [title, files] of titleMap) {
-    if (files.length <= 1) continue;
-    for (const file of files) {
-      results.push({
-        rule: "duplicate-concept",
-        severity: "error",
-        file,
-        message: `Duplicate title "${title}" \u2014 also in ${files.filter((f) => f !== file).join(", ")}`
-      });
-    }
-  }
-  return results;
-}
-async function checkEmptyPages(root) {
-  const pages = await collectAllPages(root);
-  const results = [];
-  for (const page of pages) {
-    const { meta, body } = parseFrontmatter(page.content);
-    const hasTitle = typeof meta.title === "string" && meta.title.trim() !== "";
-    const isBodyEmpty = body.trim().length < MIN_BODY_LENGTH;
-    if (hasTitle && isBodyEmpty) {
-      results.push({
-        rule: "empty-page",
-        severity: "warning",
-        file: page.filePath,
-        message: `Page body is empty or too short (< ${MIN_BODY_LENGTH} chars)`
-      });
-    }
-  }
-  return results;
-}
-async function checkLowConfidencePages(root) {
-  const pages = await collectAllPages(root);
-  const results = [];
-  for (const page of pages) {
-    const { meta } = parseFrontmatter(page.content);
-    const { confidence } = parseProvenanceMetadata(meta);
-    if (confidence === void 0 || confidence >= LOW_CONFIDENCE_THRESHOLD) continue;
-    results.push({
-      rule: "low-confidence",
-      severity: "warning",
-      file: page.filePath,
-      message: `Page confidence ${confidence.toFixed(2)} is below ${LOW_CONFIDENCE_THRESHOLD}`
-    });
-  }
-  return results;
-}
-async function checkContradictedPages(root) {
-  const pages = await collectAllPages(root);
-  const results = [];
-  for (const page of pages) {
-    const { meta } = parseFrontmatter(page.content);
-    const { contradictedBy } = parseProvenanceMetadata(meta);
-    if (!contradictedBy || contradictedBy.length === 0) continue;
-    const slugs = contradictedBy.map((r) => r.slug).join(", ");
-    results.push({
-      rule: "contradicted-page",
-      severity: "warning",
-      file: page.filePath,
-      message: `Page contradicts: ${slugs}`
-    });
-  }
-  return results;
-}
-async function checkInferredWithoutCitations(root) {
-  const pages = await collectAllPages(root);
-  const results = [];
-  for (const page of pages) {
-    const { meta, body } = parseFrontmatter(page.content);
-    const provenance = parseProvenanceMetadata(meta);
-    const inferred = provenance.inferredParagraphs ?? countUncitedProseParagraphs(body);
-    if (inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS) continue;
-    results.push({
-      rule: "excess-inferred-paragraphs",
-      severity: "warning",
-      file: page.filePath,
-      message: `Page has ${inferred} inferred paragraphs without citations (max ${MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS})`
-    });
-  }
-  return results;
-}
-var PROSE_PARAGRAPH_LEAD = /^[A-Za-z]/;
-function countUncitedProseParagraphs(body) {
-  const paragraphs = body.split(/\n\s*\n/);
-  let count = 0;
-  for (const block of paragraphs) {
-    const trimmed = block.trim();
-    if (trimmed.length === 0) continue;
-    if (!PROSE_PARAGRAPH_LEAD.test(trimmed)) continue;
-    if (CITATION_PATTERN.test(trimmed)) {
-      CITATION_PATTERN.lastIndex = 0;
-      continue;
-    }
-    CITATION_PATTERN.lastIndex = 0;
-    count += 1;
-  }
-  return count;
-}
-function splitCitationFilenames(captured) {
-  return captured.split(",").map((s) => s.trim()).filter((s) => s.length > 0);
-}
-async function checkBrokenCitations(root) {
-  const pages = await collectAllPages(root);
-  const sourcesDir = path19.join(root, SOURCES_DIR);
-  const results = [];
-  for (const page of pages) {
-    for (const { captured, line } of findMatchesInContent(page.content, CITATION_PATTERN)) {
-      for (const filename of splitCitationFilenames(captured)) {
-        const citedPath = path19.join(sourcesDir, filename);
-        if (!existsSync8(citedPath)) {
-          results.push({
-            rule: "broken-citation",
-            severity: "error",
-            file: page.filePath,
-            message: `Broken citation ^[${filename}] \u2014 source file not found`,
-            line
-          });
-        }
-      }
-    }
-  }
-  return results;
-}
 // src/linter/index.ts
-var ALL_RULES = [
+var RULES_WITHOUT_SCHEMA = [
   checkBrokenWikilinks,
   checkOrphanedPages,
   checkMissingSummaries,
   checkDuplicateConcepts,
   checkEmptyPages,
   checkBrokenCitations,
+  checkMalformedClaimCitations,
   checkLowConfidencePages,
   checkContradictedPages,
   checkInferredWithoutCitations
 ];
+var RULES_WITH_SCHEMA = [checkSchemaCrossLinks];
 function countBySeverity(results, severity) {
   return results.filter((r) => r.severity === severity).length;
 }
 async function lint(root) {
-  const ruleResults = await Promise.all(
-    ALL_RULES.map((rule) => rule(root))
-  );
-  const results = ruleResults.flat();
+  const schema = await loadSchema(root);
+  const [plainResults, schemaResults] = await Promise.all([
+    Promise.all(RULES_WITHOUT_SCHEMA.map((rule) => rule(root))),
+    Promise.all(RULES_WITH_SCHEMA.map((rule) => rule(root, schema)))
+  ]);
+  const results = [...plainResults.flat(), ...schemaResults.flat()];
   return {
     errors: countBySeverity(results, "error"),
     warnings: countBySeverity(results, "warning"),
@@ -2750,6 +3752,9 @@ function printResult(result) {
 }
 async function lintCommand() {
   header("Linting wiki");
+  const schema = await loadSchema(process.cwd());
+  const schemaSource = schema.loadedFrom ?? "defaults (no schema file)";
+  status("i", dim(`Schema: ${schemaSource}`));
   const summary = await lint(process.cwd());
   for (const result of summary.results) {
     printResult(result);
@@ -2766,6 +3771,36 @@ async function lintCommand() {
   }
 }
+// src/commands/schema.ts
+import { existsSync as existsSync10 } from "fs";
+import { mkdir as mkdir6, writeFile as writeFile5 } from "fs/promises";
+import path24 from "path";
+async function schemaInitCommand() {
+  const root = process.cwd();
+  const defaults = buildDefaultSchema();
+  const targetPath = defaultSchemaInitPath(root);
+  if (existsSync10(targetPath)) {
+    status("!", warn(`Schema file already exists at ${targetPath}`));
+    return;
+  }
+  await mkdir6(path24.dirname(targetPath), { recursive: true });
+  const serializable = {
+    version: defaults.version,
+    defaultKind: defaults.defaultKind,
+    kinds: defaults.kinds,
+    seedPages: defaults.seedPages
+  };
+  await writeFile5(targetPath, `${JSON.stringify(serializable, null, 2)}
+`, "utf-8");
+  status("+", success(`Wrote schema to ${targetPath}`));
+}
+async function schemaShowCommand() {
+  const schema = await loadSchema(process.cwd());
+  const loadedFrom = schema.loadedFrom ?? "(defaults \u2014 no schema file found)";
+  header(`Schema (${loadedFrom})`);
+  console.log(serializeSchemaToYaml(schema));
+}
 // src/commands/review-list.ts
 async function reviewListCommand() {
   header("Pending review candidates");
@@ -2797,10 +3832,17 @@ async function reviewShowCommand(id) {
   status("i", dim(`generated:  ${candidate.generatedAt}`));
   console.log();
   console.log(candidate.body);
+  if (candidate.schemaViolations && candidate.schemaViolations.length > 0) {
+    console.log();
+    header("Schema violations");
+    for (const v of candidate.schemaViolations) {
+      status("!", warn(`[${v.severity}] ${v.message}`));
+    }
+  }
 }
 // src/commands/review-approve.ts
-import path20 from "path";
+import path25 from "path";
 // src/commands/review-helpers.ts
 async function runReviewUnderLock(id, underLock) {
@@ -2832,7 +3874,7 @@ async function approveUnderLock(root, id) {
     process.exitCode = 1;
     return;
   }
-  const pagePath = path20.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
+  const pagePath = path25.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
   await atomicWrite(pagePath, candidate.body);
   status("+", success(`Approved \u2192 ${source(pagePath)}`));
   await persistCandidateSourceStates(root, candidate);
@@ -2892,7 +3934,7 @@ import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 // src/mcp/tools.ts
-import path21 from "path";
+import path26 from "path";
 import { z } from "zod";
 // src/mcp/provider-check.ts
@@ -2985,15 +4027,16 @@ function registerQueryTool(server, root) {
     "query_wiki",
     {
       title: "Query Wiki",
-      description: "Ask a natural-language question. Selects relevant pages with the LLM, loads them, and returns a grounded answer with citations. Set save=true to persist the answer as a wiki page. Requires an LLM provider.",
+      description: "Ask a natural-language question. Selects relevant pages with the LLM, loads them, and returns a grounded answer with citations. Set save=true to persist the answer as a wiki page. Set debug=true to include the selected chunks and their scores. Requires an LLM provider.",
       inputSchema: {
         question: z.string().describe("The natural-language question to answer."),
-        save: z.boolean().optional().describe("Persist the answer as a wiki/queries/ page when true.")
+        save: z.boolean().optional().describe("Persist the answer as a wiki/queries/ page when true."),
+        debug: z.boolean().optional().describe("Include retrieval debug info (selected chunks/pages + scores).")
       }
     },
-    async ({ question, save }) => {
+    async ({ question, save, debug }) => {
       ensureProviderAvailable();
-      const result = await generateAnswer(root, question, { save });
+      const result = await generateAnswer(root, question, { save, debug });
       return jsonResult(result);
     }
   );
@@ -3017,15 +4060,30 @@ function registerSearchTool(server, root) {
   );
 }
 async function pickSearchSlugs(root, question) {
+  try {
+    const chunks = await findRelevantChunks(root, question, CHUNK_TOP_K);
+    if (chunks.length > 0) return dedupePreservingOrder(chunks.map((c) => c.chunk.slug));
+  } catch {
+  }
   try {
     const candidates = await findRelevantPages(root, question);
     if (candidates.length > 0) return candidates.map((c) => c.slug);
   } catch {
   }
-  const indexContent = await safeReadFile(path21.join(root, INDEX_FILE));
+  const indexContent = await safeReadFile(path26.join(root, INDEX_FILE));
   const { pages } = await selectPages(question, indexContent);
   return pages;
 }
+function dedupePreservingOrder(slugs) {
+  const seen = /* @__PURE__ */ new Set();
+  const out = [];
+  for (const slug of slugs) {
+    if (seen.has(slug)) continue;
+    seen.add(slug);
+    out.push(slug);
+  }
+  return out;
+}
 function registerReadTool(server, root) {
   server.registerTool(
     "read_page",
@@ -3071,8 +4129,8 @@ function registerStatusTool(server, root) {
   );
 }
 async function collectStatus(root) {
-  const concepts = await collectPageSummaries(path21.join(root, CONCEPTS_DIR));
-  const queries = await collectPageSummaries(path21.join(root, QUERIES_DIR));
+  const concepts = await collectPageSummaries(path26.join(root, CONCEPTS_DIR));
+  const queries = await collectPageSummaries(path26.join(root, QUERIES_DIR));
   const state = await readState(root);
   const changes = await detectChanges(root, state);
   const orphans = await findOrphanedSlugs(root);
@@ -3089,7 +4147,7 @@ async function collectStatus(root) {
   };
 }
 async function findOrphanedSlugs(root) {
-  const scanned = await scanWikiPages(path21.join(root, CONCEPTS_DIR));
+  const scanned = await scanWikiPages(path26.join(root, CONCEPTS_DIR));
   return scanned.filter(({ meta }) => meta.orphaned).map(({ slug }) => slug);
 }
 async function loadPageRecords(root, slugs) {
@@ -3102,7 +4160,7 @@ async function loadPageRecords(root, slugs) {
 }
 async function readPage(root, slug) {
   for (const dir of PAGE_DIRS2) {
-    const content = await safeReadFile(path21.join(root, dir, `${slug}.md`));
+    const content = await safeReadFile(path26.join(root, dir, `${slug}.md`));
     if (!content) continue;
     const { meta, body } = parseFrontmatter(content);
     if (meta.orphaned) continue;
@@ -3117,7 +4175,7 @@ async function readPage(root, slug) {
 }
 // src/mcp/resources.ts
-import path22 from "path";
+import path27 from "path";
 import { readdir as readdir9 } from "fs/promises";
 import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
 function jsonContent(uri, payload) {
@@ -3151,7 +4209,7 @@ function registerIndexResource(server, root) {
       mimeType: "text/markdown"
     },
     async (uri) => {
-      const content = await safeReadFile(path22.join(root, INDEX_FILE));
+      const content = await safeReadFile(path27.join(root, INDEX_FILE));
       return { contents: [markdownContent(uri, content)] };
     }
   );
@@ -3218,7 +4276,7 @@ function registerQueryResource(server, root) {
   );
 }
 async function listSources(root) {
-  const sourcesPath = path22.join(root, SOURCES_DIR);
+  const sourcesPath = path27.join(root, SOURCES_DIR);
   let files;
   try {
     files = await readdir9(sourcesPath);
@@ -3227,14 +4285,14 @@ async function listSources(root) {
   }
   const records = [];
   for (const file of files.filter((f) => f.endsWith(".md"))) {
-    const content = await safeReadFile(path22.join(sourcesPath, file));
+    const content = await safeReadFile(path27.join(sourcesPath, file));
     const { meta } = parseFrontmatter(content);
     records.push({ filename: file, ...meta });
   }
   return records;
 }
 async function loadPageWithMeta(root, dir, slug) {
-  const filePath = path22.join(root, dir, `${slug}.md`);
+  const filePath = path27.join(root, dir, `${slug}.md`);
   const content = await safeReadFile(filePath);
   if (!content) {
     throw new Error(`Page not found: ${dir}/${slug}.md`);
@@ -3243,7 +4301,7 @@ async function loadPageWithMeta(root, dir, slug) {
   return { slug, meta, body: body.trim() };
 }
 async function listPagesUnder(root, dir, scheme) {
-  const pagesPath = path22.join(root, dir);
+  const pagesPath = path27.join(root, dir);
   let files;
   try {
     files = await readdir9(pagesPath);
@@ -3327,7 +4385,7 @@ reviewCommand.command("reject <id>").description("Reject a candidate and archive
     process.exit(1);
   }
 });
-program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").action(async (question, options) => {
+program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").option("--debug", "Print which pages and chunks were selected and their scores").action(async (question, options) => {
   try {
     requireProvider();
     await queryCommand(process.cwd(), question, options);
@@ -3353,6 +4411,23 @@ program.command("lint").description("Run rule-based quality checks against the w
     process.exit(1);
   }
 });
+var schemaCmd = program.command("schema").description("Inspect or initialize the project's wiki schema config");
+schemaCmd.command("init").description("Write a starter schema file to .llmwiki/schema.json").action(async () => {
+  try {
+    await schemaInitCommand();
+  } catch (err) {
+    console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
+    process.exit(1);
+  }
+});
+schemaCmd.command("show").description("Print the resolved schema for this project").action(async () => {
+  try {
+    await schemaShowCommand();
+  } catch (err) {
+    console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
+    process.exit(1);
+  }
+});
 program.command("serve").description("Start an MCP server exposing wiki tools and resources over stdio").option("--root <dir>", "Project root directory", process.cwd()).action(async (options) => {
   try {
     await startMCPServer({ root: options.root, version });