llm-wiki-compiler 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -13,6 +13,8 @@ import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
13
13
  import { writeFile, rename, readFile, mkdir } from "fs/promises";
14
14
  import path from "path";
15
15
  import yaml from "js-yaml";
16
// Shape of one citation entry: a file name containing neither ":" nor "#",
// optionally followed by a line span written as ":START" / ":START-END" or as
// "#LSTART" / "#LSTART-LEND". Named groups expose the bounds of each syntax.
+ var SPAN_SUFFIX_PATTERN = /^(?<file>[^:#]+)(?:(?::(?<colonStart>\d+)(?:-(?<colonEnd>\d+))?)|(?:#L(?<hashStart>\d+)(?:-L(?<hashEnd>\d+))?))?$/;
17
// Smallest line number accepted as the start of a cited span.
+ var MIN_LINE_NUMBER = 1;
16
18
  var VALID_PROVENANCE_STATES = /* @__PURE__ */ new Set([
17
19
  "extracted",
18
20
  "merged",
@@ -49,6 +51,23 @@ async function atomicWrite(filePath, content) {
49
51
  await writeFile(tmpPath, content, "utf-8");
50
52
  await rename(tmpPath, filePath);
51
53
  }
54
/**
 * Tell whether a cited line span is well-formed.
 *
 * @param {number} start First line of the span.
 * @param {number} end Last line of the span.
 * @returns {boolean} True when `start` is at least MIN_LINE_NUMBER and `end`
 *   does not precede `start`.
 */
function isValidLineRange(start, end) {
  if (start < MIN_LINE_NUMBER) return false;
  return end >= start;
}
57
/**
 * Decide whether a single citation entry is malformed.
 *
 * An entry is malformed when it is blank, when it contains ":" or "#" but does
 * not match SPAN_SUFFIX_PATTERN, or when its line span fails isValidLineRange.
 * An entry with neither ":" nor "#" is treated as a plain file name and accepted.
 *
 * @param {string} entry One entry taken from a citation marker.
 * @returns {boolean} True when the entry should be reported as malformed.
 */
function isMalformedCitationEntry(entry) {
  const text = entry.trim();
  if (text === "") return true;

  const hasSpanMarker = text.includes(":") || text.includes("#");
  if (!hasSpanMarker) return false;

  const groups = SPAN_SUFFIX_PATTERN.exec(text)?.groups;
  if (!groups) return true;

  const rawStart = groups.colonStart ?? groups.hashStart;
  if (rawStart === undefined) return false;

  const rawEnd = groups.colonEnd ?? groups.hashEnd;
  const firstLine = Number(rawStart);
  // A span without an explicit end covers a single line.
  const lastLine = rawEnd === undefined ? firstLine : Number(rawEnd);
  return !isValidLineRange(firstLine, lastLine);
}
52
71
  async function safeReadFile(filePath) {
53
72
  try {
54
73
  return await readFile(filePath, "utf-8");
@@ -120,6 +139,8 @@ var PROVIDER_MODELS = {
120
139
  minimax: "MiniMax-M2.7"
121
140
  };
122
141
  var OLLAMA_DEFAULT_HOST = "http://localhost:11434/v1";
142
// Fallback request timeout for the OpenAI provider: 10 minutes, in milliseconds.
+ var OPENAI_DEFAULT_TIMEOUT_MS = 10 * 60 * 1e3;
143
// Fallback request timeout for the Ollama provider: 30 minutes, in milliseconds.
+ var OLLAMA_DEFAULT_TIMEOUT_MS = 30 * 60 * 1e3;
123
144
  var SOURCES_DIR = "sources";
124
145
  var CONCEPTS_DIR = "wiki/concepts";
125
146
  var QUERIES_DIR = "wiki/queries";
@@ -324,11 +345,11 @@ async function ingest(source2) {
324
345
  }
325
346
 
326
347
  // src/commands/compile.ts
327
- import { existsSync as existsSync5 } from "fs";
348
+ import { existsSync as existsSync7 } from "fs";
328
349
 
329
350
  // src/compiler/index.ts
330
- import { readFile as readFile8 } from "fs/promises";
331
- import path16 from "path";
351
+ import { readFile as readFile10 } from "fs/promises";
352
+ import path18 from "path";
332
353
 
333
354
  // src/utils/state.ts
334
355
  import { readFile as readFile3, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
@@ -556,6 +577,15 @@ var AnthropicProvider = class {
556
577
 
557
578
  // src/providers/openai.ts
558
579
  import OpenAI from "openai";
580
/**
 * Read a request timeout, in milliseconds, from an environment variable.
 *
 * @param {string} name Name of the environment variable to read.
 * @returns {number | undefined} The parsed timeout, capped at the largest delay
 *   a Node timer can represent, or undefined when the variable is unset, blank,
 *   non-numeric, non-finite, or not strictly positive.
 */
function readTimeoutEnv(name) {
  // Node timers replace any delay above 2^31 - 1 ms with 1 ms, so an oversized
  // value would turn a "practically unlimited" timeout into an immediate one.
  // NOTE(review): assumes the consumer hands this value to a timer — confirm.
  const MAX_TIMER_DELAY_MS = 2 ** 31 - 1;

  const raw = process.env[name]?.trim();
  if (!raw) return undefined;

  const parsed = Number(raw);
  if (!Number.isFinite(parsed) || parsed <= 0) return undefined;
  return Math.min(parsed, MAX_TIMER_DELAY_MS);
}
586
/**
 * Look up the environment override for the OpenAI request timeout.
 *
 * @returns {number | undefined} Whatever readTimeoutEnv yields for
 *   LLMWIKI_REQUEST_TIMEOUT_MS.
 */
function resolveOpenAITimeoutMs() {
  const envTimeoutMs = readTimeoutEnv("LLMWIKI_REQUEST_TIMEOUT_MS");
  return envTimeoutMs;
}
559
589
  function translateToolToOpenAI(tool) {
560
590
  return {
561
591
  type: "function",
@@ -575,11 +605,13 @@ var OpenAIProvider = class {
575
605
  this.model = model;
576
606
  this.configuredEmbeddingModel = options.embeddingModel;
577
607
  const resolvedKey = options.apiKey ?? process.env.OPENAI_API_KEY ?? "";
608
+ const timeout = options.timeoutMs ?? resolveOpenAITimeoutMs() ?? OPENAI_DEFAULT_TIMEOUT_MS;
578
609
  this.client = new OpenAI({
579
610
  apiKey: resolvedKey,
580
- baseURL: options.baseURL ?? null
611
+ baseURL: options.baseURL ?? null,
612
+ timeout
581
613
  });
582
- this.embeddingsClient = options.embeddingsBaseURL ? new OpenAI({ apiKey: resolvedKey, baseURL: options.embeddingsBaseURL }) : this.client;
614
+ this.embeddingsClient = options.embeddingsBaseURL ? new OpenAI({ apiKey: resolvedKey, baseURL: options.embeddingsBaseURL, timeout }) : this.client;
583
615
  }
584
616
  /** Send a single non-streaming completion request. */
585
617
  async complete(system, messages, maxTokens) {
@@ -645,13 +677,17 @@ var OpenAIProvider = class {
645
677
  };
646
678
 
647
679
  // src/providers/ollama.ts
680
/**
 * Pick the request timeout for the Ollama provider.
 *
 * Precedence: the explicit value, then OLLAMA_TIMEOUT_MS, then
 * LLMWIKI_REQUEST_TIMEOUT_MS, then OLLAMA_DEFAULT_TIMEOUT_MS.
 *
 * @param {number | null | undefined} explicit Timeout supplied by the caller.
 * @returns {number} Timeout in milliseconds.
 */
function resolveOllamaTimeoutMs(explicit) {
  if (explicit !== undefined && explicit !== null) return explicit;

  const providerSpecific = readTimeoutEnv("OLLAMA_TIMEOUT_MS");
  if (providerSpecific !== undefined && providerSpecific !== null) return providerSpecific;

  const shared = readTimeoutEnv("LLMWIKI_REQUEST_TIMEOUT_MS");
  return shared ?? OLLAMA_DEFAULT_TIMEOUT_MS;
}
648
683
  var OllamaProvider = class extends OpenAIProvider {
649
684
  constructor(model, options) {
650
685
  super(model, {
651
686
  baseURL: options.baseURL,
652
687
  apiKey: "ollama",
653
688
  embeddingsBaseURL: options.embeddingsBaseURL,
654
- embeddingModel: options.embeddingModel
689
+ embeddingModel: options.embeddingModel,
690
+ timeoutMs: resolveOllamaTimeoutMs(options.timeoutMs)
655
691
  });
656
692
  }
657
693
  /** Ollama ships a dedicated embedding model (nomic-embed-text). */
@@ -1065,7 +1101,14 @@ ${relatedPages}` : "";
1065
1101
  "Source attribution: at the end of each prose paragraph, append a citation",
1066
1102
  "marker showing which source file(s) the paragraph drew from.",
1067
1103
  "Format: ^[filename.md] for single-source, ^[source-a.md, source-b.md] for multi-source.",
1068
- "Place citations only at the end of prose paragraphs \u2014 not on headings, list items, or code blocks.",
1104
+ "When a single sentence makes a specific factual claim and you can identify the",
1105
+ "exact line range it came from, you may use the claim-level form",
1106
+ "^[filename.md:START-END] (or ^[filename.md#LSTART-LEND]) at the end of that",
1107
+ "sentence \u2014 START and END are 1-indexed line numbers in the source file.",
1108
+ "Paragraph-level citations remain the default; only switch to claim-level form",
1109
+ "when it materially improves verifiability and the line range is unambiguous.",
1110
+ "Place citations only at the end of prose paragraphs or sentences \u2014 not on",
1111
+ "headings, list items, or code blocks.",
1069
1112
  "Source filenames are visible as `--- SOURCE: filename.md ---` headers in the content below.",
1070
1113
  "",
1071
1114
  "If a paragraph is your inference rather than a direct extraction, leave it",
@@ -1106,6 +1149,20 @@ function mapRawConcept(c) {
1106
1149
  inferredParagraphs: typeof c.inferred_paragraphs === "number" && Number.isInteger(c.inferred_paragraphs) && c.inferred_paragraphs >= 0 ? c.inferred_paragraphs : void 0
1107
1150
  };
1108
1151
  }
1152
/**
 * Assemble the prompt used to author a seed page.
 *
 * @param {{ kind: string, title: string, summary: string }} seed Page to write.
 * @param {{ minWikilinks: number, description: string }} rule Rule for the
 *   seed's page kind.
 * @param {string} relatedPagesContent Text placed after the RELATED PAGES marker.
 * @returns {string} The prompt, one instruction per line.
 */
function buildSeedPagePrompt(seed, rule, relatedPagesContent) {
  // Only demand a link count when the kind actually requires one.
  let linkExpectation = "Use [[wikilinks]] when referencing other pages.";
  if (rule.minWikilinks > 0) {
    linkExpectation = `Include at least ${rule.minWikilinks} [[wikilinks]] to related pages.`;
  }

  const promptLines = [];
  promptLines.push(`You are a wiki author. Write a ${seed.kind} page titled "${seed.title}".`);
  promptLines.push(`Page-kind guidance: ${rule.description}`);
  promptLines.push(`Summary line for context: ${seed.summary}`);
  promptLines.push("Draw facts only from the related wiki pages provided below.");
  promptLines.push(linkExpectation);
  promptLines.push("Write in a neutral, informative tone. Be concise but thorough.");
  promptLines.push("\n\n--- RELATED PAGES ---\n\n");
  promptLines.push(relatedPagesContent);
  return promptLines.join("\n");
}
1109
1166
  function parseConcepts(toolOutput) {
1110
1167
  try {
1111
1168
  const parsed = JSON.parse(toolOutput);
@@ -1116,6 +1173,149 @@ function parseConcepts(toolOutput) {
1116
1173
  }
1117
1174
  }
1118
1175
 
1176
+ // src/schema/types.ts
1177
// The page kinds recognised by the schema code; membership in this list is
// what the kind-validation helpers test against.
+ var PAGE_KINDS = [
1178
+ "concept",
1179
+ "entity",
1180
+ "comparison",
1181
+ "overview"
1182
+ ];
1183
+
1184
+ // src/schema/defaults.ts
1185
// Default minimum number of wikilinks for each page kind.
+ var DEFAULT_MIN_LINKS = {
1186
+ concept: 0,
1187
+ entity: 1,
1188
+ comparison: 2,
1189
+ overview: 3
1190
+ };
1191
// Default one-line description of each page kind.
+ var DEFAULT_DESCRIPTIONS = {
1192
+ concept: "A standalone idea, technique, or pattern worth documenting.",
1193
+ entity: "A specific thing \u2014 a person, product, organization, or named artifact.",
1194
+ comparison: "A side-by-side analysis weighing two or more concepts or entities.",
1195
+ overview: "A top-down map page that situates several concepts within a domain."
1196
+ };
1197
/**
 * Build the built-in rule for every page kind.
 *
 * @returns {object} A fresh object mapping each of the four page kinds to
 *   `{ minWikilinks, description }` taken from the default tables.
 */
function buildDefaultKindRules() {
  const ruleFor = (kind) => ({
    minWikilinks: DEFAULT_MIN_LINKS[kind],
    description: DEFAULT_DESCRIPTIONS[kind]
  });
  return {
    concept: ruleFor("concept"),
    entity: ruleFor("entity"),
    comparison: ruleFor("comparison"),
    overview: ruleFor("overview")
  };
}
1211
/**
 * Build the schema used when no schema file overrides it.
 *
 * @returns {object} A fresh schema: version 1, "concept" as the default kind,
 *   the default kind rules, no seed pages, and `loadedFrom` set to null.
 */
function buildDefaultSchema() {
  const kinds = buildDefaultKindRules();
  const schema = {
    version: 1,
    defaultKind: "concept",
    kinds,
    seedPages: [],
    loadedFrom: null
  };
  return schema;
}
1220
+
1221
+ // src/schema/loader.ts
1222
+ import { existsSync as existsSync2 } from "fs";
1223
+ import { readFile as readFile6 } from "fs/promises";
1224
+ import path9 from "path";
1225
+ import yaml2 from "js-yaml";
1226
// Schema file locations, relative to the project root. Order matters: the
// lookup returns the first one that exists, and the first entry is the path
// offered for a newly initialised schema.
+ var SCHEMA_CANDIDATE_PATHS = [
1227
+ ".llmwiki/schema.json",
1228
+ ".llmwiki/schema.yaml",
1229
+ ".llmwiki/schema.yml",
1230
+ "wiki/.schema.yaml",
1231
+ "wiki/.schema.yml"
1232
+ ];
1233
/**
 * Locate the schema file for a project.
 *
 * @param {string} root Project root directory.
 * @returns {string | null} Path of the first candidate under `root` that
 *   exists, in SCHEMA_CANDIDATE_PATHS order, or null when none exists.
 */
function findSchemaPath(root) {
  const firstExisting = SCHEMA_CANDIDATE_PATHS
    .map((relative) => path9.join(root, relative))
    .find((candidatePath) => existsSync2(candidatePath));
  return firstExisting ?? null;
}
1240
/**
 * Parse the text of a schema file.
 *
 * Files whose path ends in ".json" are parsed as JSON; everything else is
 * handed to the YAML loader. Parse errors propagate to the caller.
 *
 * @param {string} filePath Path the content was read from; selects the parser.
 * @param {string} content Raw file text.
 * @returns {object} The parsed value when it is a non-null object, otherwise
 *   an empty object.
 */
function parseSchemaFile(filePath, content) {
  let document;
  if (filePath.endsWith(".json")) {
    document = JSON.parse(content);
  } else {
    document = yaml2.load(content);
  }
  const isObjectLike = typeof document === "object" && document !== null;
  return isObjectLike ? document : {};
}
1246
/**
 * Tell whether a value names a known page kind.
 *
 * @param {unknown} value Value to test.
 * @returns {boolean} True when `value` is a string listed in PAGE_KINDS.
 */
function isPageKind(value) {
  if (typeof value !== "string") return false;
  return PAGE_KINDS.includes(value);
}
1249
/**
 * Merge a user-supplied rule for one page kind over its default.
 *
 * `minWikilinks` is a link count, so a finite numeric override is normalised to
 * a non-negative integer: fractions are rounded up and negatives become 0.
 * NaN, Infinity and non-numbers fall back to the default rather than leaking
 * into prompts and thresholds.
 *
 * @param {{ minWikilinks: number, description: string }} defaults Built-in rule.
 * @param {object | null | undefined} override Rule read from the schema file.
 * @returns {{ minWikilinks: number, description: string }} `defaults` itself
 *   when there is no override, otherwise a new merged rule.
 */
function mergeKindRule(defaults, override) {
  if (!override) return defaults;

  const rawMin = override.minWikilinks;
  const minWikilinks = typeof rawMin === "number" && Number.isFinite(rawMin)
    ? Math.max(0, Math.ceil(rawMin))
    : defaults.minWikilinks;
  const description = typeof override.description === "string"
    ? override.description
    : defaults.description;
  return { minWikilinks, description };
}
1255
/**
 * Merge per-kind rule overrides over the default rules.
 *
 * @param {object} defaults Default rule for each page kind.
 * @param {object | null | undefined} overrides Overrides keyed by page kind.
 * @returns {object} A new object: a shallow copy of `defaults` when there are
 *   no overrides, otherwise that copy with every PAGE_KINDS entry replaced by
 *   the result of mergeKindRule.
 */
function mergeKinds(defaults, overrides) {
  if (!overrides) return { ...defaults };

  const mergedEntries = PAGE_KINDS.map((kind) => [
    kind,
    mergeKindRule(defaults[kind], overrides[kind])
  ]);
  return { ...defaults, ...Object.fromEntries(mergedEntries) };
}
1263
/**
 * Validate and normalise one seed-page entry from a schema file.
 *
 * @param {unknown} entry Raw entry as parsed from JSON or YAML.
 * @returns {{ title: string, kind: string, summary: string, relatedSlugs: (string[] | undefined) } | null}
 *   The normalised seed page, or null when the entry is not an object, has no
 *   non-blank string title, or has an unknown kind.
 */
function normalizeSeedPage(entry) {
  // An empty list item in a hand-written schema parses to null; drop it like
  // any other invalid entry instead of throwing on the property access below.
  if (entry === null || typeof entry !== "object") return null;
  if (typeof entry.title !== "string" || entry.title.trim() === "") return null;
  if (!isPageKind(entry.kind)) return null;

  const summary = typeof entry.summary === "string" ? entry.summary : "";
  // Non-string slugs are discarded; a missing or non-array list stays undefined.
  const relatedSlugs = Array.isArray(entry.relatedSlugs)
    ? entry.relatedSlugs.filter((slug) => typeof slug === "string")
    : undefined;
  return { title: entry.title, kind: entry.kind, summary, relatedSlugs };
}
1270
/**
 * Normalise the `seedPages` list of a schema file.
 *
 * @param {unknown} entries Raw `seedPages` value.
 * @returns {object[]} The entries accepted by normalizeSeedPage, in order;
 *   an empty array when `entries` is not an array.
 */
function normalizeSeedPages(entries) {
  if (!Array.isArray(entries)) return [];
  return entries.flatMap((entry) => {
    const page = normalizeSeedPage(entry);
    return page === null ? [] : [page];
  });
}
1274
/**
 * Combine the default schema with the contents of a schema file.
 *
 * @param {object} defaults Default schema.
 * @param {object} overrides Parsed schema file.
 * @param {string | null} loadedFrom Path the overrides came from.
 * @returns {object} A version-1 schema whose default kind comes from the
 *   overrides when valid, whose kind rules are merged, and whose seed pages
 *   come from the overrides only.
 */
function applyOverrides(defaults, overrides, loadedFrom) {
  let defaultKind = defaults.defaultKind;
  if (isPageKind(overrides.defaultKind)) {
    defaultKind = overrides.defaultKind;
  }
  const kinds = mergeKinds(defaults.kinds, overrides.kinds);
  const seedPages = normalizeSeedPages(overrides.seedPages);
  return { version: 1, defaultKind, kinds, seedPages, loadedFrom };
}
1284
/**
 * Load the wiki schema for a project.
 *
 * @param {string} root Project root directory.
 * @returns {Promise<object>} The default schema when no schema file is found,
 *   otherwise the defaults with the file's overrides applied. Read and parse
 *   errors are not caught here.
 */
async function loadSchema(root) {
  const fallback = buildDefaultSchema();
  const schemaPath = findSchemaPath(root);
  if (!schemaPath) return fallback;

  const text = await readFile6(schemaPath, "utf-8");
  const overrides = parseSchemaFile(schemaPath, text);
  return applyOverrides(fallback, overrides, schemaPath);
}
1292
/**
 * Path at which a new schema file is placed by default.
 *
 * @param {string} root Project root directory.
 * @returns {string} `root` joined with the first SCHEMA_CANDIDATE_PATHS entry.
 */
function defaultSchemaInitPath(root) {
  const [preferredLocation] = SCHEMA_CANDIDATE_PATHS;
  return path9.join(root, preferredLocation);
}
1295
+
1296
+ // src/schema/helpers.ts
1297
+ import yaml3 from "js-yaml";
1298
// Matches every [[...]] wikilink; the capture group holds the text between the brackets.
+ var WIKILINK_PATTERN = /\[\[([^\]]+)\]\]/g;
1299
/**
 * Resolve the page kind to use for a page.
 *
 * @param {unknown} rawKind Kind value as found on the page.
 * @param {{ defaultKind: string }} schema Active schema.
 * @returns {string} `rawKind` when it is a string listed in PAGE_KINDS,
 *   otherwise the schema's default kind.
 */
function resolvePageKind(rawKind, schema) {
  const isKnownKind = typeof rawKind === "string" && PAGE_KINDS.includes(rawKind);
  return isKnownKind ? rawKind : schema.defaultKind;
}
1305
/**
 * Count the wikilinks in a page body.
 *
 * @param {string} body Page body text.
 * @returns {number} Number of WIKILINK_PATTERN matches, 0 when there are none.
 */
function countWikilinks(body) {
  return body.match(WIKILINK_PATTERN)?.length ?? 0;
}
1309
/**
 * Serialise a schema to YAML text.
 *
 * Only `version`, `defaultKind`, `kinds` and `seedPages` are written; other
 * properties of the schema object are left out.
 *
 * @param {object} schema Schema to serialise.
 * @returns {string} Output of the YAML dumper.
 */
function serializeSchemaToYaml(schema) {
  const { version, defaultKind, kinds, seedPages } = schema;
  const dumpOptions = { lineWidth: -1, quotingType: '"' };
  return yaml3.dump({ version, defaultKind, kinds, seedPages }, dumpOptions);
}
1318
+
1119
1319
  // src/compiler/deps.ts
1120
1320
  function buildConceptToSourcesMap(sources) {
1121
1321
  const conceptMap = /* @__PURE__ */ new Map();
@@ -1262,7 +1462,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
1262
1462
  }
1263
1463
 
1264
1464
  // src/compiler/orphan.ts
1265
- import path9 from "path";
1465
+ import path10 from "path";
1266
1466
  async function markOrphaned(root, sourceFile, state) {
1267
1467
  const sourceEntry = state.sources[sourceFile];
1268
1468
  if (!sourceEntry) return;
@@ -1288,7 +1488,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
1288
1488
  }
1289
1489
  }
1290
1490
  async function orphanPage(root, slug, reason) {
1291
- const pagePath = path9.join(root, CONCEPTS_DIR, `${slug}.md`);
1491
+ const pagePath = path10.join(root, CONCEPTS_DIR, `${slug}.md`);
1292
1492
  const content = await safeReadFile(pagePath);
1293
1493
  if (!content) return;
1294
1494
  const { meta } = parseFrontmatter(content);
@@ -1299,18 +1499,18 @@ async function orphanPage(root, slug, reason) {
1299
1499
  }
1300
1500
 
1301
1501
  // src/compiler/resolver.ts
1302
- import { readdir as readdir2, readFile as readFile6 } from "fs/promises";
1303
- import path10 from "path";
1304
- import { existsSync as existsSync2 } from "fs";
1502
+ import { readdir as readdir2, readFile as readFile7 } from "fs/promises";
1503
+ import path11 from "path";
1504
+ import { existsSync as existsSync3 } from "fs";
1305
1505
  async function buildTitleIndex(root) {
1306
- const conceptsDir = path10.join(root, CONCEPTS_DIR);
1307
- if (!existsSync2(conceptsDir)) return [];
1506
+ const conceptsDir = path11.join(root, CONCEPTS_DIR);
1507
+ if (!existsSync3(conceptsDir)) return [];
1308
1508
  const files = await readdir2(conceptsDir);
1309
1509
  const pages = [];
1310
1510
  for (const file of files) {
1311
1511
  if (!file.endsWith(".md")) continue;
1312
- const filePath = path10.join(conceptsDir, file);
1313
- const content = await readFile6(filePath, "utf-8");
1512
+ const filePath = path11.join(conceptsDir, file);
1513
+ const content = await readFile7(filePath, "utf-8");
1314
1514
  const { meta } = parseFrontmatter(content);
1315
1515
  if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
1316
1516
  pages.push({
@@ -1364,7 +1564,7 @@ function addWikilinks(body, titles, selfTitle) {
1364
1564
  const matches = findTitleMatches(result, page.title);
1365
1565
  for (const m of matches.reverse()) {
1366
1566
  if (!isLinkablePosition(result, m.start, m.end)) continue;
1367
- result = result.slice(0, m.start) + `[[${page.title}]]` + result.slice(m.end);
1567
+ result = result.slice(0, m.start) + `[[${page.slug}|${page.title}]]` + result.slice(m.end);
1368
1568
  }
1369
1569
  }
1370
1570
  return result;
@@ -1396,7 +1596,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
1396
1596
  let count = 0;
1397
1597
  for (const page of titleIndex) {
1398
1598
  if (newSlugs.includes(page.slug)) continue;
1399
- const content = await readFile6(page.filePath, "utf-8");
1599
+ const content = await readFile7(page.filePath, "utf-8");
1400
1600
  const { body } = parseFrontmatter(content);
1401
1601
  const linked = addWikilinks(body, newTitles, page.title);
1402
1602
  if (linked !== body) {
@@ -1408,7 +1608,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
1408
1608
  return count;
1409
1609
  }
1410
1610
  async function linkPage(page, titleIndex) {
1411
- const content = await readFile6(page.filePath, "utf-8");
1611
+ const content = await readFile7(page.filePath, "utf-8");
1412
1612
  const { body } = parseFrontmatter(content);
1413
1613
  const linked = addWikilinks(body, titleIndex, page.title);
1414
1614
  if (linked === body) return false;
@@ -1419,17 +1619,17 @@ async function linkPage(page, titleIndex) {
1419
1619
 
1420
1620
  // src/compiler/indexgen.ts
1421
1621
  import { readdir as readdir3 } from "fs/promises";
1422
- import path11 from "path";
1622
+ import path12 from "path";
1423
1623
  async function generateIndex(root) {
1424
1624
  status("*", info("Generating index..."));
1425
- const conceptsPath = path11.join(root, CONCEPTS_DIR);
1426
- const queriesPath = path11.join(root, QUERIES_DIR);
1625
+ const conceptsPath = path12.join(root, CONCEPTS_DIR);
1626
+ const queriesPath = path12.join(root, QUERIES_DIR);
1427
1627
  const concepts = await collectPageSummaries(conceptsPath);
1428
1628
  const queries = await collectPageSummaries(queriesPath);
1429
1629
  concepts.sort((a, b) => a.title.localeCompare(b.title));
1430
1630
  queries.sort((a, b) => a.title.localeCompare(b.title));
1431
1631
  const indexContent = buildIndexContent(concepts, queries);
1432
- const indexPath = path11.join(root, INDEX_FILE);
1632
+ const indexPath = path12.join(root, INDEX_FILE);
1433
1633
  await atomicWrite(indexPath, indexContent);
1434
1634
  const total = concepts.length + queries.length;
1435
1635
  status("+", success(`Index updated with ${total} pages.`));
@@ -1443,7 +1643,7 @@ async function scanWikiPages(dirPath) {
1443
1643
  }
1444
1644
  const scanned = [];
1445
1645
  for (const file of files.filter((f) => f.endsWith(".md"))) {
1446
- const content = await safeReadFile(path11.join(dirPath, file));
1646
+ const content = await safeReadFile(path12.join(dirPath, file));
1447
1647
  const { meta } = parseFrontmatter(content);
1448
1648
  scanned.push({ slug: file.replace(/\.md$/, ""), meta });
1449
1649
  }
@@ -1463,12 +1663,12 @@ function stripWikilinks(text) {
1463
1663
  function buildIndexContent(concepts, queries) {
1464
1664
  const lines = ["# Knowledge Wiki", "", "## Concepts", ""];
1465
1665
  for (const page of concepts) {
1466
- lines.push(`- **[[${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
1666
+ lines.push(`- **[[${page.slug}|${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
1467
1667
  }
1468
1668
  if (queries.length > 0) {
1469
1669
  lines.push("", "## Saved Queries", "");
1470
1670
  for (const page of queries) {
1471
- lines.push(`- **[[${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
1671
+ lines.push(`- **[[${page.slug}|${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
1472
1672
  }
1473
1673
  }
1474
1674
  const total = concepts.length + queries.length;
@@ -1480,7 +1680,7 @@ function buildIndexContent(concepts, queries) {
1480
1680
 
1481
1681
  // src/compiler/obsidian.ts
1482
1682
  import { readdir as readdir4 } from "fs/promises";
1483
- import path12 from "path";
1683
+ import path13 from "path";
1484
1684
  var ABBREVIATION_MIN_WORDS = 3;
1485
1685
  var SWAP_CONJUNCTIONS = [" and ", " or "];
1486
1686
  function addObsidianMeta(frontmatter, conceptTitle, tags) {
@@ -1522,11 +1722,11 @@ function generateAbbreviation(title) {
1522
1722
  return abbreviation;
1523
1723
  }
1524
1724
  async function generateMOC(root) {
1525
- const conceptsPath = path12.join(root, CONCEPTS_DIR);
1725
+ const conceptsPath = path13.join(root, CONCEPTS_DIR);
1526
1726
  const pages = await loadConceptPages(conceptsPath);
1527
1727
  const tagGroups = groupPagesByTag(pages);
1528
1728
  const content = buildMOCContent(tagGroups);
1529
- await atomicWrite(path12.join(root, MOC_FILE), content);
1729
+ await atomicWrite(path13.join(root, MOC_FILE), content);
1530
1730
  }
1531
1731
  async function loadConceptPages(conceptsPath) {
1532
1732
  let files;
@@ -1538,13 +1738,14 @@ async function loadConceptPages(conceptsPath) {
1538
1738
  const pages = [];
1539
1739
  for (const file of files) {
1540
1740
  if (!file.endsWith(".md")) continue;
1541
- const content = await safeReadFile(path12.join(conceptsPath, file));
1741
+ const content = await safeReadFile(path13.join(conceptsPath, file));
1542
1742
  if (!content) continue;
1543
1743
  const { meta } = parseFrontmatter(content);
1544
1744
  if (meta.orphaned) continue;
1545
- const title = typeof meta.title === "string" ? meta.title : file.replace(/\.md$/, "");
1745
+ const slug = file.replace(/\.md$/, "");
1746
+ const title = typeof meta.title === "string" ? meta.title : slug;
1546
1747
  const tags = Array.isArray(meta.tags) ? meta.tags : [];
1547
- pages.push({ title, tags });
1748
+ pages.push({ slug, title, tags });
1548
1749
  }
1549
1750
  return pages;
1550
1751
  }
@@ -1552,21 +1753,21 @@ function groupPagesByTag(pages) {
1552
1753
  const groups = /* @__PURE__ */ new Map();
1553
1754
  for (const page of pages) {
1554
1755
  if (page.tags.length === 0) {
1555
- appendToGroup(groups, "Uncategorized", page.title);
1756
+ appendToGroup(groups, "Uncategorized", page);
1556
1757
  continue;
1557
1758
  }
1558
1759
  for (const tag of page.tags) {
1559
- appendToGroup(groups, tag, page.title);
1760
+ appendToGroup(groups, tag, page);
1560
1761
  }
1561
1762
  }
1562
1763
  return groups;
1563
1764
  }
1564
- function appendToGroup(groups, key, title) {
1765
+ function appendToGroup(groups, key, page) {
1565
1766
  const existing = groups.get(key);
1566
1767
  if (existing) {
1567
- existing.push(title);
1768
+ existing.push(page);
1568
1769
  } else {
1569
- groups.set(key, [title]);
1770
+ groups.set(key, [page]);
1570
1771
  }
1571
1772
  }
1572
1773
  function buildMOCContent(tagGroups) {
@@ -1577,10 +1778,10 @@ function buildMOCContent(tagGroups) {
1577
1778
  return a.localeCompare(b);
1578
1779
  });
1579
1780
  for (const tag of sortedTags) {
1580
- const titles = tagGroups.get(tag) ?? [];
1781
+ const pages = tagGroups.get(tag) ?? [];
1581
1782
  lines.push(`## ${tag}`, "");
1582
- for (const title of titles.sort()) {
1583
- lines.push(`- [[${title}]]`);
1783
+ for (const page of pages.sort((a, b) => a.title.localeCompare(b.title))) {
1784
+ lines.push(`- [[${page.slug}|${page.title}]]`);
1584
1785
  }
1585
1786
  lines.push("");
1586
1787
  }
@@ -1588,9 +1789,9 @@ function buildMOCContent(tagGroups) {
1588
1789
  }
1589
1790
 
1590
1791
  // src/utils/embeddings.ts
1591
- import { readFile as readFile7, readdir as readdir5 } from "fs/promises";
1592
- import { existsSync as existsSync3 } from "fs";
1593
- import path13 from "path";
1792
+ import { readFile as readFile8, readdir as readdir5 } from "fs/promises";
1793
+ import { existsSync as existsSync4 } from "fs";
1794
+ import path14 from "path";
1594
1795
  function cosineSimilarity(a, b) {
1595
1796
  if (a.length !== b.length || a.length === 0) return 0;
1596
1797
  let dot = 0;
@@ -1613,13 +1814,13 @@ function findTopK(queryVec, store, k) {
1613
1814
  return scored.slice(0, k).map((item) => item.entry);
1614
1815
  }
1615
1816
  async function readEmbeddingStore(root) {
1616
- const filePath = path13.join(root, EMBEDDINGS_FILE);
1617
- if (!existsSync3(filePath)) return null;
1618
- const raw = await readFile7(filePath, "utf-8");
1817
+ const filePath = path14.join(root, EMBEDDINGS_FILE);
1818
+ if (!existsSync4(filePath)) return null;
1819
+ const raw = await readFile8(filePath, "utf-8");
1619
1820
  return JSON.parse(raw);
1620
1821
  }
1621
1822
  async function writeEmbeddingStore(root, store) {
1622
- const filePath = path13.join(root, EMBEDDINGS_FILE);
1823
+ const filePath = path14.join(root, EMBEDDINGS_FILE);
1623
1824
  await atomicWrite(filePath, JSON.stringify(store, null, 2));
1624
1825
  }
1625
1826
  async function findRelevantPages(root, question) {
@@ -1640,7 +1841,7 @@ async function findRelevantPages(root, question) {
1640
1841
  async function collectPageRecords(root) {
1641
1842
  const records = [];
1642
1843
  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
1643
- const absDir = path13.join(root, dir);
1844
+ const absDir = path14.join(root, dir);
1644
1845
  let files;
1645
1846
  try {
1646
1847
  files = await readdir5(absDir);
@@ -1648,7 +1849,7 @@ async function collectPageRecords(root) {
1648
1849
  continue;
1649
1850
  }
1650
1851
  for (const file of files.filter((f) => f.endsWith(".md"))) {
1651
- const content = await safeReadFile(path13.join(absDir, file));
1852
+ const content = await safeReadFile(path14.join(absDir, file));
1652
1853
  const { meta } = parseFrontmatter(content);
1653
1854
  if (meta.orphaned || typeof meta.title !== "string") continue;
1654
1855
  records.push({
@@ -1741,8 +1942,8 @@ async function updateEmbeddings(root, changedSlugs) {
1741
1942
 
1742
1943
  // src/compiler/candidates.ts
1743
1944
  import { readdir as readdir6, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
1744
- import { existsSync as existsSync4 } from "fs";
1745
- import path14 from "path";
1945
+ import { existsSync as existsSync5 } from "fs";
1946
+ import path15 from "path";
1746
1947
  import { randomBytes } from "crypto";
1747
1948
  var ID_SUFFIX_BYTES = 4;
1748
1949
  var CANDIDATE_EXT = ".json";
@@ -1751,10 +1952,10 @@ function buildCandidateId(slug) {
1751
1952
  return `${slug}-${suffix}`;
1752
1953
  }
1753
1954
  function candidatePath(root, id) {
1754
- return path14.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
1955
+ return path15.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
1755
1956
  }
1756
1957
  function archivePath(root, id) {
1757
- return path14.join(root, CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_EXT}`);
1958
+ return path15.join(root, CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_EXT}`);
1758
1959
  }
1759
1960
  async function writeCandidate(root, draft) {
1760
1961
  const candidate = {
@@ -1765,7 +1966,8 @@ async function writeCandidate(root, draft) {
1765
1966
  sources: draft.sources,
1766
1967
  body: draft.body,
1767
1968
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
1768
- ...draft.sourceStates ? { sourceStates: draft.sourceStates } : {}
1969
+ ...draft.sourceStates ? { sourceStates: draft.sourceStates } : {},
1970
+ ...draft.schemaViolations ? { schemaViolations: draft.schemaViolations } : {}
1769
1971
  };
1770
1972
  await atomicWrite(candidatePath(root, candidate.id), JSON.stringify(candidate, null, 2));
1771
1973
  return candidate;
@@ -1804,8 +2006,8 @@ function isValidCandidate(value) {
1804
2006
  return typeof candidate.id === "string" && typeof candidate.title === "string" && typeof candidate.slug === "string" && typeof candidate.body === "string" && Array.isArray(candidate.sources);
1805
2007
  }
1806
2008
  async function listCandidates(root) {
1807
- const dir = path14.join(root, CANDIDATES_DIR);
1808
- if (!existsSync4(dir)) return [];
2009
+ const dir = path15.join(root, CANDIDATES_DIR);
2010
+ if (!existsSync5(dir)) return [];
1809
2011
  const entries = await readdir6(dir, { withFileTypes: true });
1810
2012
  const candidates = [];
1811
2013
  for (const entry of entries) {
@@ -1823,15 +2025,15 @@ async function countCandidates(root) {
1823
2025
  }
1824
2026
  async function deleteCandidate(root, id) {
1825
2027
  const filePath = candidatePath(root, id);
1826
- if (!existsSync4(filePath)) return false;
2028
+ if (!existsSync5(filePath)) return false;
1827
2029
  await unlink2(filePath);
1828
2030
  return true;
1829
2031
  }
1830
2032
  async function archiveCandidate(root, id) {
1831
2033
  const sourcePath = candidatePath(root, id);
1832
- if (!existsSync4(sourcePath)) return false;
2034
+ if (!existsSync5(sourcePath)) return false;
1833
2035
  const target = archivePath(root, id);
1834
- await mkdir5(path14.dirname(target), { recursive: true });
2036
+ await mkdir5(path15.dirname(target), { recursive: true });
1835
2037
  try {
1836
2038
  await rename3(sourcePath, target);
1837
2039
  } catch {
@@ -1842,151 +2044,493 @@ async function archiveCandidate(root, id) {
1842
2044
  return true;
1843
2045
  }
1844
2046
 
1845
- // src/compiler/page-renderer.ts
1846
- import { readdir as readdir7 } from "fs/promises";
1847
- import path15 from "path";
1848
-
1849
- // src/compiler/provenance.ts
1850
- function addProvenanceMeta(fields, concept) {
1851
- if (typeof concept.confidence === "number") {
1852
- fields.confidence = concept.confidence;
1853
- }
1854
- if (concept.provenanceState) {
1855
- fields.provenanceState = concept.provenanceState;
1856
- }
1857
- if (concept.contradictedBy && concept.contradictedBy.length > 0) {
1858
- fields.contradictedBy = concept.contradictedBy;
1859
- }
1860
- if (typeof concept.inferredParagraphs === "number") {
1861
- fields.inferredParagraphs = concept.inferredParagraphs;
2047
+ // src/linter/rules.ts
2048
+ import { readdir as readdir7, readFile as readFile9 } from "fs/promises";
2049
+ import { existsSync as existsSync6 } from "fs";
2050
+ import path16 from "path";
2051
+ var MIN_BODY_LENGTH = 50;
2052
+ var WIKILINK_PATTERN2 = /\[\[([^\]]+)\]\]/g;
2053
+ var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
2054
/**
 * Collect the first capture group of every match of `pattern`, line by line.
 *
 * @param {string} content - Text to scan; split on "\n".
 * @param {RegExp|string} pattern - Pattern whose first capture group is reported.
 * @returns {Array<{captured: string, line: number}>} Matches in document order,
 *   with 1-based line numbers.
 */
function findMatchesInContent(content, pattern) {
  // Scan with a private global copy of the pattern. matchAll() throws on a
  // non-global RegExp and starts its scan at the caller's lastIndex, so a shared
  // /g pattern left mid-scan by an earlier .test()/.exec() would skip matches.
  const isRegExp = pattern instanceof RegExp;
  let flags = "g";
  if (isRegExp) {
    flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
  }
  const matcher = new RegExp(isRegExp ? pattern.source : pattern, flags);
  const results = [];
  content.split("\n").forEach((text, index) => {
    for (const match of text.matchAll(matcher)) {
      results.push({ captured: match[1], line: index + 1 });
    }
  });
  return results;
}
1864
- function reportContradictionWarnings(conceptTitle, concept) {
1865
- const refs = concept.contradictedBy;
1866
- if (!refs || refs.length === 0) return;
1867
- const slugs = refs.map((r) => r.slug).join(", ");
1868
- status(
1869
- "!",
1870
- warn(`Contradiction reported on "${conceptTitle}" \u2014 conflicts with: ${slugs}`)
1871
- );
1872
- }
1873
-
1874
- // src/compiler/page-renderer.ts
1875
- var RELATED_PAGE_CONTEXT_LIMIT = 5;
1876
- async function renderMergedPageContent(root, entry) {
1877
- const pagePath = path15.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
1878
- const existingPage = await safeReadFile(pagePath);
1879
- const relatedPages = await loadRelatedPages(root, entry.slug);
1880
- const system = buildPagePrompt(
1881
- entry.concept.concept,
1882
- entry.combinedContent,
1883
- existingPage,
1884
- relatedPages
2065
+ async function readMarkdownFiles(dirPath) {
2066
+ if (!existsSync6(dirPath)) return [];
2067
+ const entries = await readdir7(dirPath);
2068
+ const mdFiles = entries.filter((f) => f.endsWith(".md"));
2069
+ const results = await Promise.all(
2070
+ mdFiles.map(async (fileName) => {
2071
+ const filePath = path16.join(dirPath, fileName);
2072
+ const content = await readFile9(filePath, "utf-8");
2073
+ return { filePath, content };
2074
+ })
1885
2075
  );
1886
- const pageBody = await callClaude({
1887
- system,
1888
- messages: [
1889
- { role: "user", content: `Write the wiki page for "${entry.concept.concept}".` }
1890
- ]
1891
- });
1892
- const frontmatter = buildMergedFrontmatter(entry, existingPage);
1893
- reportContradictionWarnings(entry.concept.concept, entry.concept);
1894
- return `${frontmatter}
1895
-
1896
- ${pageBody}
1897
- `;
2076
+ return results;
1898
2077
  }
1899
- function buildMergedFrontmatter(entry, existingPage) {
1900
- const now = (/* @__PURE__ */ new Date()).toISOString();
1901
- const existing = existingPage ? parseFrontmatter(existingPage) : null;
1902
- const createdAt = existing?.meta.createdAt && typeof existing.meta.createdAt === "string" ? existing.meta.createdAt : now;
1903
- const frontmatterFields = {
1904
- title: entry.concept.concept,
1905
- summary: entry.concept.summary,
1906
- sources: entry.sourceFiles,
1907
- createdAt,
1908
- updatedAt: now
1909
- };
1910
- addObsidianMeta(frontmatterFields, entry.concept.concept, entry.concept.tags ?? []);
1911
- addProvenanceMeta(frontmatterFields, entry.concept);
1912
- return buildFrontmatter(frontmatterFields);
2078
/**
 * Load every markdown page the lint rules inspect: the files under
 * CONCEPTS_DIR first, followed by the files under QUERIES_DIR.
 *
 * @param {string} root - Directory both page folders are resolved against.
 * @returns {Promise<Array<{filePath: string, content: string}>>} Concept pages, then query pages.
 */
async function collectAllPages(root) {
  const fromConcepts = await readMarkdownFiles(path16.join(root, CONCEPTS_DIR));
  const fromQueries = await readMarkdownFiles(path16.join(root, QUERIES_DIR));
  return fromConcepts.concat(fromQueries);
}
1914
- async function loadRelatedPages(root, excludeSlug) {
1915
- const conceptsPath = path15.join(root, CONCEPTS_DIR);
1916
- let files;
1917
- try {
1918
- files = await readdir7(conceptsPath);
1919
- } catch {
1920
- return "";
1921
- }
1922
- const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, RELATED_PAGE_CONTEXT_LIMIT);
1923
- const contents = [];
1924
- for (const f of related) {
1925
- const content = await safeReadFile(path15.join(conceptsPath, f));
1926
- if (!content) continue;
1927
- const { meta } = parseFrontmatter(content);
1928
- if (meta.orphaned) continue;
1929
- contents.push(content);
2083
+ function buildPageSlugSet(pages) {
2084
+ const slugs = /* @__PURE__ */ new Set();
2085
+ for (const page of pages) {
2086
+ const baseName = path16.basename(page.filePath, ".md");
2087
+ slugs.add(baseName.toLowerCase());
1930
2088
  }
1931
- return contents.join("\n\n---\n\n");
1932
- }
1933
-
1934
- // src/compiler/index.ts
1935
- import pLimit from "p-limit";
1936
- function emptyCompileResult() {
1937
- return { compiled: 0, skipped: 0, deleted: 0, concepts: [], pages: [], errors: [] };
1938
- }
1939
- async function compile(root, options = {}) {
1940
- await compileAndReport(root, options);
2089
+ return slugs;
1941
2090
  }
1942
- async function compileAndReport(root, options = {}) {
1943
- header("llmwiki compile");
1944
- const locked = await acquireLock(root);
1945
- if (!locked) {
1946
- status("!", error("Could not acquire lock. Try again later."));
1947
- return {
1948
- ...emptyCompileResult(),
1949
- errors: ["Could not acquire .llmwiki/lock \u2014 another compile is in progress."]
1950
- };
1951
- }
1952
- try {
1953
- return await runCompilePipeline(root, options);
1954
- } finally {
1955
- await releaseLock(root);
2091
/**
 * Report every [[wikilink]] whose target does not correspond to a known page.
 *
 * A link resolves when the slug of its full text matches a page slug, or when
 * the slug of its target part does. The target part is the text before any
 * "|alias" and before any "#heading" suffix, so `[[Page|label]]` and
 * `[[Page#Section]]` are checked against "Page".
 *
 * @param {string} root - Project root passed through to collectAllPages.
 * @returns {Promise<Array<{rule: string, severity: string, file: string, message: string, line: number}>>}
 *   One "broken-wikilink" error per unresolved link.
 */
async function checkBrokenWikilinks(root) {
  const pages = await collectAllPages(root);
  const existingSlugs = buildPageSlugSet(pages);
  const results = [];
  for (const page of pages) {
    for (const { captured, line } of findMatchesInContent(page.content, WIKILINK_PATTERN2)) {
      // Try the literal text first so anything that resolved before still does.
      if (existingSlugs.has(slugify(captured))) continue;
      // Fall back to the bare target, without display alias or heading anchor.
      const target = captured.split("|")[0].split("#")[0].trim();
      if (target.length > 0 && existingSlugs.has(slugify(target))) continue;
      results.push({
        rule: "broken-wikilink",
        severity: "error",
        file: page.filePath,
        message: `Broken wikilink [[${captured}]] \u2014 no matching page found`,
        line
      });
    }
  }
  return results;
}
1958
- function bucketChanges(changes) {
1959
- return {
1960
- toCompile: changes.filter((c) => c.status === "new" || c.status === "changed"),
1961
- deleted: changes.filter((c) => c.status === "deleted"),
1962
- unchanged: changes.filter((c) => c.status === "unchanged")
1963
- };
1964
- }
1965
- async function generatePagesPhase(root, extractions, frozenSlugs, options) {
1966
- const merged = mergeExtractions(extractions, frozenSlugs);
1967
- const sourceStates = options.review ? await buildExtractionSourceStates(root, extractions) : {};
1968
- const limit = pLimit(COMPILE_CONCURRENCY);
1969
- const errors = [];
1970
- const candidates = [];
1971
- const pages = await Promise.all(
1972
- merged.map((entry) => limit(async () => {
1973
- const result = await generateMergedPage(root, entry, options, sourceStates);
1974
- if (result.error) errors.push(result.error);
1975
- if (result.candidateId) candidates.push(result.candidateId);
1976
- return entry;
1977
- }))
1978
- );
1979
- return { pages, errors, candidates };
1980
- }
1981
- async function persistExtractionStates(root, extractions) {
1982
- for (const result of extractions) {
1983
- if (result.concepts.length === 0) continue;
1984
- await persistSourceState(root, result.sourcePath, result.sourceFile, result.concepts);
2111
/**
 * Flag pages whose frontmatter carries `orphaned: true` (strictly the boolean).
 *
 * @param {string} root - Project root passed through to collectAllPages.
 * @returns {Promise<Array<{rule: string, severity: string, file: string, message: string}>>}
 *   One "orphaned-page" warning per flagged page.
 */
async function checkOrphanedPages(root) {
  const pages = await collectAllPages(root);
  return pages
    .filter((page) => parseFrontmatter(page.content).meta.orphaned === true)
    .map((page) => ({
      rule: "orphaned-page",
      severity: "warning",
      file: page.filePath,
      message: `Page is marked as orphaned`
    }));
}
1987
- function summarizeCompile(buckets, generation, extractions, options) {
1988
- header("Compilation complete");
1989
- status("\u2713", success(
2127
/**
 * Flag pages whose frontmatter `summary` is absent, falsy, or a string that is
 * blank after trimming.
 *
 * @param {string} root - Project root passed through to collectAllPages.
 * @returns {Promise<Array<{rule: string, severity: string, file: string, message: string}>>}
 *   One "missing-summary" warning per page without a usable summary.
 */
async function checkMissingSummaries(root) {
  // Strings must contain non-whitespace; any other value counts when truthy.
  const hasSummary = (value) => (typeof value === "string" ? value.trim() !== "" : Boolean(value));
  const pages = await collectAllPages(root);
  const results = [];
  for (const { content, filePath } of pages) {
    if (hasSummary(parseFrontmatter(content).meta.summary)) continue;
    results.push({
      rule: "missing-summary",
      severity: "warning",
      file: filePath,
      message: `Page has no summary in frontmatter`
    });
  }
  return results;
}
2145
/**
 * Flag pages that share a frontmatter title, compared case-insensitively and
 * ignoring surrounding whitespace. Pages without a non-empty string title are
 * left out of the comparison.
 *
 * @param {string} root - Project root passed through to collectAllPages.
 * @returns {Promise<Array<{rule: string, severity: string, file: string, message: string}>>}
 *   One "duplicate-concept" error per file in each group of clashing titles;
 *   the message names the other files in the group.
 */
async function checkDuplicateConcepts(root) {
  const pages = await collectAllPages(root);
  const filesByTitle = /* @__PURE__ */ new Map();
  for (const { content, filePath } of pages) {
    const { title } = parseFrontmatter(content).meta;
    if (typeof title !== "string" || title === "") continue;
    const key = title.toLowerCase().trim();
    if (!filesByTitle.has(key)) filesByTitle.set(key, []);
    filesByTitle.get(key).push(filePath);
  }
  return [...filesByTitle].flatMap(([title, files]) => {
    if (files.length <= 1) return [];
    return files.map((file) => ({
      rule: "duplicate-concept",
      severity: "error",
      file,
      message: `Duplicate title "${title}" \u2014 also in ${files.filter((f) => f !== file).join(", ")}`
    }));
  });
}
2171
/**
 * Flag titled pages whose trimmed body is shorter than MIN_BODY_LENGTH
 * characters. Pages without a non-blank string title are not reported.
 *
 * @param {string} root - Project root passed through to collectAllPages.
 * @returns {Promise<Array<{rule: string, severity: string, file: string, message: string}>>}
 *   One "empty-page" warning per short page.
 */
async function checkEmptyPages(root) {
  const results = [];
  for (const { content, filePath } of await collectAllPages(root)) {
    const { meta, body } = parseFrontmatter(content);
    const titled = typeof meta.title === "string" && meta.title.trim() !== "";
    const tooShort = body.trim().length < MIN_BODY_LENGTH;
    if (!titled || !tooShort) continue;
    results.push({
      rule: "empty-page",
      severity: "warning",
      file: filePath,
      message: `Page body is empty or too short (< ${MIN_BODY_LENGTH} chars)`
    });
  }
  return results;
}
2189
/**
 * Drop everything from the first ":" or "#" onward, leaving the file part of a
 * citation entry such as "file.md:3-7" or "file.md#L3".
 *
 * @param {string} entry - A single citation entry.
 * @returns {string} The text before the first ":" or "#", or `entry` unchanged
 *   when it contains neither.
 */
function stripSpanSuffix(entry) {
  const cut = entry.search(/[:#]/);
  return cut === -1 ? entry : entry.slice(0, cut);
}
2196
/**
 * Flag pages whose provenance confidence is defined and lower than
 * LOW_CONFIDENCE_THRESHOLD. Pages with no confidence value are not reported.
 *
 * @param {string} root - Project root passed through to collectAllPages.
 * @returns {Promise<Array<{rule: string, severity: string, file: string, message: string}>>}
 *   One "low-confidence" warning per page below the threshold.
 */
async function checkLowConfidencePages(root) {
  const results = [];
  for (const { content, filePath } of await collectAllPages(root)) {
    const { confidence } = parseProvenanceMetadata(parseFrontmatter(content).meta);
    if (confidence !== void 0 && !(confidence >= LOW_CONFIDENCE_THRESHOLD)) {
      results.push({
        rule: "low-confidence",
        severity: "warning",
        file: filePath,
        message: `Page confidence ${confidence.toFixed(2)} is below ${LOW_CONFIDENCE_THRESHOLD}`
      });
    }
  }
  return results;
}
2212
/**
 * Flag pages whose provenance metadata lists at least one `contradictedBy`
 * reference; the message lists the referenced slugs.
 *
 * @param {string} root - Project root passed through to collectAllPages.
 * @returns {Promise<Array<{rule: string, severity: string, file: string, message: string}>>}
 *   One "contradicted-page" warning per page with contradiction references.
 */
async function checkContradictedPages(root) {
  const results = [];
  for (const { content, filePath } of await collectAllPages(root)) {
    const { contradictedBy } = parseProvenanceMetadata(parseFrontmatter(content).meta);
    if (contradictedBy && contradictedBy.length !== 0) {
      const slugList = contradictedBy.map(({ slug }) => slug).join(", ");
      results.push({
        rule: "contradicted-page",
        severity: "warning",
        file: filePath,
        message: `Page contradicts: ${slugList}`
      });
    }
  }
  return results;
}
2229
/**
 * Flag pages with more uncited (inferred) paragraphs than
 * MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS allows.
 *
 * The count comes from the `inferredParagraphs` provenance field when it is
 * present; otherwise it is computed from the body by
 * countUncitedProseParagraphs.
 *
 * @param {string} root - Project root passed through to collectAllPages.
 * @returns {Promise<Array<{rule: string, severity: string, file: string, message: string}>>}
 *   One "excess-inferred-paragraphs" warning per offending page.
 */
async function checkInferredWithoutCitations(root) {
  const results = [];
  for (const { content, filePath } of await collectAllPages(root)) {
    const { meta, body } = parseFrontmatter(content);
    const { inferredParagraphs } = parseProvenanceMetadata(meta);
    const inferred = inferredParagraphs ?? countUncitedProseParagraphs(body);
    if (!(inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS)) {
      results.push({
        rule: "excess-inferred-paragraphs",
        severity: "warning",
        file: filePath,
        message: `Page has ${inferred} inferred paragraphs without citations (max ${MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS})`
      });
    }
  }
  return results;
}
2246
+ var PROSE_PARAGRAPH_LEAD = /^[A-Za-z]/;
2247
/**
 * Count blank-line-separated paragraphs that read as prose but carry no
 * citation marker.
 *
 * A paragraph counts when, after trimming, it is non-empty, matches
 * PROSE_PARAGRAPH_LEAD, and does not match CITATION_PATTERN.
 *
 * @param {string} body - Page body without frontmatter.
 * @returns {number} Number of uncited prose paragraphs.
 */
function countUncitedProseParagraphs(body) {
  const isUncitedProse = (block) => {
    const text = block.trim();
    if (text.length === 0 || !PROSE_PARAGRAPH_LEAD.test(text)) return false;
    const cited = CITATION_PATTERN.test(text);
    // CITATION_PATTERN is a shared /g regex: rewind it so the next test() or
    // exec() on it starts from the beginning of its input.
    CITATION_PATTERN.lastIndex = 0;
    return !cited;
  };
  return body.split(/\n\s*\n/).filter(isUncitedProse).length;
}
2263
+ var COLON_SPAN_PATTERN = /^[^:#]+:(\d+)(?:-(\d+))?$/;
2264
+ var HASH_SPAN_PATTERN = /^[^:#]+#L(\d+)(?:-L(\d+))?$/;
2265
/**
 * Run the per-kind minimum-wikilink rule over every page.
 *
 * The rule itself lives in checkPageCrossLinks; this function only applies it
 * to each collected page so the on-disk lint and the single-page check cannot
 * drift apart.
 *
 * @param {string} root - Project root passed through to collectAllPages.
 * @param {object} schema - Schema object handed unchanged to checkPageCrossLinks.
 * @returns {Promise<Array<{rule: string, severity: string, file: string, message: string}>>}
 *   All "schema-cross-link-minimum" warnings, in page order.
 */
async function checkSchemaCrossLinks(root, schema) {
  const pages = await collectAllPages(root);
  return pages.flatMap((page) => checkPageCrossLinks(page.content, page.filePath, schema));
}
2284
/**
 * Check one page's text against its kind's minimum-wikilink rule.
 *
 * @param {string} content - Full page text, frontmatter included.
 * @param {string} filePath - Path recorded in the returned violation.
 * @param {object} schema - Schema whose `kinds[kind].minWikilinks` sets the minimum.
 * @returns {Array<{rule: string, severity: string, file: string, message: string}>}
 *   Empty when the kind requires no links or the page has enough; otherwise a
 *   single "schema-cross-link-minimum" warning.
 */
function checkPageCrossLinks(content, filePath, schema) {
  const { meta, body } = parseFrontmatter(content);
  const kind = resolvePageKind(meta.kind, schema);
  const { minWikilinks } = schema.kinds[kind];
  // Kinds with no minimum are never counted.
  if (minWikilinks <= 0) return [];
  const linkCount = countWikilinks(body);
  if (linkCount >= minWikilinks) return [];
  const violation = {
    rule: "schema-cross-link-minimum",
    severity: "warning",
    file: filePath,
    message: `Page kind "${kind}" requires at least ${minWikilinks} [[wikilinks]] but only ${linkCount} found.`
  };
  return [violation];
}
2300
/**
 * Extract the line span from a citation entry written as "file:N", "file:N-M",
 * "file#LN", or "file#LN-LM".
 *
 * @param {string} entry - A single trimmed citation entry.
 * @returns {{start: number, end: number}|null} The span, with `end` equal to
 *   `start` when only one line is given; null when the entry has no span in
 *   either supported form.
 */
function parseLineRange(entry) {
  // The colon form is tried first, then the #L form.
  for (const pattern of [COLON_SPAN_PATTERN, HASH_SPAN_PATTERN]) {
    const match = pattern.exec(entry);
    if (!match) continue;
    const [, first, last] = match;
    const start = Number(first);
    return { start, end: last === void 0 ? start : Number(last) };
  }
  return null;
}
2315
/**
 * Count the lines in a text.
 *
 * A trailing newline terminates the last line rather than starting a new one,
 * so "a\nb\n" and "a\nb" both have two lines. Counting the empty segment after
 * the final "\n" would let a citation one line past the end of the file pass
 * the bounds check.
 *
 * @param {string} content - File text.
 * @returns {number} Number of lines; 0 for the empty string.
 */
function countLines(content) {
  if (content.length === 0) return 0;
  const segments = content.split("\n").length;
  return content.endsWith("\n") ? segments - 1 : segments;
}
2319
/**
 * Report citation markers (`^[...]`) that point at a missing source file or at
 * a line span beyond the end of an existing one.
 *
 * @param {string} root - Project root; sources are looked up under SOURCES_DIR.
 * @returns {Promise<Array<{rule: string, severity: string, file: string, message: string, line: number}>>}
 *   The "broken-citation" errors gathered by collectBrokenForMarker.
 */
async function checkBrokenCitations(root) {
  const pages = await collectAllPages(root);
  const sourcesDir = path16.join(root, SOURCES_DIR);
  const findings = [];
  // Shared across all pages so each cited source is line-counted only once.
  const lineCounts = /* @__PURE__ */ new Map();
  for (const { content, filePath } of pages) {
    const markers = findMatchesInContent(content, CITATION_PATTERN);
    for (const marker of markers) {
      await collectBrokenForMarker(marker.captured, marker.line, filePath, sourcesDir, lineCounts, findings);
    }
  }
  return findings;
}
2331
/**
 * Validate each comma-separated entry of one citation marker and append a
 * "broken-citation" error to `out` for every entry that fails.
 *
 * An entry fails when its file part does not exist under `sourcesDir`, or when
 * it carries a line span whose end lies past the source's line count. Blank
 * entries are ignored.
 *
 * @param {string} captured - Text inside `^[...]`, possibly several entries.
 * @param {number} line - Page line the marker was found on.
 * @param {string} pageFile - Path of the page containing the marker.
 * @param {string} sourcesDir - Directory cited files are resolved against.
 * @param {Map<string, number>} lineCountCache - Line counts keyed by cited filename.
 * @param {Array<object>} out - Result list that is appended to in place.
 * @returns {Promise<void>}
 */
async function collectBrokenForMarker(captured, line, pageFile, sourcesDir, lineCountCache, out) {
  const entries = captured
    .split(",")
    .map((part) => part.trim())
    .filter((part) => part.length > 0);
  for (const entry of entries) {
    const filename = stripSpanSuffix(entry);
    const citedPath = path16.join(sourcesDir, filename);
    if (!existsSync6(citedPath)) {
      out.push({
        rule: "broken-citation",
        severity: "error",
        file: pageFile,
        message: `Broken citation ^[${filename}] \u2014 source file not found`,
        line
      });
      continue;
    }
    const range = parseLineRange(entry);
    // Whole-file citations have nothing further to verify.
    if (range === null) continue;
    const lineCount = await resolveLineCount(citedPath, filename, lineCountCache);
    if (!(range.end <= lineCount)) {
      out.push({
        rule: "broken-citation",
        severity: "error",
        file: pageFile,
        message: `Claim-level span ^[${entry}] is out of bounds (source has only ${lineCount} lines)`,
        line
      });
    }
  }
}
2360
/**
 * Return the line count of a cited source, reading the file only on the first
 * request for a given filename.
 *
 * @param {string} citedPath - Path the source is read from.
 * @param {string} filename - Cache key for the source.
 * @param {Map<string, number>} cache - Line counts already computed; updated in place.
 * @returns {Promise<number>} Line count as computed by countLines.
 */
async function resolveLineCount(citedPath, filename, cache) {
  if (!cache.has(filename)) {
    cache.set(filename, countLines(await safeReadFile(citedPath)));
  }
  return cache.get(filename);
}
2368
/**
 * Report citation markers containing an entry that isMalformedCitationEntry
 * rejects. One error is emitted per malformed entry, each quoting the whole
 * marker text.
 *
 * @param {string} root - Project root passed through to collectAllPages.
 * @returns {Promise<Array<{rule: string, severity: string, file: string, message: string, line: number}>>}
 *   The "malformed-claim-citation" errors, in page and marker order.
 */
async function checkMalformedClaimCitations(root) {
  const pages = await collectAllPages(root);
  return pages.flatMap((page) =>
    findMatchesInContent(page.content, CITATION_PATTERN).flatMap(({ captured, line }) =>
      captured
        .split(",")
        .filter((part) => isMalformedCitationEntry(part))
        .map(() => ({
          rule: "malformed-claim-citation",
          severity: "error",
          file: page.filePath,
          message: `Malformed claim citation ^[${captured}] \u2014 expected file.md, file.md:N-N, or file.md#LN-LN`,
          line
        }))
    )
  );
}
2387
+
2388
+ // src/compiler/page-renderer.ts
2389
+ import { readdir as readdir8 } from "fs/promises";
2390
+ import path17 from "path";
2391
+
2392
+ // src/compiler/provenance.ts
2393
/**
 * Copy the provenance fields that are present on `concept` into `fields`.
 *
 * `confidence` and `inferredParagraphs` are copied only when they are numbers,
 * `provenanceState` only when truthy, and `contradictedBy` only when it has at
 * least one element.
 *
 * @param {object} fields - Frontmatter field bag, mutated in place.
 * @param {object} concept - Concept carrying optional provenance data.
 * @returns {void}
 */
function addProvenanceMeta(fields, concept) {
  const { confidence, provenanceState, contradictedBy, inferredParagraphs } = concept;
  if (typeof confidence === "number") fields.confidence = confidence;
  if (provenanceState) fields.provenanceState = provenanceState;
  if (contradictedBy && contradictedBy.length > 0) fields.contradictedBy = contradictedBy;
  if (typeof inferredParagraphs === "number") fields.inferredParagraphs = inferredParagraphs;
}
2407
/**
 * Emit a status warning when a concept lists contradicting pages; does nothing
 * when `contradictedBy` is missing or empty.
 *
 * @param {string} conceptTitle - Title quoted in the warning.
 * @param {object} concept - Concept whose `contradictedBy` entries expose `slug`.
 * @returns {void}
 */
function reportContradictionWarnings(conceptTitle, concept) {
  const { contradictedBy: conflicts } = concept;
  if (!conflicts || conflicts.length === 0) return;
  const slugList = conflicts.map(({ slug }) => slug).join(", ");
  const message = `Contradiction reported on "${conceptTitle}" \u2014 conflicts with: ${slugList}`;
  status("!", warn(message));
}
2416
+
2417
+ // src/compiler/page-renderer.ts
2418
+ var RELATED_PAGE_CONTEXT_LIMIT = 5;
2419
/**
 * Produce the full text (frontmatter plus generated body) of a concept page.
 *
 * Reads the page currently stored for the entry's slug and the related-page
 * context, builds the system prompt from them, asks callClaude for the body,
 * then prepends frontmatter built by buildMergedFrontmatter. Contradiction
 * warnings for the concept are reported before returning.
 *
 * @param {string} root - Project root.
 * @param {object} entry - Merged extraction entry (`slug`, `concept`, `combinedContent`, ...).
 * @param {object} schema - Schema forwarded to buildMergedFrontmatter.
 * @returns {Promise<string>} Frontmatter, a blank line, the body, and a trailing newline.
 */
async function renderMergedPageContent(root, entry, schema) {
  const conceptName = entry.concept.concept;
  const existingPage = await safeReadFile(path17.join(root, CONCEPTS_DIR, `${entry.slug}.md`));
  const relatedPages = await loadRelatedPages(root, entry.slug);
  const system = buildPagePrompt(conceptName, entry.combinedContent, existingPage, relatedPages);
  const request = {
    system,
    messages: [
      { role: "user", content: `Write the wiki page for "${conceptName}".` }
    ]
  };
  const pageBody = await callClaude(request);
  const frontmatter = buildMergedFrontmatter(entry, existingPage, schema);
  reportContradictionWarnings(conceptName, entry.concept);
  return `${frontmatter}\n\n${pageBody}\n`;
}
2442
/**
 * Build the frontmatter block for a merged concept page.
 *
 * `createdAt` is carried over from the existing page when that page has a
 * non-empty string value for it; otherwise it is the current time.
 * `updatedAt` is always the current time and `kind` is the schema's default.
 *
 * @param {object} entry - Merged extraction entry (`concept`, `sourceFiles`).
 * @param {string} existingPage - Current page text; falsy when there is none.
 * @param {object} schema - Schema supplying `defaultKind`.
 * @returns {string} Whatever buildFrontmatter produces for the assembled fields.
 */
function buildMergedFrontmatter(entry, existingPage, schema) {
  const now = (/* @__PURE__ */ new Date()).toISOString();
  const { concept } = entry;
  const priorCreatedAt = existingPage ? parseFrontmatter(existingPage).meta.createdAt : void 0;
  const keepsPrior = typeof priorCreatedAt === "string" && priorCreatedAt !== "";
  const frontmatterFields = {
    title: concept.concept,
    summary: concept.summary,
    sources: entry.sourceFiles,
    kind: schema.defaultKind,
    createdAt: keepsPrior ? priorCreatedAt : now,
    updatedAt: now
  };
  addObsidianMeta(frontmatterFields, concept.concept, concept.tags ?? []);
  addProvenanceMeta(frontmatterFields, concept);
  return buildFrontmatter(frontmatterFields);
}
2458
/**
 * Load up to RELATED_PAGE_CONTEXT_LIMIT other concept pages to give the page
 * generator some surrounding context.
 *
 * The limit counts pages actually included: the page being written, non-".md"
 * files, unreadable or empty pages, and pages flagged `orphaned` are skipped
 * without using up a slot.
 *
 * @param {string} root - Project root; pages are read from CONCEPTS_DIR beneath it.
 * @param {string} excludeSlug - Slug of the page being generated, left out of the context.
 * @returns {Promise<string>} The selected page texts joined by a "---" separator,
 *   or "" when the directory cannot be listed or nothing qualifies.
 */
async function loadRelatedPages(root, excludeSlug) {
  const conceptsPath = path17.join(root, CONCEPTS_DIR);
  let files;
  try {
    files = await readdir8(conceptsPath);
  } catch {
    // Best effort: with no listable concepts directory there is no context.
    return "";
  }
  const ownFile = `${excludeSlug}.md`;
  const contents = [];
  for (const file of files) {
    if (contents.length >= RELATED_PAGE_CONTEXT_LIMIT) break;
    if (!file.endsWith(".md") || file === ownFile) continue;
    const content = await safeReadFile(path17.join(conceptsPath, file));
    if (!content) continue;
    const { meta } = parseFrontmatter(content);
    if (meta.orphaned) continue;
    contents.push(content);
  }
  return contents.join("\n\n---\n\n");
}
2477
+
2478
+ // src/compiler/index.ts
2479
+ import pLimit from "p-limit";
2480
/**
 * Create a compile result with zeroed counters and empty lists.
 *
 * @returns {{compiled: number, skipped: number, deleted: number, concepts: Array, pages: Array, errors: Array}}
 *   A fresh object on every call, so callers may mutate it freely.
 */
function emptyCompileResult() {
  const counters = { compiled: 0, skipped: 0, deleted: 0 };
  const lists = { concepts: [], pages: [], errors: [] };
  return { ...counters, ...lists };
}
2483
/**
 * Run a compile for its side effects only.
 *
 * @param {string} root - Project root.
 * @param {object} [options={}] - Compile options forwarded to compileAndReport.
 * @returns {Promise<void>} Resolves once compileAndReport has finished; its
 *   result object is deliberately not returned.
 */
async function compile(root, options = {}) {
  const ignoredResult = await compileAndReport(root, options);
  void ignoredResult;
}
2486
/**
 * Run the compile pipeline while holding the project lock and return its result.
 *
 * When the lock cannot be acquired, the pipeline is not run: a status message
 * is printed and an otherwise empty result carrying a single error is returned.
 * The lock is released even if the pipeline throws.
 *
 * @param {string} root - Project root.
 * @param {object} [options={}] - Compile options forwarded to runCompilePipeline.
 * @returns {Promise<object>} The pipeline result, or the lock-failure result.
 */
async function compileAndReport(root, options = {}) {
  header("llmwiki compile");
  if (await acquireLock(root)) {
    try {
      return await runCompilePipeline(root, options);
    } finally {
      await releaseLock(root);
    }
  }
  status("!", error("Could not acquire lock. Try again later."));
  const lockFailure = "Could not acquire .llmwiki/lock \u2014 another compile is in progress.";
  return { ...emptyCompileResult(), errors: [lockFailure] };
}
2502
/**
 * Sort detected changes into the three groups the pipeline acts on.
 *
 * @param {Array<{status: string}>} changes - Change records.
 * @returns {{toCompile: Array, deleted: Array, unchanged: Array}} "new" and
 *   "changed" records under `toCompile`, plus the "deleted" and "unchanged"
 *   records; input order is kept within each group and records with any other
 *   status are left out.
 */
function bucketChanges(changes) {
  const buckets = { toCompile: [], deleted: [], unchanged: [] };
  for (const change of changes) {
    switch (change.status) {
      case "new":
      case "changed":
        buckets.toCompile.push(change);
        break;
      case "deleted":
        buckets.deleted.push(change);
        break;
      case "unchanged":
        buckets.unchanged.push(change);
        break;
      default:
        break;
    }
  }
  return buckets;
}
2509
/**
 * Generate a page (or review candidate) for every merged concept, running at
 * most COMPILE_CONCURRENCY generations at a time.
 *
 * @param {string} root - Project root.
 * @param {Array<object>} extractions - Per-source extraction results.
 * @param {Set<string>} frozenSlugs - Forwarded to mergeExtractions.
 * @param {object} schema - Schema forwarded to generateMergedPage.
 * @param {object} options - Compile options; `review` additionally triggers
 *   building the extraction source states.
 * @returns {Promise<{pages: Array<object>, errors: Array, candidates: Array}>}
 *   Every merged entry (whether or not its generation reported an error), the
 *   reported errors, and the ids of any candidates written.
 */
async function generatePagesPhase(root, extractions, frozenSlugs, schema, options) {
  const merged = mergeExtractions(extractions, frozenSlugs);
  let sourceStates = {};
  if (options.review) {
    sourceStates = await buildExtractionSourceStates(root, extractions);
  }
  const limit = pLimit(COMPILE_CONCURRENCY);
  const errors = [];
  const candidates = [];
  const renderEntry = async (entry) => {
    const { error: failure, candidateId } = await generateMergedPage(root, entry, schema, options, sourceStates);
    if (failure) errors.push(failure);
    if (candidateId) candidates.push(candidateId);
    return entry;
  };
  const pages = await Promise.all(merged.map((entry) => limit(() => renderEntry(entry))));
  return { pages, errors, candidates };
}
2525
/**
 * Persist source state for each extraction that produced at least one concept,
 * one extraction at a time and in input order.
 *
 * @param {string} root - Project root.
 * @param {Array<{concepts: Array, sourcePath: string, sourceFile: string}>} extractions
 * @returns {Promise<void>}
 */
async function persistExtractionStates(root, extractions) {
  for (const { concepts, sourcePath, sourceFile } of extractions) {
    if (concepts.length !== 0) {
      await persistSourceState(root, sourcePath, sourceFile, concepts);
    }
  }
}
2531
+ function summarizeCompile(buckets, generation, extractions, options) {
2532
+ header("Compilation complete");
2533
+ status("\u2713", success(
1990
2534
  `${buckets.toCompile.length} compiled, ${buckets.unchanged.length} skipped, ${buckets.deleted.length} deleted`
1991
2535
  ));
1992
2536
  if (options.review && generation.candidates.length > 0) {
@@ -2016,12 +2560,24 @@ function summarizeCompile(buckets, generation, extractions, options) {
2016
2560
  return baseResult;
2017
2561
  }
2018
2562
  async function runCompilePipeline(root, options) {
2563
+ const schema = await loadSchema(root);
2564
+ reportSchemaStatus(schema);
2019
2565
  const state = await readState(root);
2020
2566
  const changes = await detectChanges(root, state);
2021
2567
  augmentWithAffectedSources(changes, findAffectedSources(state, changes));
2022
2568
  const buckets = bucketChanges(changes);
2023
2569
  if (buckets.toCompile.length === 0 && buckets.deleted.length === 0) {
2024
2570
  status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
2571
+ if (!options.review) {
2572
+ const emptyGeneration = { pages: [], errors: [], candidates: [] };
2573
+ await generateSeedPages(root, schema, emptyGeneration);
2574
+ await finalizeWiki(root, emptyGeneration.pages);
2575
+ return {
2576
+ ...emptyCompileResult(),
2577
+ skipped: buckets.unchanged.length,
2578
+ errors: emptyGeneration.errors
2579
+ };
2580
+ }
2025
2581
  return { ...emptyCompileResult(), skipped: buckets.unchanged.length };
2026
2582
  }
2027
2583
  printChangesSummary(changes);
@@ -2034,17 +2590,23 @@ async function runCompilePipeline(root, options) {
2034
2590
  if (!options.review) {
2035
2591
  await freezeFailedExtractions(root, extractions, frozenSlugs);
2036
2592
  }
2037
- const generation = await generatePagesPhase(root, extractions, frozenSlugs, options);
2593
+ const generation = await generatePagesPhase(root, extractions, frozenSlugs, schema, options);
2038
2594
  if (!options.review) {
2039
2595
  await persistExtractionStates(root, extractions);
2040
2596
  if (frozenSlugs.size > 0) {
2041
2597
  await orphanUnownedFrozenPages(root, frozenSlugs);
2042
2598
  }
2043
2599
  await persistFrozenSlugs(root, frozenSlugs, extractions);
2600
+ await generateSeedPages(root, schema, generation);
2044
2601
  await finalizeWiki(root, generation.pages);
2045
2602
  }
2046
2603
  return summarizeCompile(buckets, generation, extractions, options);
2047
2604
  }
2605
/**
 * Print where the schema was loaded from; silent when `loadedFrom` is falsy.
 *
 * @param {{loadedFrom?: string}} schema - Loaded schema.
 * @returns {void}
 */
function reportSchemaStatus(schema) {
  const { loadedFrom } = schema;
  if (!loadedFrom) return;
  status("i", dim(`Schema: ${loadedFrom}`));
}
2048
2610
  function augmentWithAffectedSources(changes, affected) {
2049
2611
  for (const file of affected) {
2050
2612
  status("~", info(`${file} [affected by shared concept]`));
@@ -2105,9 +2667,9 @@ function printChangesSummary(changes) {
2105
2667
  }
2106
2668
  async function extractForSource(root, sourceFile) {
2107
2669
  status("*", info(`Extracting: ${sourceFile}`));
2108
- const sourcePath = path16.join(root, SOURCES_DIR, sourceFile);
2109
- const sourceContent = await readFile8(sourcePath, "utf-8");
2110
- const existingIndex = await safeReadFile(path16.join(root, INDEX_FILE));
2670
+ const sourcePath = path18.join(root, SOURCES_DIR, sourceFile);
2671
+ const sourceContent = await readFile10(sourcePath, "utf-8");
2672
+ const existingIndex = await safeReadFile(path18.join(root, INDEX_FILE));
2111
2673
  const concepts = await extractConcepts(sourceContent, existingIndex);
2112
2674
  if (concepts.length > 0) {
2113
2675
  const names = concepts.map((c) => c.concept).join(", ");
@@ -2165,27 +2727,77 @@ ${result.sourceContent}`
2165
2727
  }
2166
2728
  return Array.from(bySlug.values());
2167
2729
  }
2168
- async function generateMergedPage(root, entry, options, sourceStates) {
2169
- const fullPage = await renderMergedPageContent(root, entry);
2730
+ async function generateMergedPage(root, entry, schema, options, sourceStates) {
2731
+ const fullPage = await renderMergedPageContent(root, entry, schema);
2170
2732
  if (options.review) {
2171
- return await persistReviewCandidate(root, entry, fullPage, sourceStates);
2733
+ return await persistReviewCandidate(root, entry, fullPage, sourceStates, schema);
2172
2734
  }
2173
- const pagePath = path16.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
2735
+ const pagePath = path18.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
2174
2736
  const error2 = await writePageIfValid(pagePath, fullPage, entry.concept.concept);
2175
2737
  return { error: error2 ?? void 0 };
2176
2738
  }
2177
- async function persistReviewCandidate(root, entry, fullPage, sourceStates) {
2739
+ async function persistReviewCandidate(root, entry, fullPage, sourceStates, schema) {
2740
+ const virtualPath = `wiki/concepts/${entry.slug}.md`;
2741
+ const violations = checkPageCrossLinks(fullPage, virtualPath, schema);
2178
2742
  const candidate = await writeCandidate(root, {
2179
2743
  title: entry.concept.concept,
2180
2744
  slug: entry.slug,
2181
2745
  summary: entry.concept.summary,
2182
2746
  sources: entry.sourceFiles,
2183
2747
  body: fullPage,
2184
- sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles)
2748
+ sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles),
2749
+ schemaViolations: violations.length > 0 ? violations : void 0
2185
2750
  });
2186
2751
  status("?", info(`Candidate ready: ${candidate.id} (${entry.slug})`));
2187
2752
  return { candidateId: candidate.id };
2188
2753
  }
2754
/**
 * Generate every seed page declared by the schema, one after another, and
 * record any error a page reports on `generation.errors`.
 *
 * @param {string} root - Project root.
 * @param {{seedPages: Array<object>}} schema - Schema listing the seed pages.
 * @param {{errors: Array}} generation - Generation summary; `errors` is appended to in place.
 * @returns {Promise<void>}
 */
async function generateSeedPages(root, schema, generation) {
  const { seedPages } = schema;
  if (seedPages.length === 0) return;
  for (const seed of seedPages) {
    const failure = await generateSingleSeedPage(root, schema, seed);
    if (failure) {
      generation.errors.push(failure);
    }
  }
}
2761
/**
 * Generate and write one schema-declared seed page.
 *
 * The body comes from callClaude, prompted with the seed, its kind's rule and
 * the text of its related pages. The frontmatter lists no sources and keeps the
 * `createdAt` of an already existing page when that value is a string.
 *
 * @param {string} root - Project root.
 * @param {{kinds: object}} schema - Schema supplying the rule for `seed.kind`.
 * @param {{title: string, summary: string, kind: string, relatedSlugs?: string[]}} seed
 * @returns {Promise<*>} Whatever writePageIfValid returns — the caller treats a
 *   truthy value as an error to record.
 */
async function generateSingleSeedPage(root, schema, seed) {
  const { title, summary, kind } = seed;
  const pagePath = path18.join(root, CONCEPTS_DIR, `${slugify(title)}.md`);
  const relatedContent = await loadSeedRelatedPages(root, seed.relatedSlugs ?? []);
  const system = buildSeedPagePrompt(seed, schema.kinds[kind], relatedContent);
  const pageBody = await callClaude({
    system,
    messages: [{ role: "user", content: `Write the ${kind} page titled "${title}".` }]
  });
  const now = (/* @__PURE__ */ new Date()).toISOString();
  const existing = await safeReadFile(pagePath);
  const priorCreatedAt = existing ? parseFrontmatter(existing).meta?.createdAt : void 0;
  const frontmatterFields = {
    title,
    summary,
    sources: [],
    kind,
    createdAt: typeof priorCreatedAt === "string" ? priorCreatedAt : now,
    updatedAt: now
  };
  addObsidianMeta(frontmatterFields, title, []);
  const frontmatter = buildFrontmatter(frontmatterFields);
  return await writePageIfValid(pagePath, `${frontmatter}\n\n${pageBody}\n`, title);
}
2791
/**
 * Read the concept pages named by `slugs` and join the ones that yield text.
 *
 * @param {string} root - Project root; pages are read from CONCEPTS_DIR beneath it.
 * @param {string[]} slugs - Slugs of the pages to load, in the order they should appear.
 * @returns {Promise<string>} Page texts joined by a "---" separator; "" when
 *   `slugs` is empty or no page could be read.
 */
async function loadSeedRelatedPages(root, slugs) {
  const pages = [];
  for (const slug of slugs) {
    const text = await safeReadFile(path18.join(root, CONCEPTS_DIR, `${slug}.md`));
    if (text) {
      pages.push(text);
    }
  }
  return pages.join("\n\n---\n\n");
}
2189
2801
  async function extractConcepts(sourceContent, existingIndex) {
2190
2802
  const system = buildExtractionPrompt(sourceContent, existingIndex);
2191
2803
  const rawOutput = await callClaude({
@@ -2223,7 +2835,7 @@ async function persistSourceState(root, sourcePath, sourceFile, concepts) {
2223
2835
 
2224
2836
  // src/commands/compile.ts
2225
2837
  async function compileCommand(options = {}) {
2226
- if (!existsSync5(SOURCES_DIR)) {
2838
+ if (!existsSync7(SOURCES_DIR)) {
2227
2839
  status(
2228
2840
  "!",
2229
2841
  warn("No sources found. Run `llmwiki ingest <url>` first.")
@@ -2234,8 +2846,8 @@ async function compileCommand(options = {}) {
2234
2846
  }
2235
2847
 
2236
2848
  // src/commands/query.ts
2237
- import { existsSync as existsSync6 } from "fs";
2238
- import path17 from "path";
2849
+ import { existsSync as existsSync8 } from "fs";
2850
+ import path19 from "path";
2239
2851
  var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
2240
2852
  var PAGE_SELECTION_TOOL = {
2241
2853
  name: "select_pages",
@@ -2290,7 +2902,7 @@ async function selectRelevantPages(root, question) {
2290
2902
  const { pages: rawPages2, reasoning: reasoning2 } = await selectPages(question, filteredIndex);
2291
2903
  return { pages: rawPages2, rawPages: rawPages2, reasoning: reasoning2 };
2292
2904
  }
2293
- const indexContent = await safeReadFile(path17.join(root, INDEX_FILE));
2905
+ const indexContent = await safeReadFile(path19.join(root, INDEX_FILE));
2294
2906
  const { pages: rawPages, reasoning } = await selectPages(question, indexContent);
2295
2907
  return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning };
2296
2908
  }
@@ -2308,7 +2920,7 @@ async function loadSelectedPages(root, slugs) {
2308
2920
  for (const slug of slugs) {
2309
2921
  let content = "";
2310
2922
  for (const dir of PAGE_DIRS) {
2311
- const candidate = await safeReadFile(path17.join(root, dir, `${slug}.md`));
2923
+ const candidate = await safeReadFile(path19.join(root, dir, `${slug}.md`));
2312
2924
  if (!candidate) continue;
2313
2925
  const { meta } = parseFrontmatter(candidate);
2314
2926
  if (meta.orphaned) continue;
@@ -2344,7 +2956,7 @@ function summarizeAnswer(answer) {
2344
2956
  }
2345
2957
  async function saveQueryPage(root, question, answer) {
2346
2958
  const slug = slugify(question);
2347
- const filePath = path17.join(root, QUERIES_DIR, `${slug}.md`);
2959
+ const filePath = path19.join(root, QUERIES_DIR, `${slug}.md`);
2348
2960
  const frontmatter = buildFrontmatter({
2349
2961
  title: question,
2350
2962
  summary: summarizeAnswer(answer),
@@ -2370,7 +2982,7 @@ ${answer}
2370
2982
  return slug;
2371
2983
  }
2372
2984
  async function generateAnswer(root, question, options = {}) {
2373
- if (!existsSync6(path17.join(root, INDEX_FILE))) {
2985
+ if (!existsSync8(path19.join(root, INDEX_FILE))) {
2374
2986
  throw new Error("Wiki index not found. Run `llmwiki compile` first.");
2375
2987
  }
2376
2988
  const { pages, reasoning } = await selectRelevantPages(root, question);
@@ -2387,7 +2999,7 @@ async function generateAnswer(root, question, options = {}) {
2387
2999
  return { answer, selectedPages: pages, reasoning, saved };
2388
3000
  }
2389
3001
  async function queryCommand(root, question, options) {
2390
- if (!existsSync6(path17.join(root, INDEX_FILE))) {
3002
+ if (!existsSync8(path19.join(root, INDEX_FILE))) {
2391
3003
  status("!", error("Wiki index not found. Run `llmwiki compile` first."));
2392
3004
  return;
2393
3005
  }
@@ -2415,12 +3027,12 @@ async function queryCommand(root, question, options) {
2415
3027
 
2416
3028
  // src/commands/watch.ts
2417
3029
  import { watch as chokidarWatch } from "chokidar";
2418
- import { existsSync as existsSync7 } from "fs";
2419
- import path18 from "path";
3030
+ import { existsSync as existsSync9 } from "fs";
3031
+ import path20 from "path";
2420
3032
  var DEBOUNCE_MS = 500;
2421
3033
  async function watchCommand() {
2422
- const sourcesPath = path18.resolve(SOURCES_DIR);
2423
- if (!existsSync7(sourcesPath)) {
3034
+ const sourcesPath = path20.resolve(SOURCES_DIR);
3035
+ if (!existsSync9(sourcesPath)) {
2424
3036
  status(
2425
3037
  "!",
2426
3038
  warn("No sources/ directory found. Run `llmwiki ingest <url>` first.")
@@ -2454,7 +3066,7 @@ async function watchCommand() {
2454
3066
  const scheduleCompile = (eventPath, event) => {
2455
3067
  status(
2456
3068
  "~",
2457
- dim(`${event}: ${path18.basename(eventPath)}`)
3069
+ dim(`${event}: ${path20.basename(eventPath)}`)
2458
3070
  );
2459
3071
  if (debounceTimer) clearTimeout(debounceTimer);
2460
3072
  debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
@@ -2468,261 +3080,30 @@ async function watchCommand() {
2468
3080
  });
2469
3081
  }
2470
3082
 
2471
- // src/linter/rules.ts
2472
- import { readdir as readdir8, readFile as readFile9 } from "fs/promises";
2473
- import { existsSync as existsSync8 } from "fs";
2474
- import path19 from "path";
2475
- var MIN_BODY_LENGTH = 50;
2476
- var WIKILINK_PATTERN = /\[\[([^\]]+)\]\]/g;
2477
- var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
2478
- function findMatchesInContent(content, pattern) {
2479
- const results = [];
2480
- const lines = content.split("\n");
2481
- for (let i = 0; i < lines.length; i++) {
2482
- const matches = lines[i].matchAll(pattern);
2483
- for (const match of matches) {
2484
- results.push({ captured: match[1], line: i + 1 });
2485
- }
2486
- }
2487
- return results;
2488
- }
2489
- async function readMarkdownFiles(dirPath) {
2490
- if (!existsSync8(dirPath)) return [];
2491
- const entries = await readdir8(dirPath);
2492
- const mdFiles = entries.filter((f) => f.endsWith(".md"));
2493
- const results = await Promise.all(
2494
- mdFiles.map(async (fileName) => {
2495
- const filePath = path19.join(dirPath, fileName);
2496
- const content = await readFile9(filePath, "utf-8");
2497
- return { filePath, content };
2498
- })
2499
- );
2500
- return results;
2501
- }
2502
- async function collectAllPages(root) {
2503
- const conceptPages = await readMarkdownFiles(path19.join(root, CONCEPTS_DIR));
2504
- const queryPages = await readMarkdownFiles(path19.join(root, QUERIES_DIR));
2505
- return [...conceptPages, ...queryPages];
2506
- }
2507
- function buildPageSlugSet(pages) {
2508
- const slugs = /* @__PURE__ */ new Set();
2509
- for (const page of pages) {
2510
- const baseName = path19.basename(page.filePath, ".md");
2511
- slugs.add(baseName.toLowerCase());
2512
- }
2513
- return slugs;
2514
- }
2515
- async function checkBrokenWikilinks(root) {
2516
- const pages = await collectAllPages(root);
2517
- const existingSlugs = buildPageSlugSet(pages);
2518
- const results = [];
2519
- for (const page of pages) {
2520
- for (const { captured, line } of findMatchesInContent(page.content, WIKILINK_PATTERN)) {
2521
- const linkSlug = slugify(captured);
2522
- if (!existingSlugs.has(linkSlug)) {
2523
- results.push({
2524
- rule: "broken-wikilink",
2525
- severity: "error",
2526
- file: page.filePath,
2527
- message: `Broken wikilink [[${captured}]] \u2014 no matching page found`,
2528
- line
2529
- });
2530
- }
2531
- }
2532
- }
2533
- return results;
2534
- }
2535
- async function checkOrphanedPages(root) {
2536
- const pages = await collectAllPages(root);
2537
- const results = [];
2538
- for (const page of pages) {
2539
- const { meta } = parseFrontmatter(page.content);
2540
- if (meta.orphaned === true) {
2541
- results.push({
2542
- rule: "orphaned-page",
2543
- severity: "warning",
2544
- file: page.filePath,
2545
- message: `Page is marked as orphaned`
2546
- });
2547
- }
2548
- }
2549
- return results;
2550
- }
2551
- async function checkMissingSummaries(root) {
2552
- const pages = await collectAllPages(root);
2553
- const results = [];
2554
- for (const page of pages) {
2555
- const { meta } = parseFrontmatter(page.content);
2556
- const summary = meta.summary;
2557
- const isMissing = !summary || typeof summary === "string" && summary.trim() === "";
2558
- if (isMissing) {
2559
- results.push({
2560
- rule: "missing-summary",
2561
- severity: "warning",
2562
- file: page.filePath,
2563
- message: `Page has no summary in frontmatter`
2564
- });
2565
- }
2566
- }
2567
- return results;
2568
- }
2569
- async function checkDuplicateConcepts(root) {
2570
- const pages = await collectAllPages(root);
2571
- const titleMap = /* @__PURE__ */ new Map();
2572
- for (const page of pages) {
2573
- const { meta } = parseFrontmatter(page.content);
2574
- const title = typeof meta.title === "string" ? meta.title : "";
2575
- if (!title) continue;
2576
- const normalizedTitle = title.toLowerCase().trim();
2577
- const existing = titleMap.get(normalizedTitle) ?? [];
2578
- existing.push(page.filePath);
2579
- titleMap.set(normalizedTitle, existing);
2580
- }
2581
- const results = [];
2582
- for (const [title, files] of titleMap) {
2583
- if (files.length <= 1) continue;
2584
- for (const file of files) {
2585
- results.push({
2586
- rule: "duplicate-concept",
2587
- severity: "error",
2588
- file,
2589
- message: `Duplicate title "${title}" \u2014 also in ${files.filter((f) => f !== file).join(", ")}`
2590
- });
2591
- }
2592
- }
2593
- return results;
2594
- }
2595
- async function checkEmptyPages(root) {
2596
- const pages = await collectAllPages(root);
2597
- const results = [];
2598
- for (const page of pages) {
2599
- const { meta, body } = parseFrontmatter(page.content);
2600
- const hasTitle = typeof meta.title === "string" && meta.title.trim() !== "";
2601
- const isBodyEmpty = body.trim().length < MIN_BODY_LENGTH;
2602
- if (hasTitle && isBodyEmpty) {
2603
- results.push({
2604
- rule: "empty-page",
2605
- severity: "warning",
2606
- file: page.filePath,
2607
- message: `Page body is empty or too short (< ${MIN_BODY_LENGTH} chars)`
2608
- });
2609
- }
2610
- }
2611
- return results;
2612
- }
2613
- async function checkLowConfidencePages(root) {
2614
- const pages = await collectAllPages(root);
2615
- const results = [];
2616
- for (const page of pages) {
2617
- const { meta } = parseFrontmatter(page.content);
2618
- const { confidence } = parseProvenanceMetadata(meta);
2619
- if (confidence === void 0 || confidence >= LOW_CONFIDENCE_THRESHOLD) continue;
2620
- results.push({
2621
- rule: "low-confidence",
2622
- severity: "warning",
2623
- file: page.filePath,
2624
- message: `Page confidence ${confidence.toFixed(2)} is below ${LOW_CONFIDENCE_THRESHOLD}`
2625
- });
2626
- }
2627
- return results;
2628
- }
2629
- async function checkContradictedPages(root) {
2630
- const pages = await collectAllPages(root);
2631
- const results = [];
2632
- for (const page of pages) {
2633
- const { meta } = parseFrontmatter(page.content);
2634
- const { contradictedBy } = parseProvenanceMetadata(meta);
2635
- if (!contradictedBy || contradictedBy.length === 0) continue;
2636
- const slugs = contradictedBy.map((r) => r.slug).join(", ");
2637
- results.push({
2638
- rule: "contradicted-page",
2639
- severity: "warning",
2640
- file: page.filePath,
2641
- message: `Page contradicts: ${slugs}`
2642
- });
2643
- }
2644
- return results;
2645
- }
2646
- async function checkInferredWithoutCitations(root) {
2647
- const pages = await collectAllPages(root);
2648
- const results = [];
2649
- for (const page of pages) {
2650
- const { meta, body } = parseFrontmatter(page.content);
2651
- const provenance = parseProvenanceMetadata(meta);
2652
- const inferred = provenance.inferredParagraphs ?? countUncitedProseParagraphs(body);
2653
- if (inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS) continue;
2654
- results.push({
2655
- rule: "excess-inferred-paragraphs",
2656
- severity: "warning",
2657
- file: page.filePath,
2658
- message: `Page has ${inferred} inferred paragraphs without citations (max ${MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS})`
2659
- });
2660
- }
2661
- return results;
2662
- }
2663
- var PROSE_PARAGRAPH_LEAD = /^[A-Za-z]/;
2664
- function countUncitedProseParagraphs(body) {
2665
- const paragraphs = body.split(/\n\s*\n/);
2666
- let count = 0;
2667
- for (const block of paragraphs) {
2668
- const trimmed = block.trim();
2669
- if (trimmed.length === 0) continue;
2670
- if (!PROSE_PARAGRAPH_LEAD.test(trimmed)) continue;
2671
- if (CITATION_PATTERN.test(trimmed)) {
2672
- CITATION_PATTERN.lastIndex = 0;
2673
- continue;
2674
- }
2675
- CITATION_PATTERN.lastIndex = 0;
2676
- count += 1;
2677
- }
2678
- return count;
2679
- }
2680
- function splitCitationFilenames(captured) {
2681
- return captured.split(",").map((s) => s.trim()).filter((s) => s.length > 0);
2682
- }
2683
- async function checkBrokenCitations(root) {
2684
- const pages = await collectAllPages(root);
2685
- const sourcesDir = path19.join(root, SOURCES_DIR);
2686
- const results = [];
2687
- for (const page of pages) {
2688
- for (const { captured, line } of findMatchesInContent(page.content, CITATION_PATTERN)) {
2689
- for (const filename of splitCitationFilenames(captured)) {
2690
- const citedPath = path19.join(sourcesDir, filename);
2691
- if (!existsSync8(citedPath)) {
2692
- results.push({
2693
- rule: "broken-citation",
2694
- severity: "error",
2695
- file: page.filePath,
2696
- message: `Broken citation ^[${filename}] \u2014 source file not found`,
2697
- line
2698
- });
2699
- }
2700
- }
2701
- }
2702
- }
2703
- return results;
2704
- }
2705
-
2706
3083
  // src/linter/index.ts
2707
- var ALL_RULES = [
3084
+ var RULES_WITHOUT_SCHEMA = [
2708
3085
  checkBrokenWikilinks,
2709
3086
  checkOrphanedPages,
2710
3087
  checkMissingSummaries,
2711
3088
  checkDuplicateConcepts,
2712
3089
  checkEmptyPages,
2713
3090
  checkBrokenCitations,
3091
+ checkMalformedClaimCitations,
2714
3092
  checkLowConfidencePages,
2715
3093
  checkContradictedPages,
2716
3094
  checkInferredWithoutCitations
2717
3095
  ];
3096
+ var RULES_WITH_SCHEMA = [checkSchemaCrossLinks];
2718
3097
  function countBySeverity(results, severity) {
2719
3098
  return results.filter((r) => r.severity === severity).length;
2720
3099
  }
2721
3100
  async function lint(root) {
2722
- const ruleResults = await Promise.all(
2723
- ALL_RULES.map((rule) => rule(root))
2724
- );
2725
- const results = ruleResults.flat();
3101
+ const schema = await loadSchema(root);
3102
+ const [plainResults, schemaResults] = await Promise.all([
3103
+ Promise.all(RULES_WITHOUT_SCHEMA.map((rule) => rule(root))),
3104
+ Promise.all(RULES_WITH_SCHEMA.map((rule) => rule(root, schema)))
3105
+ ]);
3106
+ const results = [...plainResults.flat(), ...schemaResults.flat()];
2726
3107
  return {
2727
3108
  errors: countBySeverity(results, "error"),
2728
3109
  warnings: countBySeverity(results, "warning"),
@@ -2750,6 +3131,9 @@ function printResult(result) {
2750
3131
  }
2751
3132
  async function lintCommand() {
2752
3133
  header("Linting wiki");
3134
+ const schema = await loadSchema(process.cwd());
3135
+ const schemaSource = schema.loadedFrom ?? "defaults (no schema file)";
3136
+ status("i", dim(`Schema: ${schemaSource}`));
2753
3137
  const summary = await lint(process.cwd());
2754
3138
  for (const result of summary.results) {
2755
3139
  printResult(result);
@@ -2766,6 +3150,36 @@ async function lintCommand() {
2766
3150
  }
2767
3151
  }
2768
3152
 
3153
+ // src/commands/schema.ts
3154
+ import { existsSync as existsSync10 } from "fs";
3155
+ import { mkdir as mkdir6, writeFile as writeFile5 } from "fs/promises";
3156
+ import path21 from "path";
3157
+ async function schemaInitCommand() {
3158
+ const root = process.cwd();
3159
+ const defaults = buildDefaultSchema();
3160
+ const targetPath = defaultSchemaInitPath(root);
3161
+ if (existsSync10(targetPath)) {
3162
+ status("!", warn(`Schema file already exists at ${targetPath}`));
3163
+ return;
3164
+ }
3165
+ await mkdir6(path21.dirname(targetPath), { recursive: true });
3166
+ const serializable = {
3167
+ version: defaults.version,
3168
+ defaultKind: defaults.defaultKind,
3169
+ kinds: defaults.kinds,
3170
+ seedPages: defaults.seedPages
3171
+ };
3172
+ await writeFile5(targetPath, `${JSON.stringify(serializable, null, 2)}
3173
+ `, "utf-8");
3174
+ status("+", success(`Wrote schema to ${targetPath}`));
3175
+ }
3176
+ async function schemaShowCommand() {
3177
+ const schema = await loadSchema(process.cwd());
3178
+ const loadedFrom = schema.loadedFrom ?? "(defaults \u2014 no schema file found)";
3179
+ header(`Schema (${loadedFrom})`);
3180
+ console.log(serializeSchemaToYaml(schema));
3181
+ }
3182
+
2769
3183
  // src/commands/review-list.ts
2770
3184
  async function reviewListCommand() {
2771
3185
  header("Pending review candidates");
@@ -2797,10 +3211,17 @@ async function reviewShowCommand(id) {
2797
3211
  status("i", dim(`generated: ${candidate.generatedAt}`));
2798
3212
  console.log();
2799
3213
  console.log(candidate.body);
3214
+ if (candidate.schemaViolations && candidate.schemaViolations.length > 0) {
3215
+ console.log();
3216
+ header("Schema violations");
3217
+ for (const v of candidate.schemaViolations) {
3218
+ status("!", warn(`[${v.severity}] ${v.message}`));
3219
+ }
3220
+ }
2800
3221
  }
2801
3222
 
2802
3223
  // src/commands/review-approve.ts
2803
- import path20 from "path";
3224
+ import path22 from "path";
2804
3225
 
2805
3226
  // src/commands/review-helpers.ts
2806
3227
  async function runReviewUnderLock(id, underLock) {
@@ -2832,7 +3253,7 @@ async function approveUnderLock(root, id) {
2832
3253
  process.exitCode = 1;
2833
3254
  return;
2834
3255
  }
2835
- const pagePath = path20.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
3256
+ const pagePath = path22.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
2836
3257
  await atomicWrite(pagePath, candidate.body);
2837
3258
  status("+", success(`Approved \u2192 ${source(pagePath)}`));
2838
3259
  await persistCandidateSourceStates(root, candidate);
@@ -2892,7 +3313,7 @@ import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js
2892
3313
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
2893
3314
 
2894
3315
  // src/mcp/tools.ts
2895
- import path21 from "path";
3316
+ import path23 from "path";
2896
3317
  import { z } from "zod";
2897
3318
 
2898
3319
  // src/mcp/provider-check.ts
@@ -3022,7 +3443,7 @@ async function pickSearchSlugs(root, question) {
3022
3443
  if (candidates.length > 0) return candidates.map((c) => c.slug);
3023
3444
  } catch {
3024
3445
  }
3025
- const indexContent = await safeReadFile(path21.join(root, INDEX_FILE));
3446
+ const indexContent = await safeReadFile(path23.join(root, INDEX_FILE));
3026
3447
  const { pages } = await selectPages(question, indexContent);
3027
3448
  return pages;
3028
3449
  }
@@ -3071,8 +3492,8 @@ function registerStatusTool(server, root) {
3071
3492
  );
3072
3493
  }
3073
3494
  async function collectStatus(root) {
3074
- const concepts = await collectPageSummaries(path21.join(root, CONCEPTS_DIR));
3075
- const queries = await collectPageSummaries(path21.join(root, QUERIES_DIR));
3495
+ const concepts = await collectPageSummaries(path23.join(root, CONCEPTS_DIR));
3496
+ const queries = await collectPageSummaries(path23.join(root, QUERIES_DIR));
3076
3497
  const state = await readState(root);
3077
3498
  const changes = await detectChanges(root, state);
3078
3499
  const orphans = await findOrphanedSlugs(root);
@@ -3089,7 +3510,7 @@ async function collectStatus(root) {
3089
3510
  };
3090
3511
  }
3091
3512
  async function findOrphanedSlugs(root) {
3092
- const scanned = await scanWikiPages(path21.join(root, CONCEPTS_DIR));
3513
+ const scanned = await scanWikiPages(path23.join(root, CONCEPTS_DIR));
3093
3514
  return scanned.filter(({ meta }) => meta.orphaned).map(({ slug }) => slug);
3094
3515
  }
3095
3516
  async function loadPageRecords(root, slugs) {
@@ -3102,7 +3523,7 @@ async function loadPageRecords(root, slugs) {
3102
3523
  }
3103
3524
  async function readPage(root, slug) {
3104
3525
  for (const dir of PAGE_DIRS2) {
3105
- const content = await safeReadFile(path21.join(root, dir, `${slug}.md`));
3526
+ const content = await safeReadFile(path23.join(root, dir, `${slug}.md`));
3106
3527
  if (!content) continue;
3107
3528
  const { meta, body } = parseFrontmatter(content);
3108
3529
  if (meta.orphaned) continue;
@@ -3117,7 +3538,7 @@ async function readPage(root, slug) {
3117
3538
  }
3118
3539
 
3119
3540
  // src/mcp/resources.ts
3120
- import path22 from "path";
3541
+ import path24 from "path";
3121
3542
  import { readdir as readdir9 } from "fs/promises";
3122
3543
  import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
3123
3544
  function jsonContent(uri, payload) {
@@ -3151,7 +3572,7 @@ function registerIndexResource(server, root) {
3151
3572
  mimeType: "text/markdown"
3152
3573
  },
3153
3574
  async (uri) => {
3154
- const content = await safeReadFile(path22.join(root, INDEX_FILE));
3575
+ const content = await safeReadFile(path24.join(root, INDEX_FILE));
3155
3576
  return { contents: [markdownContent(uri, content)] };
3156
3577
  }
3157
3578
  );
@@ -3218,7 +3639,7 @@ function registerQueryResource(server, root) {
3218
3639
  );
3219
3640
  }
3220
3641
  async function listSources(root) {
3221
- const sourcesPath = path22.join(root, SOURCES_DIR);
3642
+ const sourcesPath = path24.join(root, SOURCES_DIR);
3222
3643
  let files;
3223
3644
  try {
3224
3645
  files = await readdir9(sourcesPath);
@@ -3227,14 +3648,14 @@ async function listSources(root) {
3227
3648
  }
3228
3649
  const records = [];
3229
3650
  for (const file of files.filter((f) => f.endsWith(".md"))) {
3230
- const content = await safeReadFile(path22.join(sourcesPath, file));
3651
+ const content = await safeReadFile(path24.join(sourcesPath, file));
3231
3652
  const { meta } = parseFrontmatter(content);
3232
3653
  records.push({ filename: file, ...meta });
3233
3654
  }
3234
3655
  return records;
3235
3656
  }
3236
3657
  async function loadPageWithMeta(root, dir, slug) {
3237
- const filePath = path22.join(root, dir, `${slug}.md`);
3658
+ const filePath = path24.join(root, dir, `${slug}.md`);
3238
3659
  const content = await safeReadFile(filePath);
3239
3660
  if (!content) {
3240
3661
  throw new Error(`Page not found: ${dir}/${slug}.md`);
@@ -3243,7 +3664,7 @@ async function loadPageWithMeta(root, dir, slug) {
3243
3664
  return { slug, meta, body: body.trim() };
3244
3665
  }
3245
3666
  async function listPagesUnder(root, dir, scheme) {
3246
- const pagesPath = path22.join(root, dir);
3667
+ const pagesPath = path24.join(root, dir);
3247
3668
  let files;
3248
3669
  try {
3249
3670
  files = await readdir9(pagesPath);
@@ -3353,6 +3774,23 @@ program.command("lint").description("Run rule-based quality checks against the w
3353
3774
  process.exit(1);
3354
3775
  }
3355
3776
  });
3777
+ var schemaCmd = program.command("schema").description("Inspect or initialize the project's wiki schema config");
3778
+ schemaCmd.command("init").description("Write a starter schema file to .llmwiki/schema.json").action(async () => {
3779
+ try {
3780
+ await schemaInitCommand();
3781
+ } catch (err) {
3782
+ console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
3783
+ process.exit(1);
3784
+ }
3785
+ });
3786
+ schemaCmd.command("show").description("Print the resolved schema for this project").action(async () => {
3787
+ try {
3788
+ await schemaShowCommand();
3789
+ } catch (err) {
3790
+ console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
3791
+ process.exit(1);
3792
+ }
3793
+ });
3356
3794
  program.command("serve").description("Start an MCP server exposing wiki tools and resources over stdio").option("--root <dir>", "Project root directory", process.cwd()).action(async (options) => {
3357
3795
  try {
3358
3796
  await startMCPServer({ root: options.root, version });