llm-wiki-compiler 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -4
- package/dist/cli.js +908 -470
- package/dist/cli.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -13,6 +13,8 @@ import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
|
|
|
13
13
|
import { writeFile, rename, readFile, mkdir } from "fs/promises";
|
|
14
14
|
import path from "path";
|
|
15
15
|
import yaml from "js-yaml";
|
|
16
|
+
var SPAN_SUFFIX_PATTERN = /^(?<file>[^:#]+)(?:(?::(?<colonStart>\d+)(?:-(?<colonEnd>\d+))?)|(?:#L(?<hashStart>\d+)(?:-L(?<hashEnd>\d+))?))?$/;
|
|
17
|
+
var MIN_LINE_NUMBER = 1;
|
|
16
18
|
var VALID_PROVENANCE_STATES = /* @__PURE__ */ new Set([
|
|
17
19
|
"extracted",
|
|
18
20
|
"merged",
|
|
@@ -49,6 +51,23 @@ async function atomicWrite(filePath, content) {
|
|
|
49
51
|
await writeFile(tmpPath, content, "utf-8");
|
|
50
52
|
await rename(tmpPath, filePath);
|
|
51
53
|
}
|
|
54
|
+
/**
 * Decide whether a pair of line numbers forms a usable span.
 *
 * @param {number} start - First line of the span.
 * @param {number} end - Last line of the span.
 * @returns {boolean} True when `start` is at least MIN_LINE_NUMBER and
 *   `end` is not before `start`.
 */
function isValidLineRange(start, end) {
  // Negated comparison (rather than `<`) so NaN is rejected exactly as before.
  if (!(start >= MIN_LINE_NUMBER)) {
    return false;
  }
  return end >= start;
}
|
|
57
|
+
/**
 * Report whether one citation entry is malformed.
 *
 * An entry is malformed when it is blank, when it contains a `:` or `#`
 * but does not match SPAN_SUFFIX_PATTERN, or when its parsed line span
 * fails isValidLineRange. An entry without `:` or `#` is accepted as-is.
 *
 * @param {string} entry - Raw citation entry text.
 * @returns {boolean} True when the entry is malformed.
 */
function isMalformedCitationEntry(entry) {
  const candidate = entry.trim();
  if (candidate === "") {
    return true;
  }

  const hasSpanMarker = candidate.includes(":") || candidate.includes("#");
  if (!hasSpanMarker) {
    return false;
  }

  const groups = SPAN_SUFFIX_PATTERN.exec(candidate)?.groups;
  if (!groups) {
    return true;
  }

  // The colon form and the #L form are alternatives; take whichever matched.
  const rawStart = groups.colonStart ?? groups.hashStart;
  if (rawStart === undefined) {
    return false;
  }
  const rawEnd = groups.colonEnd ?? groups.hashEnd;

  const firstLine = Number(rawStart);
  // A span with no explicit end covers a single line.
  const lastLine = rawEnd === undefined ? firstLine : Number(rawEnd);
  return !isValidLineRange(firstLine, lastLine);
}
|
|
52
71
|
async function safeReadFile(filePath) {
|
|
53
72
|
try {
|
|
54
73
|
return await readFile(filePath, "utf-8");
|
|
@@ -120,6 +139,8 @@ var PROVIDER_MODELS = {
|
|
|
120
139
|
minimax: "MiniMax-M2.7"
|
|
121
140
|
};
|
|
122
141
|
var OLLAMA_DEFAULT_HOST = "http://localhost:11434/v1";
|
|
142
|
+
var OPENAI_DEFAULT_TIMEOUT_MS = 10 * 60 * 1e3;
|
|
143
|
+
var OLLAMA_DEFAULT_TIMEOUT_MS = 30 * 60 * 1e3;
|
|
123
144
|
var SOURCES_DIR = "sources";
|
|
124
145
|
var CONCEPTS_DIR = "wiki/concepts";
|
|
125
146
|
var QUERIES_DIR = "wiki/queries";
|
|
@@ -324,11 +345,11 @@ async function ingest(source2) {
|
|
|
324
345
|
}
|
|
325
346
|
|
|
326
347
|
// src/commands/compile.ts
|
|
327
|
-
import { existsSync as
|
|
348
|
+
import { existsSync as existsSync7 } from "fs";
|
|
328
349
|
|
|
329
350
|
// src/compiler/index.ts
|
|
330
|
-
import { readFile as
|
|
331
|
-
import
|
|
351
|
+
import { readFile as readFile10 } from "fs/promises";
|
|
352
|
+
import path18 from "path";
|
|
332
353
|
|
|
333
354
|
// src/utils/state.ts
|
|
334
355
|
import { readFile as readFile3, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
|
|
@@ -556,6 +577,15 @@ var AnthropicProvider = class {
|
|
|
556
577
|
|
|
557
578
|
// src/providers/openai.ts
|
|
558
579
|
import OpenAI from "openai";
|
|
580
|
+
/**
 * Read a timeout value from an environment variable.
 *
 * @param {string} name - Environment variable to look up.
 * @returns {number | undefined} The parsed value when it is a finite number
 *   greater than zero; `undefined` when the variable is unset, blank, or does
 *   not parse to such a number.
 */
function readTimeoutEnv(name) {
  const text = process.env[name]?.trim();
  if (!text) {
    return undefined;
  }

  const value = Number(text);
  if (!Number.isFinite(value) || value <= 0) {
    return undefined;
  }
  return value;
}
|
|
586
|
+
/**
 * Look up the request timeout override from LLMWIKI_REQUEST_TIMEOUT_MS.
 *
 * @returns {number | undefined} Whatever readTimeoutEnv yields for that
 *   variable; callers supply their own fallback when it is `undefined`.
 */
function resolveOpenAITimeoutMs() {
  const fromEnv = readTimeoutEnv("LLMWIKI_REQUEST_TIMEOUT_MS");
  return fromEnv;
}
|
|
559
589
|
function translateToolToOpenAI(tool) {
|
|
560
590
|
return {
|
|
561
591
|
type: "function",
|
|
@@ -575,11 +605,13 @@ var OpenAIProvider = class {
|
|
|
575
605
|
this.model = model;
|
|
576
606
|
this.configuredEmbeddingModel = options.embeddingModel;
|
|
577
607
|
const resolvedKey = options.apiKey ?? process.env.OPENAI_API_KEY ?? "";
|
|
608
|
+
const timeout = options.timeoutMs ?? resolveOpenAITimeoutMs() ?? OPENAI_DEFAULT_TIMEOUT_MS;
|
|
578
609
|
this.client = new OpenAI({
|
|
579
610
|
apiKey: resolvedKey,
|
|
580
|
-
baseURL: options.baseURL ?? null
|
|
611
|
+
baseURL: options.baseURL ?? null,
|
|
612
|
+
timeout
|
|
581
613
|
});
|
|
582
|
-
this.embeddingsClient = options.embeddingsBaseURL ? new OpenAI({ apiKey: resolvedKey, baseURL: options.embeddingsBaseURL }) : this.client;
|
|
614
|
+
this.embeddingsClient = options.embeddingsBaseURL ? new OpenAI({ apiKey: resolvedKey, baseURL: options.embeddingsBaseURL, timeout }) : this.client;
|
|
583
615
|
}
|
|
584
616
|
/** Send a single non-streaming completion request. */
|
|
585
617
|
async complete(system, messages, maxTokens) {
|
|
@@ -645,13 +677,17 @@ var OpenAIProvider = class {
|
|
|
645
677
|
};
|
|
646
678
|
|
|
647
679
|
// src/providers/ollama.ts
|
|
680
|
+
/**
 * Pick the timeout to use for Ollama requests.
 *
 * Precedence: the explicit argument, then OLLAMA_TIMEOUT_MS, then
 * LLMWIKI_REQUEST_TIMEOUT_MS, then OLLAMA_DEFAULT_TIMEOUT_MS. Environment
 * variables are only consulted when the earlier source is null/undefined.
 *
 * @param {number | null | undefined} explicit - Caller-provided timeout.
 * @returns {number} The first non-nullish value in the precedence chain.
 */
function resolveOllamaTimeoutMs(explicit) {
  if (explicit != null) {
    return explicit;
  }
  for (const envName of ["OLLAMA_TIMEOUT_MS", "LLMWIKI_REQUEST_TIMEOUT_MS"]) {
    const fromEnv = readTimeoutEnv(envName);
    if (fromEnv != null) {
      return fromEnv;
    }
  }
  return OLLAMA_DEFAULT_TIMEOUT_MS;
}
|
|
648
683
|
var OllamaProvider = class extends OpenAIProvider {
|
|
649
684
|
constructor(model, options) {
|
|
650
685
|
super(model, {
|
|
651
686
|
baseURL: options.baseURL,
|
|
652
687
|
apiKey: "ollama",
|
|
653
688
|
embeddingsBaseURL: options.embeddingsBaseURL,
|
|
654
|
-
embeddingModel: options.embeddingModel
|
|
689
|
+
embeddingModel: options.embeddingModel,
|
|
690
|
+
timeoutMs: resolveOllamaTimeoutMs(options.timeoutMs)
|
|
655
691
|
});
|
|
656
692
|
}
|
|
657
693
|
/** Ollama ships a dedicated embedding model (nomic-embed-text). */
|
|
@@ -1065,7 +1101,14 @@ ${relatedPages}` : "";
|
|
|
1065
1101
|
"Source attribution: at the end of each prose paragraph, append a citation",
|
|
1066
1102
|
"marker showing which source file(s) the paragraph drew from.",
|
|
1067
1103
|
"Format: ^[filename.md] for single-source, ^[source-a.md, source-b.md] for multi-source.",
|
|
1068
|
-
"
|
|
1104
|
+
"When a single sentence makes a specific factual claim and you can identify the",
|
|
1105
|
+
"exact line range it came from, you may use the claim-level form",
|
|
1106
|
+
"^[filename.md:START-END] (or ^[filename.md#LSTART-LEND]) at the end of that",
|
|
1107
|
+
"sentence \u2014 START and END are 1-indexed line numbers in the source file.",
|
|
1108
|
+
"Paragraph-level citations remain the default; only switch to claim-level form",
|
|
1109
|
+
"when it materially improves verifiability and the line range is unambiguous.",
|
|
1110
|
+
"Place citations only at the end of prose paragraphs or sentences \u2014 not on",
|
|
1111
|
+
"headings, list items, or code blocks.",
|
|
1069
1112
|
"Source filenames are visible as `--- SOURCE: filename.md ---` headers in the content below.",
|
|
1070
1113
|
"",
|
|
1071
1114
|
"If a paragraph is your inference rather than a direct extraction, leave it",
|
|
@@ -1106,6 +1149,20 @@ function mapRawConcept(c) {
|
|
|
1106
1149
|
inferredParagraphs: typeof c.inferred_paragraphs === "number" && Number.isInteger(c.inferred_paragraphs) && c.inferred_paragraphs >= 0 ? c.inferred_paragraphs : void 0
|
|
1107
1150
|
};
|
|
1108
1151
|
}
|
|
1152
|
+
/**
 * Assemble the system prompt used to write a seed page.
 *
 * @param {{ kind: string, title: string, summary: string }} seed - Seed page
 *   definition whose kind, title and summary are quoted in the prompt.
 * @param {{ minWikilinks: number, description: string }} rule - Rule for the
 *   seed's page kind; a positive `minWikilinks` turns the wikilink hint into
 *   an explicit minimum.
 * @param {string} relatedPagesContent - Text appended after the
 *   "RELATED PAGES" divider.
 * @returns {string} The prompt, one instruction per line.
 */
function buildSeedPagePrompt(seed, rule, relatedPagesContent) {
  let linkExpectation = "Use [[wikilinks]] when referencing other pages.";
  if (rule.minWikilinks > 0) {
    linkExpectation = `Include at least ${rule.minWikilinks} [[wikilinks]] to related pages.`;
  }

  const instructions = [
    `You are a wiki author. Write a ${seed.kind} page titled "${seed.title}".`,
    `Page-kind guidance: ${rule.description}`,
    `Summary line for context: ${seed.summary}`,
    "Draw facts only from the related wiki pages provided below.",
    linkExpectation,
    "Write in a neutral, informative tone. Be concise but thorough.",
  ];
  const divider = "\n\n--- RELATED PAGES ---\n\n";

  return [...instructions, divider, relatedPagesContent].join("\n");
}
|
|
1109
1166
|
function parseConcepts(toolOutput) {
|
|
1110
1167
|
try {
|
|
1111
1168
|
const parsed = JSON.parse(toolOutput);
|
|
@@ -1116,6 +1173,149 @@ function parseConcepts(toolOutput) {
|
|
|
1116
1173
|
}
|
|
1117
1174
|
}
|
|
1118
1175
|
|
|
1176
|
+
// src/schema/types.ts
|
|
1177
|
+
var PAGE_KINDS = [
|
|
1178
|
+
"concept",
|
|
1179
|
+
"entity",
|
|
1180
|
+
"comparison",
|
|
1181
|
+
"overview"
|
|
1182
|
+
];
|
|
1183
|
+
|
|
1184
|
+
// src/schema/defaults.ts
|
|
1185
|
+
var DEFAULT_MIN_LINKS = {
|
|
1186
|
+
concept: 0,
|
|
1187
|
+
entity: 1,
|
|
1188
|
+
comparison: 2,
|
|
1189
|
+
overview: 3
|
|
1190
|
+
};
|
|
1191
|
+
var DEFAULT_DESCRIPTIONS = {
|
|
1192
|
+
concept: "A standalone idea, technique, or pattern worth documenting.",
|
|
1193
|
+
entity: "A specific thing \u2014 a person, product, organization, or named artifact.",
|
|
1194
|
+
comparison: "A side-by-side analysis weighing two or more concepts or entities.",
|
|
1195
|
+
overview: "A top-down map page that situates several concepts within a domain."
|
|
1196
|
+
};
|
|
1197
|
+
/**
 * Build a fresh set of per-kind rules from the default tables.
 *
 * @returns {Record<string, { minWikilinks: number, description: string }>}
 *   One rule each for concept, entity, comparison and overview, populated
 *   from DEFAULT_MIN_LINKS and DEFAULT_DESCRIPTIONS.
 */
function buildDefaultKindRules() {
  const ruleFor = (kind) => ({
    minWikilinks: DEFAULT_MIN_LINKS[kind],
    description: DEFAULT_DESCRIPTIONS[kind],
  });

  return {
    concept: ruleFor("concept"),
    entity: ruleFor("entity"),
    comparison: ruleFor("comparison"),
    overview: ruleFor("overview"),
  };
}
|
|
1211
|
+
/**
 * Build the schema used when no schema file overrides anything.
 *
 * @returns {object} A new schema object: version 1, "concept" as the default
 *   kind, the default kind rules, no seed pages, and `loadedFrom` set to null.
 */
function buildDefaultSchema() {
  const schema = { version: 1, defaultKind: "concept" };
  schema.kinds = buildDefaultKindRules();
  schema.seedPages = [];
  schema.loadedFrom = null;
  return schema;
}
|
|
1220
|
+
|
|
1221
|
+
// src/schema/loader.ts
|
|
1222
|
+
import { existsSync as existsSync2 } from "fs";
|
|
1223
|
+
import { readFile as readFile6 } from "fs/promises";
|
|
1224
|
+
import path9 from "path";
|
|
1225
|
+
import yaml2 from "js-yaml";
|
|
1226
|
+
var SCHEMA_CANDIDATE_PATHS = [
|
|
1227
|
+
".llmwiki/schema.json",
|
|
1228
|
+
".llmwiki/schema.yaml",
|
|
1229
|
+
".llmwiki/schema.yml",
|
|
1230
|
+
"wiki/.schema.yaml",
|
|
1231
|
+
"wiki/.schema.yml"
|
|
1232
|
+
];
|
|
1233
|
+
/**
 * Locate the schema file for a project.
 *
 * @param {string} root - Directory the candidate paths are resolved against.
 * @returns {string | null} The joined path of the first entry in
 *   SCHEMA_CANDIDATE_PATHS that exists on disk, or null when none exists.
 */
function findSchemaPath(root) {
  // Earlier candidates win, so stop at the first one that exists.
  const firstExisting = SCHEMA_CANDIDATE_PATHS.find((relative) =>
    existsSync2(path9.join(root, relative))
  );
  return firstExisting === undefined ? null : path9.join(root, firstExisting);
}
|
|
1240
|
+
/**
 * Parse raw schema file text into an object.
 *
 * Paths ending in ".json" are parsed with JSON.parse; everything else goes
 * through the YAML loader. Parser errors propagate to the caller.
 *
 * @param {string} filePath - Path of the schema file; only its suffix is used.
 * @param {string} content - Raw file text.
 * @returns {object} The parsed value when it is a non-null object, otherwise
 *   an empty object.
 */
function parseSchemaFile(filePath, content) {
  let document;
  if (filePath.endsWith(".json")) {
    document = JSON.parse(content);
  } else {
    document = yaml2.load(content);
  }

  // Scalars and null carry no overrides, so they collapse to "nothing set".
  const isObjectLike = Boolean(document) && typeof document === "object";
  return isObjectLike ? document : {};
}
|
|
1246
|
+
/**
 * Check whether a value names one of the known page kinds.
 *
 * @param {unknown} value - Value to test.
 * @returns {boolean} True only for strings listed in PAGE_KINDS.
 */
function isPageKind(value) {
  if (typeof value !== "string") {
    return false;
  }
  return PAGE_KINDS.includes(value);
}
|
|
1249
|
+
/**
 * Overlay a user-supplied rule for one page kind onto its default rule.
 *
 * Each field is taken from `override` only when it is usable; otherwise the
 * default is kept. `minWikilinks` must be a non-negative integer: a bare
 * `typeof === "number"` check would also admit NaN, Infinity, negative and
 * fractional values from a hand-written schema file, and those would then be
 * quoted verbatim in generated prompts.
 *
 * @param {{ minWikilinks: number, description: string }} defaults - Rule to
 *   fall back to.
 * @param {object | null | undefined} override - Raw rule from the schema file.
 * @returns {{ minWikilinks: number, description: string }} `defaults` itself
 *   when there is no override, otherwise a new merged rule.
 */
function mergeKindRule(defaults, override) {
  if (!override) {
    return defaults;
  }

  const { minWikilinks, description } = override;
  const hasUsableMinLinks = Number.isInteger(minWikilinks) && minWikilinks >= 0;

  return {
    minWikilinks: hasUsableMinLinks ? minWikilinks : defaults.minWikilinks,
    description: typeof description === "string" ? description : defaults.description,
  };
}
|
|
1255
|
+
/**
 * Merge per-kind rule overrides into the default rule table.
 *
 * @param {object} defaults - Default rules keyed by page kind; not mutated.
 * @param {object | null | undefined} overrides - Raw rules keyed by page kind.
 *   Only keys listed in PAGE_KINDS are consulted.
 * @returns {object} A new table: a shallow copy of `defaults` when there are
 *   no overrides, otherwise each known kind passed through mergeKindRule.
 */
function mergeKinds(defaults, overrides) {
  if (!overrides) {
    return { ...defaults };
  }

  const mergedEntries = PAGE_KINDS.map((kind) => [
    kind,
    mergeKindRule(defaults[kind], overrides[kind]),
  ]);
  return { ...defaults, ...Object.fromEntries(mergedEntries) };
}
|
|
1263
|
+
/**
 * Validate and normalize one raw seed-page entry from a schema file.
 *
 * Entries come from user-authored JSON/YAML, so they may be anything,
 * including null (for example an empty YAML list item). Non-object entries
 * are rejected up front instead of throwing on property access.
 *
 * @param {unknown} entry - Raw seed-page entry.
 * @returns {{ title: string, kind: string, summary: string,
 *   relatedSlugs: string[] | undefined } | null} The normalized entry, or null
 *   when the entry is not an object, has no non-blank string title, or has an
 *   unknown kind.
 */
function normalizeSeedPage(entry) {
  if (entry === null || typeof entry !== "object") {
    return null;
  }

  const { title, kind, summary, relatedSlugs } = entry;
  if (typeof title !== "string" || title.trim() === "") {
    return null;
  }
  if (!isPageKind(kind)) {
    return null;
  }

  return {
    title,
    kind,
    summary: typeof summary === "string" ? summary : "",
    // Non-string slugs are dropped; a missing or non-array list stays undefined.
    relatedSlugs: Array.isArray(relatedSlugs)
      ? relatedSlugs.filter((slug) => typeof slug === "string")
      : undefined,
  };
}
|
|
1270
|
+
/**
 * Normalize the raw `seedPages` list from a schema file.
 *
 * @param {unknown} entries - Raw value of the `seedPages` field.
 * @returns {object[]} The entries that normalizeSeedPage accepted, in their
 *   original order; an empty array when `entries` is not an array.
 */
function normalizeSeedPages(entries) {
  if (!Array.isArray(entries)) {
    return [];
  }
  // flatMap drops rejected (null) entries while keeping order.
  return entries.flatMap((raw) => {
    const page = normalizeSeedPage(raw);
    return page === null ? [] : [page];
  });
}
|
|
1274
|
+
/**
 * Combine the default schema with the overrides parsed from a schema file.
 *
 * @param {object} defaults - Default schema; supplies the fallback default
 *   kind and the base kind rules.
 * @param {object} overrides - Parsed schema file contents.
 * @param {string | null} loadedFrom - Recorded verbatim on the result.
 * @returns {object} A version-1 schema whose default kind comes from the
 *   overrides only when it is a known page kind, with merged kind rules and
 *   normalized seed pages.
 */
function applyOverrides(defaults, overrides, loadedFrom) {
  let defaultKind;
  if (isPageKind(overrides.defaultKind)) {
    defaultKind = overrides.defaultKind;
  } else {
    defaultKind = defaults.defaultKind;
  }

  const kinds = mergeKinds(defaults.kinds, overrides.kinds);
  const seedPages = normalizeSeedPages(overrides.seedPages);

  return { version: 1, defaultKind, kinds, seedPages, loadedFrom };
}
|
|
1284
|
+
/**
 * Load the effective schema for a project.
 *
 * @param {string} root - Directory searched for a schema file.
 * @returns {Promise<object>} The default schema when no schema file is found;
 *   otherwise the defaults with the file's overrides applied. Read and parse
 *   errors are not caught here.
 */
async function loadSchema(root) {
  const fallback = buildDefaultSchema();
  const location = findSchemaPath(root);
  if (!location) {
    return fallback;
  }

  const text = await readFile6(location, "utf-8");
  const overrides = parseSchemaFile(location, text);
  return applyOverrides(fallback, overrides, location);
}
|
|
1292
|
+
/**
 * Compute the path of the first schema candidate under a project root.
 *
 * @param {string} root - Project directory.
 * @returns {string} `root` joined with the first entry of
 *   SCHEMA_CANDIDATE_PATHS.
 */
function defaultSchemaInitPath(root) {
  const [preferred] = SCHEMA_CANDIDATE_PATHS;
  return path9.join(root, preferred);
}
|
|
1295
|
+
|
|
1296
|
+
// src/schema/helpers.ts
|
|
1297
|
+
import yaml3 from "js-yaml";
|
|
1298
|
+
var WIKILINK_PATTERN = /\[\[([^\]]+)\]\]/g;
|
|
1299
|
+
/**
 * Resolve the page kind to use for a page.
 *
 * @param {unknown} rawKind - Kind value as found on the page.
 * @param {{ defaultKind: string }} schema - Schema supplying the fallback.
 * @returns {string} `rawKind` when it is a string listed in PAGE_KINDS,
 *   otherwise `schema.defaultKind`.
 */
function resolvePageKind(rawKind, schema) {
  const isRecognized = typeof rawKind === "string" && PAGE_KINDS.includes(rawKind);
  return isRecognized ? rawKind : schema.defaultKind;
}
|
|
1305
|
+
/**
 * Count the `[[...]]` wikilinks in a page body.
 *
 * @param {string} body - Page text to scan.
 * @returns {number} Number of WIKILINK_PATTERN matches; 0 when there are none.
 */
function countWikilinks(body) {
  // String#match returns null (not an empty array) when nothing matches.
  return body.match(WIKILINK_PATTERN)?.length ?? 0;
}
|
|
1309
|
+
/**
 * Serialize a schema to YAML text.
 *
 * Only `version`, `defaultKind`, `kinds` and `seedPages` are written; any
 * other field on the schema object is left out of the output.
 *
 * @param {object} schema - Schema to serialize.
 * @returns {string} The result of dumping those four fields.
 */
function serializeSchemaToYaml(schema) {
  const { version, defaultKind, kinds, seedPages } = schema;
  const dumpOptions = { lineWidth: -1, quotingType: '"' };
  return yaml3.dump({ version, defaultKind, kinds, seedPages }, dumpOptions);
}
|
|
1318
|
+
|
|
1119
1319
|
// src/compiler/deps.ts
|
|
1120
1320
|
function buildConceptToSourcesMap(sources) {
|
|
1121
1321
|
const conceptMap = /* @__PURE__ */ new Map();
|
|
@@ -1262,7 +1462,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
|
|
|
1262
1462
|
}
|
|
1263
1463
|
|
|
1264
1464
|
// src/compiler/orphan.ts
|
|
1265
|
-
import
|
|
1465
|
+
import path10 from "path";
|
|
1266
1466
|
async function markOrphaned(root, sourceFile, state) {
|
|
1267
1467
|
const sourceEntry = state.sources[sourceFile];
|
|
1268
1468
|
if (!sourceEntry) return;
|
|
@@ -1288,7 +1488,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
|
|
|
1288
1488
|
}
|
|
1289
1489
|
}
|
|
1290
1490
|
async function orphanPage(root, slug, reason) {
|
|
1291
|
-
const pagePath =
|
|
1491
|
+
const pagePath = path10.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
1292
1492
|
const content = await safeReadFile(pagePath);
|
|
1293
1493
|
if (!content) return;
|
|
1294
1494
|
const { meta } = parseFrontmatter(content);
|
|
@@ -1299,18 +1499,18 @@ async function orphanPage(root, slug, reason) {
|
|
|
1299
1499
|
}
|
|
1300
1500
|
|
|
1301
1501
|
// src/compiler/resolver.ts
|
|
1302
|
-
import { readdir as readdir2, readFile as
|
|
1303
|
-
import
|
|
1304
|
-
import { existsSync as
|
|
1502
|
+
import { readdir as readdir2, readFile as readFile7 } from "fs/promises";
|
|
1503
|
+
import path11 from "path";
|
|
1504
|
+
import { existsSync as existsSync3 } from "fs";
|
|
1305
1505
|
async function buildTitleIndex(root) {
|
|
1306
|
-
const conceptsDir =
|
|
1307
|
-
if (!
|
|
1506
|
+
const conceptsDir = path11.join(root, CONCEPTS_DIR);
|
|
1507
|
+
if (!existsSync3(conceptsDir)) return [];
|
|
1308
1508
|
const files = await readdir2(conceptsDir);
|
|
1309
1509
|
const pages = [];
|
|
1310
1510
|
for (const file of files) {
|
|
1311
1511
|
if (!file.endsWith(".md")) continue;
|
|
1312
|
-
const filePath =
|
|
1313
|
-
const content = await
|
|
1512
|
+
const filePath = path11.join(conceptsDir, file);
|
|
1513
|
+
const content = await readFile7(filePath, "utf-8");
|
|
1314
1514
|
const { meta } = parseFrontmatter(content);
|
|
1315
1515
|
if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
|
|
1316
1516
|
pages.push({
|
|
@@ -1364,7 +1564,7 @@ function addWikilinks(body, titles, selfTitle) {
|
|
|
1364
1564
|
const matches = findTitleMatches(result, page.title);
|
|
1365
1565
|
for (const m of matches.reverse()) {
|
|
1366
1566
|
if (!isLinkablePosition(result, m.start, m.end)) continue;
|
|
1367
|
-
result = result.slice(0, m.start) + `[[${page.title}]]` + result.slice(m.end);
|
|
1567
|
+
result = result.slice(0, m.start) + `[[${page.slug}|${page.title}]]` + result.slice(m.end);
|
|
1368
1568
|
}
|
|
1369
1569
|
}
|
|
1370
1570
|
return result;
|
|
@@ -1396,7 +1596,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
|
|
|
1396
1596
|
let count = 0;
|
|
1397
1597
|
for (const page of titleIndex) {
|
|
1398
1598
|
if (newSlugs.includes(page.slug)) continue;
|
|
1399
|
-
const content = await
|
|
1599
|
+
const content = await readFile7(page.filePath, "utf-8");
|
|
1400
1600
|
const { body } = parseFrontmatter(content);
|
|
1401
1601
|
const linked = addWikilinks(body, newTitles, page.title);
|
|
1402
1602
|
if (linked !== body) {
|
|
@@ -1408,7 +1608,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
|
|
|
1408
1608
|
return count;
|
|
1409
1609
|
}
|
|
1410
1610
|
async function linkPage(page, titleIndex) {
|
|
1411
|
-
const content = await
|
|
1611
|
+
const content = await readFile7(page.filePath, "utf-8");
|
|
1412
1612
|
const { body } = parseFrontmatter(content);
|
|
1413
1613
|
const linked = addWikilinks(body, titleIndex, page.title);
|
|
1414
1614
|
if (linked === body) return false;
|
|
@@ -1419,17 +1619,17 @@ async function linkPage(page, titleIndex) {
|
|
|
1419
1619
|
|
|
1420
1620
|
// src/compiler/indexgen.ts
|
|
1421
1621
|
import { readdir as readdir3 } from "fs/promises";
|
|
1422
|
-
import
|
|
1622
|
+
import path12 from "path";
|
|
1423
1623
|
async function generateIndex(root) {
|
|
1424
1624
|
status("*", info("Generating index..."));
|
|
1425
|
-
const conceptsPath =
|
|
1426
|
-
const queriesPath =
|
|
1625
|
+
const conceptsPath = path12.join(root, CONCEPTS_DIR);
|
|
1626
|
+
const queriesPath = path12.join(root, QUERIES_DIR);
|
|
1427
1627
|
const concepts = await collectPageSummaries(conceptsPath);
|
|
1428
1628
|
const queries = await collectPageSummaries(queriesPath);
|
|
1429
1629
|
concepts.sort((a, b) => a.title.localeCompare(b.title));
|
|
1430
1630
|
queries.sort((a, b) => a.title.localeCompare(b.title));
|
|
1431
1631
|
const indexContent = buildIndexContent(concepts, queries);
|
|
1432
|
-
const indexPath =
|
|
1632
|
+
const indexPath = path12.join(root, INDEX_FILE);
|
|
1433
1633
|
await atomicWrite(indexPath, indexContent);
|
|
1434
1634
|
const total = concepts.length + queries.length;
|
|
1435
1635
|
status("+", success(`Index updated with ${total} pages.`));
|
|
@@ -1443,7 +1643,7 @@ async function scanWikiPages(dirPath) {
|
|
|
1443
1643
|
}
|
|
1444
1644
|
const scanned = [];
|
|
1445
1645
|
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
1446
|
-
const content = await safeReadFile(
|
|
1646
|
+
const content = await safeReadFile(path12.join(dirPath, file));
|
|
1447
1647
|
const { meta } = parseFrontmatter(content);
|
|
1448
1648
|
scanned.push({ slug: file.replace(/\.md$/, ""), meta });
|
|
1449
1649
|
}
|
|
@@ -1463,12 +1663,12 @@ function stripWikilinks(text) {
|
|
|
1463
1663
|
function buildIndexContent(concepts, queries) {
|
|
1464
1664
|
const lines = ["# Knowledge Wiki", "", "## Concepts", ""];
|
|
1465
1665
|
for (const page of concepts) {
|
|
1466
|
-
lines.push(`- **[[${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
|
|
1666
|
+
lines.push(`- **[[${page.slug}|${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
|
|
1467
1667
|
}
|
|
1468
1668
|
if (queries.length > 0) {
|
|
1469
1669
|
lines.push("", "## Saved Queries", "");
|
|
1470
1670
|
for (const page of queries) {
|
|
1471
|
-
lines.push(`- **[[${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
|
|
1671
|
+
lines.push(`- **[[${page.slug}|${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
|
|
1472
1672
|
}
|
|
1473
1673
|
}
|
|
1474
1674
|
const total = concepts.length + queries.length;
|
|
@@ -1480,7 +1680,7 @@ function buildIndexContent(concepts, queries) {
|
|
|
1480
1680
|
|
|
1481
1681
|
// src/compiler/obsidian.ts
|
|
1482
1682
|
import { readdir as readdir4 } from "fs/promises";
|
|
1483
|
-
import
|
|
1683
|
+
import path13 from "path";
|
|
1484
1684
|
var ABBREVIATION_MIN_WORDS = 3;
|
|
1485
1685
|
var SWAP_CONJUNCTIONS = [" and ", " or "];
|
|
1486
1686
|
function addObsidianMeta(frontmatter, conceptTitle, tags) {
|
|
@@ -1522,11 +1722,11 @@ function generateAbbreviation(title) {
|
|
|
1522
1722
|
return abbreviation;
|
|
1523
1723
|
}
|
|
1524
1724
|
async function generateMOC(root) {
|
|
1525
|
-
const conceptsPath =
|
|
1725
|
+
const conceptsPath = path13.join(root, CONCEPTS_DIR);
|
|
1526
1726
|
const pages = await loadConceptPages(conceptsPath);
|
|
1527
1727
|
const tagGroups = groupPagesByTag(pages);
|
|
1528
1728
|
const content = buildMOCContent(tagGroups);
|
|
1529
|
-
await atomicWrite(
|
|
1729
|
+
await atomicWrite(path13.join(root, MOC_FILE), content);
|
|
1530
1730
|
}
|
|
1531
1731
|
async function loadConceptPages(conceptsPath) {
|
|
1532
1732
|
let files;
|
|
@@ -1538,13 +1738,14 @@ async function loadConceptPages(conceptsPath) {
|
|
|
1538
1738
|
const pages = [];
|
|
1539
1739
|
for (const file of files) {
|
|
1540
1740
|
if (!file.endsWith(".md")) continue;
|
|
1541
|
-
const content = await safeReadFile(
|
|
1741
|
+
const content = await safeReadFile(path13.join(conceptsPath, file));
|
|
1542
1742
|
if (!content) continue;
|
|
1543
1743
|
const { meta } = parseFrontmatter(content);
|
|
1544
1744
|
if (meta.orphaned) continue;
|
|
1545
|
-
const
|
|
1745
|
+
const slug = file.replace(/\.md$/, "");
|
|
1746
|
+
const title = typeof meta.title === "string" ? meta.title : slug;
|
|
1546
1747
|
const tags = Array.isArray(meta.tags) ? meta.tags : [];
|
|
1547
|
-
pages.push({ title, tags });
|
|
1748
|
+
pages.push({ slug, title, tags });
|
|
1548
1749
|
}
|
|
1549
1750
|
return pages;
|
|
1550
1751
|
}
|
|
@@ -1552,21 +1753,21 @@ function groupPagesByTag(pages) {
|
|
|
1552
1753
|
const groups = /* @__PURE__ */ new Map();
|
|
1553
1754
|
for (const page of pages) {
|
|
1554
1755
|
if (page.tags.length === 0) {
|
|
1555
|
-
appendToGroup(groups, "Uncategorized", page
|
|
1756
|
+
appendToGroup(groups, "Uncategorized", page);
|
|
1556
1757
|
continue;
|
|
1557
1758
|
}
|
|
1558
1759
|
for (const tag of page.tags) {
|
|
1559
|
-
appendToGroup(groups, tag, page
|
|
1760
|
+
appendToGroup(groups, tag, page);
|
|
1560
1761
|
}
|
|
1561
1762
|
}
|
|
1562
1763
|
return groups;
|
|
1563
1764
|
}
|
|
1564
|
-
function appendToGroup(groups, key,
|
|
1765
|
+
function appendToGroup(groups, key, page) {
|
|
1565
1766
|
const existing = groups.get(key);
|
|
1566
1767
|
if (existing) {
|
|
1567
|
-
existing.push(
|
|
1768
|
+
existing.push(page);
|
|
1568
1769
|
} else {
|
|
1569
|
-
groups.set(key, [
|
|
1770
|
+
groups.set(key, [page]);
|
|
1570
1771
|
}
|
|
1571
1772
|
}
|
|
1572
1773
|
function buildMOCContent(tagGroups) {
|
|
@@ -1577,10 +1778,10 @@ function buildMOCContent(tagGroups) {
|
|
|
1577
1778
|
return a.localeCompare(b);
|
|
1578
1779
|
});
|
|
1579
1780
|
for (const tag of sortedTags) {
|
|
1580
|
-
const
|
|
1781
|
+
const pages = tagGroups.get(tag) ?? [];
|
|
1581
1782
|
lines.push(`## ${tag}`, "");
|
|
1582
|
-
for (const
|
|
1583
|
-
lines.push(`- [[${title}]]`);
|
|
1783
|
+
for (const page of pages.sort((a, b) => a.title.localeCompare(b.title))) {
|
|
1784
|
+
lines.push(`- [[${page.slug}|${page.title}]]`);
|
|
1584
1785
|
}
|
|
1585
1786
|
lines.push("");
|
|
1586
1787
|
}
|
|
@@ -1588,9 +1789,9 @@ function buildMOCContent(tagGroups) {
|
|
|
1588
1789
|
}
|
|
1589
1790
|
|
|
1590
1791
|
// src/utils/embeddings.ts
|
|
1591
|
-
import { readFile as
|
|
1592
|
-
import { existsSync as
|
|
1593
|
-
import
|
|
1792
|
+
import { readFile as readFile8, readdir as readdir5 } from "fs/promises";
|
|
1793
|
+
import { existsSync as existsSync4 } from "fs";
|
|
1794
|
+
import path14 from "path";
|
|
1594
1795
|
function cosineSimilarity(a, b) {
|
|
1595
1796
|
if (a.length !== b.length || a.length === 0) return 0;
|
|
1596
1797
|
let dot = 0;
|
|
@@ -1613,13 +1814,13 @@ function findTopK(queryVec, store, k) {
|
|
|
1613
1814
|
return scored.slice(0, k).map((item) => item.entry);
|
|
1614
1815
|
}
|
|
1615
1816
|
async function readEmbeddingStore(root) {
|
|
1616
|
-
const filePath =
|
|
1617
|
-
if (!
|
|
1618
|
-
const raw = await
|
|
1817
|
+
const filePath = path14.join(root, EMBEDDINGS_FILE);
|
|
1818
|
+
if (!existsSync4(filePath)) return null;
|
|
1819
|
+
const raw = await readFile8(filePath, "utf-8");
|
|
1619
1820
|
return JSON.parse(raw);
|
|
1620
1821
|
}
|
|
1621
1822
|
async function writeEmbeddingStore(root, store) {
|
|
1622
|
-
const filePath =
|
|
1823
|
+
const filePath = path14.join(root, EMBEDDINGS_FILE);
|
|
1623
1824
|
await atomicWrite(filePath, JSON.stringify(store, null, 2));
|
|
1624
1825
|
}
|
|
1625
1826
|
async function findRelevantPages(root, question) {
|
|
@@ -1640,7 +1841,7 @@ async function findRelevantPages(root, question) {
|
|
|
1640
1841
|
async function collectPageRecords(root) {
|
|
1641
1842
|
const records = [];
|
|
1642
1843
|
for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
|
|
1643
|
-
const absDir =
|
|
1844
|
+
const absDir = path14.join(root, dir);
|
|
1644
1845
|
let files;
|
|
1645
1846
|
try {
|
|
1646
1847
|
files = await readdir5(absDir);
|
|
@@ -1648,7 +1849,7 @@ async function collectPageRecords(root) {
|
|
|
1648
1849
|
continue;
|
|
1649
1850
|
}
|
|
1650
1851
|
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
1651
|
-
const content = await safeReadFile(
|
|
1852
|
+
const content = await safeReadFile(path14.join(absDir, file));
|
|
1652
1853
|
const { meta } = parseFrontmatter(content);
|
|
1653
1854
|
if (meta.orphaned || typeof meta.title !== "string") continue;
|
|
1654
1855
|
records.push({
|
|
@@ -1741,8 +1942,8 @@ async function updateEmbeddings(root, changedSlugs) {
|
|
|
1741
1942
|
|
|
1742
1943
|
// src/compiler/candidates.ts
|
|
1743
1944
|
import { readdir as readdir6, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
|
|
1744
|
-
import { existsSync as
|
|
1745
|
-
import
|
|
1945
|
+
import { existsSync as existsSync5 } from "fs";
|
|
1946
|
+
import path15 from "path";
|
|
1746
1947
|
import { randomBytes } from "crypto";
|
|
1747
1948
|
var ID_SUFFIX_BYTES = 4;
|
|
1748
1949
|
var CANDIDATE_EXT = ".json";
|
|
@@ -1751,10 +1952,10 @@ function buildCandidateId(slug) {
|
|
|
1751
1952
|
return `${slug}-${suffix}`;
|
|
1752
1953
|
}
|
|
1753
1954
|
function candidatePath(root, id) {
|
|
1754
|
-
return
|
|
1955
|
+
return path15.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
|
|
1755
1956
|
}
|
|
1756
1957
|
function archivePath(root, id) {
|
|
1757
|
-
return
|
|
1958
|
+
return path15.join(root, CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_EXT}`);
|
|
1758
1959
|
}
|
|
1759
1960
|
async function writeCandidate(root, draft) {
|
|
1760
1961
|
const candidate = {
|
|
@@ -1765,7 +1966,8 @@ async function writeCandidate(root, draft) {
|
|
|
1765
1966
|
sources: draft.sources,
|
|
1766
1967
|
body: draft.body,
|
|
1767
1968
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1768
|
-
...draft.sourceStates ? { sourceStates: draft.sourceStates } : {}
|
|
1969
|
+
...draft.sourceStates ? { sourceStates: draft.sourceStates } : {},
|
|
1970
|
+
...draft.schemaViolations ? { schemaViolations: draft.schemaViolations } : {}
|
|
1769
1971
|
};
|
|
1770
1972
|
await atomicWrite(candidatePath(root, candidate.id), JSON.stringify(candidate, null, 2));
|
|
1771
1973
|
return candidate;
|
|
@@ -1804,8 +2006,8 @@ function isValidCandidate(value) {
|
|
|
1804
2006
|
return typeof candidate.id === "string" && typeof candidate.title === "string" && typeof candidate.slug === "string" && typeof candidate.body === "string" && Array.isArray(candidate.sources);
|
|
1805
2007
|
}
|
|
1806
2008
|
async function listCandidates(root) {
|
|
1807
|
-
const dir =
|
|
1808
|
-
if (!
|
|
2009
|
+
const dir = path15.join(root, CANDIDATES_DIR);
|
|
2010
|
+
if (!existsSync5(dir)) return [];
|
|
1809
2011
|
const entries = await readdir6(dir, { withFileTypes: true });
|
|
1810
2012
|
const candidates = [];
|
|
1811
2013
|
for (const entry of entries) {
|
|
@@ -1823,15 +2025,15 @@ async function countCandidates(root) {
|
|
|
1823
2025
|
}
|
|
1824
2026
|
async function deleteCandidate(root, id) {
|
|
1825
2027
|
const filePath = candidatePath(root, id);
|
|
1826
|
-
if (!
|
|
2028
|
+
if (!existsSync5(filePath)) return false;
|
|
1827
2029
|
await unlink2(filePath);
|
|
1828
2030
|
return true;
|
|
1829
2031
|
}
|
|
1830
2032
|
async function archiveCandidate(root, id) {
|
|
1831
2033
|
const sourcePath = candidatePath(root, id);
|
|
1832
|
-
if (!
|
|
2034
|
+
if (!existsSync5(sourcePath)) return false;
|
|
1833
2035
|
const target = archivePath(root, id);
|
|
1834
|
-
await mkdir5(
|
|
2036
|
+
await mkdir5(path15.dirname(target), { recursive: true });
|
|
1835
2037
|
try {
|
|
1836
2038
|
await rename3(sourcePath, target);
|
|
1837
2039
|
} catch {
|
|
@@ -1842,151 +2044,493 @@ async function archiveCandidate(root, id) {
|
|
|
1842
2044
|
return true;
|
|
1843
2045
|
}
|
|
1844
2046
|
|
|
1845
|
-
// src/
|
|
1846
|
-
import { readdir as readdir7 } from "fs/promises";
|
|
1847
|
-
import
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
|
|
1860
|
-
if (typeof concept.inferredParagraphs === "number") {
|
|
1861
|
-
fields.inferredParagraphs = concept.inferredParagraphs;
|
|
2047
|
+
// src/linter/rules.ts
|
|
2048
|
+
import { readdir as readdir7, readFile as readFile9 } from "fs/promises";
|
|
2049
|
+
import { existsSync as existsSync6 } from "fs";
|
|
2050
|
+
import path16 from "path";
|
|
2051
|
+
var MIN_BODY_LENGTH = 50;
|
|
2052
|
+
var WIKILINK_PATTERN2 = /\[\[([^\]]+)\]\]/g;
|
|
2053
|
+
var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
|
|
2054
|
+
/**
 * Collect every match of a pattern in a text, with its line number.
 *
 * @param {string} content - Text to scan; split on "\n".
 * @param {RegExp} pattern - Pattern handed to String#matchAll for each line.
 * @returns {{ captured: string, line: number }[]} One record per match, in
 *   document order: the first capture group and the 1-based line it is on.
 */
function findMatchesInContent(content, pattern) {
  return content.split("\n").flatMap((text, index) =>
    Array.from(text.matchAll(pattern), (hit) => ({
      captured: hit[1],
      line: index + 1,
    }))
  );
}
|
|
1864
|
-
function
|
|
1865
|
-
|
|
1866
|
-
|
|
1867
|
-
const
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
}
|
|
1873
|
-
|
|
1874
|
-
// src/compiler/page-renderer.ts
|
|
1875
|
-
var RELATED_PAGE_CONTEXT_LIMIT = 5;
|
|
1876
|
-
async function renderMergedPageContent(root, entry) {
|
|
1877
|
-
const pagePath = path15.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
|
|
1878
|
-
const existingPage = await safeReadFile(pagePath);
|
|
1879
|
-
const relatedPages = await loadRelatedPages(root, entry.slug);
|
|
1880
|
-
const system = buildPagePrompt(
|
|
1881
|
-
entry.concept.concept,
|
|
1882
|
-
entry.combinedContent,
|
|
1883
|
-
existingPage,
|
|
1884
|
-
relatedPages
|
|
2065
|
+
async function readMarkdownFiles(dirPath) {
|
|
2066
|
+
if (!existsSync6(dirPath)) return [];
|
|
2067
|
+
const entries = await readdir7(dirPath);
|
|
2068
|
+
const mdFiles = entries.filter((f) => f.endsWith(".md"));
|
|
2069
|
+
const results = await Promise.all(
|
|
2070
|
+
mdFiles.map(async (fileName) => {
|
|
2071
|
+
const filePath = path16.join(dirPath, fileName);
|
|
2072
|
+
const content = await readFile9(filePath, "utf-8");
|
|
2073
|
+
return { filePath, content };
|
|
2074
|
+
})
|
|
1885
2075
|
);
|
|
1886
|
-
|
|
1887
|
-
system,
|
|
1888
|
-
messages: [
|
|
1889
|
-
{ role: "user", content: `Write the wiki page for "${entry.concept.concept}".` }
|
|
1890
|
-
]
|
|
1891
|
-
});
|
|
1892
|
-
const frontmatter = buildMergedFrontmatter(entry, existingPage);
|
|
1893
|
-
reportContradictionWarnings(entry.concept.concept, entry.concept);
|
|
1894
|
-
return `${frontmatter}
|
|
1895
|
-
|
|
1896
|
-
${pageBody}
|
|
1897
|
-
`;
|
|
2076
|
+
return results;
|
|
1898
2077
|
}
|
|
1899
|
-
function
|
|
1900
|
-
const
|
|
1901
|
-
const
|
|
1902
|
-
|
|
1903
|
-
const frontmatterFields = {
|
|
1904
|
-
title: entry.concept.concept,
|
|
1905
|
-
summary: entry.concept.summary,
|
|
1906
|
-
sources: entry.sourceFiles,
|
|
1907
|
-
createdAt,
|
|
1908
|
-
updatedAt: now
|
|
1909
|
-
};
|
|
1910
|
-
addObsidianMeta(frontmatterFields, entry.concept.concept, entry.concept.tags ?? []);
|
|
1911
|
-
addProvenanceMeta(frontmatterFields, entry.concept);
|
|
1912
|
-
return buildFrontmatter(frontmatterFields);
|
|
2078
|
+
/**
 * Load every markdown page under the concepts and queries directories of `root`.
 *
 * @param {string} root - Project root that CONCEPTS_DIR and QUERIES_DIR are joined onto.
 * @returns {Promise<Array>} Concept pages followed by query pages, as returned
 *   by readMarkdownFiles for each directory.
 */
async function collectAllPages(root) {
  // The two directory reads are independent, so run them concurrently.
  // Promise.all keeps positional order, so concept pages still come first.
  const [conceptPages, queryPages] = await Promise.all([
    readMarkdownFiles(path16.join(root, CONCEPTS_DIR)),
    readMarkdownFiles(path16.join(root, QUERIES_DIR))
  ]);
  return [...conceptPages, ...queryPages];
}
|
|
1914
|
-
|
|
1915
|
-
const
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
} catch {
|
|
1920
|
-
return "";
|
|
1921
|
-
}
|
|
1922
|
-
const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, RELATED_PAGE_CONTEXT_LIMIT);
|
|
1923
|
-
const contents = [];
|
|
1924
|
-
for (const f of related) {
|
|
1925
|
-
const content = await safeReadFile(path15.join(conceptsPath, f));
|
|
1926
|
-
if (!content) continue;
|
|
1927
|
-
const { meta } = parseFrontmatter(content);
|
|
1928
|
-
if (meta.orphaned) continue;
|
|
1929
|
-
contents.push(content);
|
|
2083
|
+
function buildPageSlugSet(pages) {
|
|
2084
|
+
const slugs = /* @__PURE__ */ new Set();
|
|
2085
|
+
for (const page of pages) {
|
|
2086
|
+
const baseName = path16.basename(page.filePath, ".md");
|
|
2087
|
+
slugs.add(baseName.toLowerCase());
|
|
1930
2088
|
}
|
|
1931
|
-
return
|
|
1932
|
-
}
|
|
1933
|
-
|
|
1934
|
-
// src/compiler/index.ts
|
|
1935
|
-
import pLimit from "p-limit";
|
|
1936
|
-
function emptyCompileResult() {
|
|
1937
|
-
return { compiled: 0, skipped: 0, deleted: 0, concepts: [], pages: [], errors: [] };
|
|
1938
|
-
}
|
|
1939
|
-
async function compile(root, options = {}) {
|
|
1940
|
-
await compileAndReport(root, options);
|
|
2089
|
+
return slugs;
|
|
1941
2090
|
}
|
|
1942
|
-
async function
|
|
1943
|
-
|
|
1944
|
-
const
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
2091
|
+
/**
 * Lint rule: report each [[wikilink]] whose slugified target matches no page.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @returns {Promise<object[]>} One "broken-wikilink" error per unresolved link,
 *   carrying the page file path and the 1-based line of the link.
 */
async function checkBrokenWikilinks(root) {
  const allPages = await collectAllPages(root);
  const knownSlugs = buildPageSlugSet(allPages);
  return allPages.flatMap(({ filePath, content }) =>
    findMatchesInContent(content, WIKILINK_PATTERN2)
      .filter((link) => !knownSlugs.has(slugify(link.captured)))
      .map((link) => ({
        rule: "broken-wikilink",
        severity: "error",
        file: filePath,
        message: `Broken wikilink [[${link.captured}]] \u2014 no matching page found`,
        line: link.line
      }))
  );
}
|
|
1958
|
-
function
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
|
|
1971
|
-
const pages = await Promise.all(
|
|
1972
|
-
merged.map((entry) => limit(async () => {
|
|
1973
|
-
const result = await generateMergedPage(root, entry, options, sourceStates);
|
|
1974
|
-
if (result.error) errors.push(result.error);
|
|
1975
|
-
if (result.candidateId) candidates.push(result.candidateId);
|
|
1976
|
-
return entry;
|
|
1977
|
-
}))
|
|
1978
|
-
);
|
|
1979
|
-
return { pages, errors, candidates };
|
|
1980
|
-
}
|
|
1981
|
-
async function persistExtractionStates(root, extractions) {
|
|
1982
|
-
for (const result of extractions) {
|
|
1983
|
-
if (result.concepts.length === 0) continue;
|
|
1984
|
-
await persistSourceState(root, result.sourcePath, result.sourceFile, result.concepts);
|
|
2111
|
+
/**
 * Lint rule: report pages whose frontmatter has `orphaned` set to exactly `true`.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @returns {Promise<object[]>} One "orphaned-page" warning per flagged page.
 */
async function checkOrphanedPages(root) {
  const allPages = await collectAllPages(root);
  return allPages
    .filter(({ content }) => parseFrontmatter(content).meta.orphaned === true)
    .map(({ filePath }) => ({
      rule: "orphaned-page",
      severity: "warning",
      file: filePath,
      message: "Page is marked as orphaned"
    }));
}
|
|
1987
|
-
function
|
|
1988
|
-
|
|
1989
|
-
|
|
2127
|
+
/**
 * Lint rule: report pages whose frontmatter `summary` is falsy or a
 * whitespace-only string.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @returns {Promise<object[]>} One "missing-summary" warning per offending page.
 */
async function checkMissingSummaries(root) {
  const findings = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const { summary } = parseFrontmatter(content).meta;
    const isBlankString = typeof summary === "string" && summary.trim() === "";
    // A summary is present only when it is truthy and not a blank string.
    if (summary && !isBlankString) continue;
    findings.push({
      rule: "missing-summary",
      severity: "warning",
      file: filePath,
      message: "Page has no summary in frontmatter"
    });
  }
  return findings;
}
|
|
2145
|
+
/**
 * Lint rule: report pages that share a frontmatter title, compared after
 * lower-casing and trimming.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @returns {Promise<object[]>} One "duplicate-concept" error per page in each
 *   group of two or more; the message lists the other files in the group.
 */
async function checkDuplicateConcepts(root) {
  const filesByTitle = new Map();
  for (const { filePath, content } of await collectAllPages(root)) {
    const rawTitle = parseFrontmatter(content).meta.title;
    // Pages with a non-string or empty title take no part in the comparison.
    if (typeof rawTitle !== "string" || rawTitle === "") continue;
    const key = rawTitle.toLowerCase().trim();
    if (!filesByTitle.has(key)) filesByTitle.set(key, []);
    filesByTitle.get(key).push(filePath);
  }
  const findings = [];
  filesByTitle.forEach((files, title) => {
    if (files.length < 2) return;
    files.forEach((file) => {
      const others = files.filter((candidate) => candidate !== file).join(", ");
      findings.push({
        rule: "duplicate-concept",
        severity: "error",
        file,
        message: `Duplicate title "${title}" \u2014 also in ${others}`
      });
    });
  });
  return findings;
}
|
|
2171
|
+
/**
 * Lint rule: report titled pages whose trimmed body is shorter than
 * MIN_BODY_LENGTH characters.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @returns {Promise<object[]>} One "empty-page" warning per offending page.
 */
async function checkEmptyPages(root) {
  const findings = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const parsed = parseFrontmatter(content);
    const { title } = parsed.meta;
    const titled = typeof title === "string" && title.trim() !== "";
    const tooShort = parsed.body.trim().length < MIN_BODY_LENGTH;
    // Pages without a usable title are not reported by this rule.
    if (!titled || !tooShort) continue;
    findings.push({
      rule: "empty-page",
      severity: "warning",
      file: filePath,
      message: `Page body is empty or too short (< ${MIN_BODY_LENGTH} chars)`
    });
  }
  return findings;
}
|
|
2189
|
+
/**
 * Return the part of a citation entry before its first ":" or "#".
 *
 * @param {string} entry - Citation entry such as "file.md:3-5" or "file.md#L3".
 * @returns {string} Text before the first ":" or "#", or `entry` unchanged
 *   when it contains neither character.
 */
function stripSpanSuffix(entry) {
  const cutAt = entry.search(/[:#]/);
  return cutAt === -1 ? entry : entry.slice(0, cutAt);
}
|
|
2196
|
+
/**
 * Lint rule: report pages whose provenance confidence is defined and not at
 * least LOW_CONFIDENCE_THRESHOLD.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @returns {Promise<object[]>} One "low-confidence" warning per offending page.
 */
async function checkLowConfidencePages(root) {
  const findings = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const { confidence } = parseProvenanceMetadata(parseFrontmatter(content).meta);
    if (confidence === undefined) continue;
    // Written as a negated ">=" so a NaN confidence is still reported.
    if (!(confidence >= LOW_CONFIDENCE_THRESHOLD)) {
      findings.push({
        rule: "low-confidence",
        severity: "warning",
        file: filePath,
        message: `Page confidence ${confidence.toFixed(2)} is below ${LOW_CONFIDENCE_THRESHOLD}`
      });
    }
  }
  return findings;
}
|
|
2212
|
+
/**
 * Lint rule: report pages whose provenance metadata lists at least one
 * `contradictedBy` reference.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @returns {Promise<object[]>} One "contradicted-page" warning per page, with
 *   the referenced slugs joined by ", " in the message.
 */
async function checkContradictedPages(root) {
  const findings = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const { contradictedBy } = parseProvenanceMetadata(parseFrontmatter(content).meta);
    const hasConflicts = Boolean(contradictedBy) && contradictedBy.length !== 0;
    if (!hasConflicts) continue;
    const conflicting = contradictedBy.map((ref) => ref.slug).join(", ");
    findings.push({
      rule: "contradicted-page",
      severity: "warning",
      file: filePath,
      message: `Page contradicts: ${conflicting}`
    });
  }
  return findings;
}
|
|
2229
|
+
/**
 * Lint rule: report pages with more inferred paragraphs than
 * MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS.
 *
 * The count comes from the `inferredParagraphs` provenance field when it is
 * not nullish; otherwise the body is counted with countUncitedProseParagraphs.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @returns {Promise<object[]>} One "excess-inferred-paragraphs" warning per page over the limit.
 */
async function checkInferredWithoutCitations(root) {
  const findings = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const parsed = parseFrontmatter(content);
    const declared = parseProvenanceMetadata(parsed.meta).inferredParagraphs;
    const inferred = declared ?? countUncitedProseParagraphs(parsed.body);
    const withinLimit = inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS;
    if (withinLimit) continue;
    findings.push({
      rule: "excess-inferred-paragraphs",
      severity: "warning",
      file: filePath,
      message: `Page has ${inferred} inferred paragraphs without citations (max ${MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS})`
    });
  }
  return findings;
}
|
|
2246
|
+
var PROSE_PARAGRAPH_LEAD = /^[A-Za-z]/;
|
|
2247
|
+
/**
 * Count the paragraphs of `body` that look like prose and carry no citation.
 *
 * Paragraphs are separated by blank lines. A paragraph is counted when, after
 * trimming, it is non-empty, matches PROSE_PARAGRAPH_LEAD, and does not match
 * CITATION_PATTERN.
 *
 * @param {string} body - Page body without frontmatter.
 * @returns {number} Number of uncited prose paragraphs.
 */
function countUncitedProseParagraphs(body) {
  let count = 0;
  for (const block of body.split(/\n\s*\n/)) {
    const trimmed = block.trim();
    if (trimmed.length === 0) continue;
    if (!PROSE_PARAGRAPH_LEAD.test(trimmed)) continue;
    // CITATION_PATTERN is a shared /g regex, so .test() resumes from its
    // lastIndex. Reset it before testing so stale state cannot hide a
    // citation, and again afterwards so other users of the pattern start
    // from index 0.
    CITATION_PATTERN.lastIndex = 0;
    const isCited = CITATION_PATTERN.test(trimmed);
    CITATION_PATTERN.lastIndex = 0;
    if (!isCited) count += 1;
  }
  return count;
}
|
|
2263
|
+
var COLON_SPAN_PATTERN = /^[^:#]+:(\d+)(?:-(\d+))?$/;
|
|
2264
|
+
var HASH_SPAN_PATTERN = /^[^:#]+#L(\d+)(?:-L(\d+))?$/;
|
|
2265
|
+
/**
 * Lint rule: report pages with fewer [[wikilinks]] than the minimum the
 * schema sets for their page kind.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @param {object} schema - Schema whose `kinds[kind].minWikilinks` sets the minimum.
 * @returns {Promise<object[]>} The "schema-cross-link-minimum" warnings for all
 *   pages, in page order.
 */
async function checkSchemaCrossLinks(root, schema) {
  const pages = await collectAllPages(root);
  // checkPageCrossLinks applies the same per-page rule to one page, so
  // delegate to it rather than keeping a second copy of the logic.
  return pages.flatMap((page) => checkPageCrossLinks(page.content, page.filePath, schema));
}
|
|
2284
|
+
/**
 * Check one page against the schema's minimum wikilink count for its kind.
 *
 * @param {string} content - Full page text including frontmatter.
 * @param {string} filePath - Path recorded in the `file` field of the result.
 * @param {object} schema - Schema providing `kinds[kind].minWikilinks`.
 * @returns {object[]} An empty array when the minimum is <= 0 or is met,
 *   otherwise a single "schema-cross-link-minimum" warning.
 */
function checkPageCrossLinks(content, filePath, schema) {
  const parsed = parseFrontmatter(content);
  const kind = resolvePageKind(parsed.meta.kind, schema);
  const { minWikilinks } = schema.kinds[kind];
  if (minWikilinks <= 0) return [];
  const found = countWikilinks(parsed.body);
  if (found >= minWikilinks) return [];
  const violation = {
    rule: "schema-cross-link-minimum",
    severity: "warning",
    file: filePath,
    message: `Page kind "${kind}" requires at least ${minWikilinks} [[wikilinks]] but only ${found} found.`
  };
  return [violation];
}
|
|
2300
|
+
/**
 * Parse the line span of a citation entry.
 *
 * COLON_SPAN_PATTERN is tried first, then HASH_SPAN_PATTERN. When the end
 * capture is absent the span covers the single start line.
 *
 * @param {string} entry - Trimmed citation entry.
 * @returns {{start: number, end: number} | null} The span, or null when
 *   neither pattern matches.
 */
function parseLineRange(entry) {
  for (const spanPattern of [COLON_SPAN_PATTERN, HASH_SPAN_PATTERN]) {
    const found = spanPattern.exec(entry);
    if (!found) continue;
    const [, first, last] = found;
    const start = Number(first);
    return { start, end: last === undefined ? start : Number(last) };
  }
  return null;
}
|
|
2315
|
+
/**
 * Count the lines in `content`.
 *
 * A final "\n" ends the last line and does not start a new one, so "a\nb\n"
 * has 2 lines. Counting split segments alone reports 3, which lets a citation
 * of that non-existent third line pass the bounds check.
 *
 * @param {string} content - File contents.
 * @returns {number} 0 for an empty string, otherwise the number of lines.
 */
function countLines(content) {
  if (content.length === 0) return 0;
  const lineBreaks = content.split("\n").length - 1;
  return content.endsWith("\n") ? lineBreaks : lineBreaks + 1;
}
|
|
2319
|
+
/**
 * Lint rule: check every ^[...] citation marker on every page against the
 * sources directory.
 *
 * @param {string} root - Project root; sources are looked up under SOURCES_DIR.
 * @returns {Promise<object[]>} The findings appended by collectBrokenForMarker,
 *   in page and marker order.
 */
async function checkBrokenCitations(root) {
  const findings = [];
  // Shared across all markers; collectBrokenForMarker passes it to resolveLineCount.
  const lineCounts = new Map();
  const allPages = await collectAllPages(root);
  const sourcesRoot = path16.join(root, SOURCES_DIR);
  for (const { filePath, content } of allPages) {
    const markers = findMatchesInContent(content, CITATION_PATTERN);
    for (const marker of markers) {
      await collectBrokenForMarker(marker.captured, marker.line, filePath, sourcesRoot, lineCounts, findings);
    }
  }
  return findings;
}
|
|
2331
|
+
/**
 * Check each comma-separated entry of one citation marker and append a
 * "broken-citation" error to `out` for every problem found.
 *
 * An entry is reported when its source file does not exist under
 * `sourcesDir`, or when it has a line span whose end exceeds the source's
 * line count. Empty entries are ignored.
 *
 * @param {string} captured - Text inside the ^[...] marker.
 * @param {number} line - Line of the marker, copied into each finding.
 * @param {string} pageFile - Page path, copied into each finding.
 * @param {string} sourcesDir - Directory the cited file names are joined onto.
 * @param {Map} lineCountCache - Cache handed to resolveLineCount.
 * @param {object[]} out - Array that receives the findings.
 * @returns {Promise<void>}
 */
async function collectBrokenForMarker(captured, line, pageFile, sourcesDir, lineCountCache, out) {
  const report = (message) => {
    out.push({ rule: "broken-citation", severity: "error", file: pageFile, message, line });
  };
  const entries = captured
    .split(",")
    .map((part) => part.trim())
    .filter((part) => part.length > 0);
  for (const entry of entries) {
    const filename = stripSpanSuffix(entry);
    const citedPath = path16.join(sourcesDir, filename);
    if (!existsSync6(citedPath)) {
      report(`Broken citation ^[${filename}] \u2014 source file not found`);
      continue;
    }
    const range = parseLineRange(entry);
    // Entries without a line span need no bounds check.
    if (range === null) continue;
    const lineCount = await resolveLineCount(citedPath, filename, lineCountCache);
    if (range.end > lineCount) {
      report(`Claim-level span ^[${entry}] is out of bounds (source has only ${lineCount} lines)`);
    }
  }
}
|
|
2360
|
+
/**
 * Return the line count of a cited source, cached under `filename`.
 *
 * @param {string} citedPath - Path read with safeReadFile on a cache miss.
 * @param {string} filename - Cache key.
 * @param {Map} cache - Map from filename to line count; updated on a miss.
 * @returns {Promise<number>} Line count computed by countLines.
 */
async function resolveLineCount(citedPath, filename, cache) {
  if (cache.get(filename) === undefined) {
    cache.set(filename, countLines(await safeReadFile(citedPath)));
  }
  return cache.get(filename);
}
|
|
2368
|
+
/**
 * Lint rule: report citation markers that contain an entry rejected by
 * isMalformedCitationEntry.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @returns {Promise<object[]>} One "malformed-claim-citation" error per
 *   malformed comma-separated entry; the message quotes the whole marker text.
 */
async function checkMalformedClaimCitations(root) {
  const findings = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    for (const marker of findMatchesInContent(content, CITATION_PATTERN)) {
      const malformed = marker.captured.split(",").filter((part) => isMalformedCitationEntry(part));
      malformed.forEach(() => {
        findings.push({
          rule: "malformed-claim-citation",
          severity: "error",
          file: filePath,
          message: `Malformed claim citation ^[${marker.captured}] \u2014 expected file.md, file.md:N-N, or file.md#LN-LN`,
          line: marker.line
        });
      });
    }
  }
  return findings;
}
|
|
2387
|
+
|
|
2388
|
+
// src/compiler/page-renderer.ts
|
|
2389
|
+
import { readdir as readdir8 } from "fs/promises";
|
|
2390
|
+
import path17 from "path";
|
|
2391
|
+
|
|
2392
|
+
// src/compiler/provenance.ts
|
|
2393
|
+
/**
 * Copy the provenance fields present on `concept` into `fields`, in place.
 *
 * `confidence` and `inferredParagraphs` are copied when they are numbers,
 * `provenanceState` when truthy, and `contradictedBy` when it has at least
 * one element.
 *
 * @param {object} fields - Frontmatter field object to mutate.
 * @param {object} concept - Extracted concept that may carry provenance data.
 * @returns {void}
 */
function addProvenanceMeta(fields, concept) {
  const { confidence, provenanceState, contradictedBy, inferredParagraphs } = concept;
  if (typeof confidence === "number") fields.confidence = confidence;
  if (provenanceState) fields.provenanceState = provenanceState;
  if (contradictedBy?.length > 0) fields.contradictedBy = contradictedBy;
  if (typeof inferredParagraphs === "number") fields.inferredParagraphs = inferredParagraphs;
}
|
|
2407
|
+
/**
 * Emit a "!" status warning when `concept` lists contradicting references.
 * Does nothing when `contradictedBy` is missing or empty.
 *
 * @param {string} conceptTitle - Title quoted in the warning.
 * @param {object} concept - Concept whose `contradictedBy` slugs are listed.
 * @returns {void}
 */
function reportContradictionWarnings(conceptTitle, concept) {
  const { contradictedBy } = concept;
  if (!contradictedBy || contradictedBy.length === 0) return;
  const conflicting = contradictedBy.map((ref) => ref.slug).join(", ");
  const message = `Contradiction reported on "${conceptTitle}" \u2014 conflicts with: ${conflicting}`;
  status("!", warn(message));
}
|
|
2416
|
+
|
|
2417
|
+
// src/compiler/page-renderer.ts
|
|
2418
|
+
var RELATED_PAGE_CONTEXT_LIMIT = 5;
|
|
2419
|
+
/**
 * Produce the full text (frontmatter plus generated body) of a concept page.
 *
 * Reads the existing page and related pages, builds the system prompt with
 * buildPagePrompt, asks callClaude for the body, then prepends frontmatter
 * from buildMergedFrontmatter. Contradictions on the concept are reported
 * through reportContradictionWarnings.
 *
 * @param {string} root - Project root.
 * @param {object} entry - Merged extraction entry (`slug`, `concept`, `combinedContent`, ...).
 * @param {object} schema - Schema forwarded to buildMergedFrontmatter.
 * @returns {Promise<string>} Frontmatter, a blank line, the body, and a trailing newline.
 */
async function renderMergedPageContent(root, entry, schema) {
  const { concept, slug, combinedContent } = entry;
  const existingPage = await safeReadFile(path17.join(root, CONCEPTS_DIR, `${slug}.md`));
  const relatedPages = await loadRelatedPages(root, slug);
  const request = {
    system: buildPagePrompt(concept.concept, combinedContent, existingPage, relatedPages),
    messages: [
      { role: "user", content: `Write the wiki page for "${concept.concept}".` }
    ]
  };
  const pageBody = await callClaude(request);
  const frontmatter = buildMergedFrontmatter(entry, existingPage, schema);
  reportContradictionWarnings(concept.concept, concept);
  return `${frontmatter}\n\n${pageBody}\n`;
}
|
|
2442
|
+
/**
 * Build the frontmatter block for a merged concept page.
 *
 * `createdAt` is carried over from the existing page when that page has a
 * non-empty string value; otherwise the current time is used. `updatedAt` is
 * always the current time and `kind` is the schema's default kind.
 *
 * @param {object} entry - Merged extraction entry (`concept`, `sourceFiles`).
 * @param {string} existingPage - Current page text; falsy when there is none.
 * @param {object} schema - Schema providing `defaultKind`.
 * @returns {*} Whatever buildFrontmatter returns for the assembled fields.
 */
function buildMergedFrontmatter(entry, existingPage, schema) {
  const now = new Date().toISOString();
  const previousCreatedAt = existingPage ? parseFrontmatter(existingPage).meta.createdAt : undefined;
  const keepPrevious = typeof previousCreatedAt === "string" && previousCreatedAt !== "";
  const { concept } = entry;
  const frontmatterFields = {
    title: concept.concept,
    summary: concept.summary,
    sources: entry.sourceFiles,
    kind: schema.defaultKind,
    createdAt: keepPrevious ? previousCreatedAt : now,
    updatedAt: now
  };
  addObsidianMeta(frontmatterFields, concept.concept, concept.tags ?? []);
  addProvenanceMeta(frontmatterFields, concept);
  return buildFrontmatter(frontmatterFields);
}
|
|
2458
|
+
/**
 * Load up to RELATED_PAGE_CONTEXT_LIMIT concept pages other than
 * `excludeSlug`, for use as prompt context.
 *
 * Unreadable or empty pages and pages flagged `orphaned` are skipped. The
 * limit counts only the pages actually included, so skipped files no longer
 * use up slots and shrink the context.
 *
 * @param {string} root - Project root; pages are read from CONCEPTS_DIR.
 * @param {string} excludeSlug - Slug of the page being generated.
 * @returns {Promise<string>} Page texts joined by "\n\n---\n\n", or "" when the
 *   directory cannot be listed or nothing qualifies.
 */
async function loadRelatedPages(root, excludeSlug) {
  const conceptsPath = path17.join(root, CONCEPTS_DIR);
  let files;
  try {
    files = await readdir8(conceptsPath);
  } catch {
    // Best effort: a concepts directory that cannot be listed means no context.
    return "";
  }
  const excludedFile = `${excludeSlug}.md`;
  const contents = [];
  for (const file of files) {
    if (contents.length >= RELATED_PAGE_CONTEXT_LIMIT) break;
    if (!file.endsWith(".md") || file === excludedFile) continue;
    const content = await safeReadFile(path17.join(conceptsPath, file));
    if (!content) continue;
    if (parseFrontmatter(content).meta.orphaned) continue;
    contents.push(content);
  }
  return contents.join("\n\n---\n\n");
}
|
|
2477
|
+
|
|
2478
|
+
// src/compiler/index.ts
|
|
2479
|
+
import pLimit from "p-limit";
|
|
2480
|
+
/**
 * Create a fresh compile result with zero counters and empty lists.
 *
 * @returns {{compiled: number, skipped: number, deleted: number, concepts: Array, pages: Array, errors: Array}}
 *   A new object with new arrays on every call.
 */
function emptyCompileResult() {
  const counters = { compiled: 0, skipped: 0, deleted: 0 };
  return { ...counters, concepts: [], pages: [], errors: [] };
}
|
|
2483
|
+
/**
 * Run compileAndReport for `root` and discard its result.
 *
 * @param {string} root - Project root.
 * @param {object} [options={}] - Options forwarded unchanged.
 * @returns {Promise<void>} Resolves once compileAndReport has settled.
 */
async function compile(root, options = {}) {
  const pending = compileAndReport(root, options);
  await pending;
}
|
|
2486
|
+
/**
 * Run the compile pipeline while holding the project lock.
 *
 * When acquireLock returns a falsy value the pipeline is not run and an empty
 * result carrying one error message is returned. Otherwise the lock is
 * released in a `finally`, whether the pipeline succeeds or throws.
 *
 * @param {string} root - Project root.
 * @param {object} [options={}] - Options forwarded to runCompilePipeline.
 * @returns {Promise<object>} The pipeline result, or the lock-failure result.
 */
async function compileAndReport(root, options = {}) {
  header("llmwiki compile");
  if (!(await acquireLock(root))) {
    status("!", error("Could not acquire lock. Try again later."));
    const lockFailure = emptyCompileResult();
    lockFailure.errors = ["Could not acquire .llmwiki/lock \u2014 another compile is in progress."];
    return lockFailure;
  }
  try {
    return await runCompilePipeline(root, options);
  } finally {
    await releaseLock(root);
  }
}
|
|
2502
|
+
/**
 * Group detected changes by status.
 *
 * @param {Array<{status: string}>} changes - Detected source changes.
 * @returns {{toCompile: Array, deleted: Array, unchanged: Array}} "new" and
 *   "changed" entries go to `toCompile`; "deleted" and "unchanged" entries go
 *   to their own lists; any other status is dropped. Input order is kept.
 */
function bucketChanges(changes) {
  const toCompile = [];
  const deleted = [];
  const unchanged = [];
  changes.forEach((change) => {
    switch (change.status) {
      case "new":
      case "changed":
        toCompile.push(change);
        break;
      case "deleted":
        deleted.push(change);
        break;
      case "unchanged":
        unchanged.push(change);
        break;
      default:
        break;
    }
  });
  return { toCompile, deleted, unchanged };
}
|
|
2509
|
+
/**
 * Generate a page for every merged concept, at most COMPILE_CONCURRENCY at a time.
 *
 * @param {string} root - Project root.
 * @param {Array} extractions - Per-source extraction results.
 * @param {*} frozenSlugs - Forwarded to mergeExtractions.
 * @param {object} schema - Schema forwarded to generateMergedPage.
 * @param {object} options - Compile options; `review` decides whether source
 *   states are built for candidates.
 * @returns {Promise<{pages: Array, errors: Array, candidates: Array}>} The
 *   merged entries, plus every error and candidate id reported by
 *   generateMergedPage.
 */
async function generatePagesPhase(root, extractions, frozenSlugs, schema, options) {
  const merged = mergeExtractions(extractions, frozenSlugs);
  let sourceStates = {};
  if (options.review) {
    sourceStates = await buildExtractionSourceStates(root, extractions);
  }
  const limit = pLimit(COMPILE_CONCURRENCY);
  const errors = [];
  const candidates = [];
  const renderEntry = async (entry) => {
    const outcome = await generateMergedPage(root, entry, schema, options, sourceStates);
    if (outcome.error) errors.push(outcome.error);
    if (outcome.candidateId) candidates.push(outcome.candidateId);
    return entry;
  };
  const pages = await Promise.all(merged.map((entry) => limit(() => renderEntry(entry))));
  return { pages, errors, candidates };
}
|
|
2525
|
+
/**
 * Persist source state for every extraction that produced concepts, one at a
 * time and in order.
 *
 * @param {string} root - Project root.
 * @param {Array} extractions - Results with `sourcePath`, `sourceFile`, `concepts`.
 * @returns {Promise<void>}
 */
async function persistExtractionStates(root, extractions) {
  for (const { concepts, sourcePath, sourceFile } of extractions) {
    // Extractions that produced no concepts are not persisted.
    if (concepts.length !== 0) {
      await persistSourceState(root, sourcePath, sourceFile, concepts);
    }
  }
}
|
|
2531
|
+
function summarizeCompile(buckets, generation, extractions, options) {
|
|
2532
|
+
header("Compilation complete");
|
|
2533
|
+
status("\u2713", success(
|
|
1990
2534
|
`${buckets.toCompile.length} compiled, ${buckets.unchanged.length} skipped, ${buckets.deleted.length} deleted`
|
|
1991
2535
|
));
|
|
1992
2536
|
if (options.review && generation.candidates.length > 0) {
|
|
@@ -2016,12 +2560,24 @@ function summarizeCompile(buckets, generation, extractions, options) {
|
|
|
2016
2560
|
return baseResult;
|
|
2017
2561
|
}
|
|
2018
2562
|
async function runCompilePipeline(root, options) {
|
|
2563
|
+
const schema = await loadSchema(root);
|
|
2564
|
+
reportSchemaStatus(schema);
|
|
2019
2565
|
const state = await readState(root);
|
|
2020
2566
|
const changes = await detectChanges(root, state);
|
|
2021
2567
|
augmentWithAffectedSources(changes, findAffectedSources(state, changes));
|
|
2022
2568
|
const buckets = bucketChanges(changes);
|
|
2023
2569
|
if (buckets.toCompile.length === 0 && buckets.deleted.length === 0) {
|
|
2024
2570
|
status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
|
|
2571
|
+
if (!options.review) {
|
|
2572
|
+
const emptyGeneration = { pages: [], errors: [], candidates: [] };
|
|
2573
|
+
await generateSeedPages(root, schema, emptyGeneration);
|
|
2574
|
+
await finalizeWiki(root, emptyGeneration.pages);
|
|
2575
|
+
return {
|
|
2576
|
+
...emptyCompileResult(),
|
|
2577
|
+
skipped: buckets.unchanged.length,
|
|
2578
|
+
errors: emptyGeneration.errors
|
|
2579
|
+
};
|
|
2580
|
+
}
|
|
2025
2581
|
return { ...emptyCompileResult(), skipped: buckets.unchanged.length };
|
|
2026
2582
|
}
|
|
2027
2583
|
printChangesSummary(changes);
|
|
@@ -2034,17 +2590,23 @@ async function runCompilePipeline(root, options) {
|
|
|
2034
2590
|
if (!options.review) {
|
|
2035
2591
|
await freezeFailedExtractions(root, extractions, frozenSlugs);
|
|
2036
2592
|
}
|
|
2037
|
-
const generation = await generatePagesPhase(root, extractions, frozenSlugs, options);
|
|
2593
|
+
const generation = await generatePagesPhase(root, extractions, frozenSlugs, schema, options);
|
|
2038
2594
|
if (!options.review) {
|
|
2039
2595
|
await persistExtractionStates(root, extractions);
|
|
2040
2596
|
if (frozenSlugs.size > 0) {
|
|
2041
2597
|
await orphanUnownedFrozenPages(root, frozenSlugs);
|
|
2042
2598
|
}
|
|
2043
2599
|
await persistFrozenSlugs(root, frozenSlugs, extractions);
|
|
2600
|
+
await generateSeedPages(root, schema, generation);
|
|
2044
2601
|
await finalizeWiki(root, generation.pages);
|
|
2045
2602
|
}
|
|
2046
2603
|
return summarizeCompile(buckets, generation, extractions, options);
|
|
2047
2604
|
}
|
|
2605
|
+
/**
 * Print an informational status line naming where the schema was loaded from.
 * Prints nothing when `schema.loadedFrom` is falsy.
 *
 * @param {object} schema - Loaded schema.
 * @returns {void}
 */
function reportSchemaStatus(schema) {
  const { loadedFrom } = schema;
  if (!loadedFrom) return;
  status("i", dim(`Schema: ${loadedFrom}`));
}
|
|
2048
2610
|
function augmentWithAffectedSources(changes, affected) {
|
|
2049
2611
|
for (const file of affected) {
|
|
2050
2612
|
status("~", info(`${file} [affected by shared concept]`));
|
|
@@ -2105,9 +2667,9 @@ function printChangesSummary(changes) {
|
|
|
2105
2667
|
}
|
|
2106
2668
|
async function extractForSource(root, sourceFile) {
|
|
2107
2669
|
status("*", info(`Extracting: ${sourceFile}`));
|
|
2108
|
-
const sourcePath =
|
|
2109
|
-
const sourceContent = await
|
|
2110
|
-
const existingIndex = await safeReadFile(
|
|
2670
|
+
const sourcePath = path18.join(root, SOURCES_DIR, sourceFile);
|
|
2671
|
+
const sourceContent = await readFile10(sourcePath, "utf-8");
|
|
2672
|
+
const existingIndex = await safeReadFile(path18.join(root, INDEX_FILE));
|
|
2111
2673
|
const concepts = await extractConcepts(sourceContent, existingIndex);
|
|
2112
2674
|
if (concepts.length > 0) {
|
|
2113
2675
|
const names = concepts.map((c) => c.concept).join(", ");
|
|
@@ -2165,27 +2727,77 @@ ${result.sourceContent}`
|
|
|
2165
2727
|
}
|
|
2166
2728
|
return Array.from(bySlug.values());
|
|
2167
2729
|
}
|
|
2168
|
-
async function generateMergedPage(root, entry, options, sourceStates) {
|
|
2169
|
-
const fullPage = await renderMergedPageContent(root, entry);
|
|
2730
|
+
async function generateMergedPage(root, entry, schema, options, sourceStates) {
|
|
2731
|
+
const fullPage = await renderMergedPageContent(root, entry, schema);
|
|
2170
2732
|
if (options.review) {
|
|
2171
|
-
return await persistReviewCandidate(root, entry, fullPage, sourceStates);
|
|
2733
|
+
return await persistReviewCandidate(root, entry, fullPage, sourceStates, schema);
|
|
2172
2734
|
}
|
|
2173
|
-
const pagePath =
|
|
2735
|
+
const pagePath = path18.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
|
|
2174
2736
|
const error2 = await writePageIfValid(pagePath, fullPage, entry.concept.concept);
|
|
2175
2737
|
return { error: error2 ?? void 0 };
|
|
2176
2738
|
}
|
|
2177
|
-
async function persistReviewCandidate(root, entry, fullPage, sourceStates) {
|
|
2739
|
+
async function persistReviewCandidate(root, entry, fullPage, sourceStates, schema) {
|
|
2740
|
+
const virtualPath = `wiki/concepts/${entry.slug}.md`;
|
|
2741
|
+
const violations = checkPageCrossLinks(fullPage, virtualPath, schema);
|
|
2178
2742
|
const candidate = await writeCandidate(root, {
|
|
2179
2743
|
title: entry.concept.concept,
|
|
2180
2744
|
slug: entry.slug,
|
|
2181
2745
|
summary: entry.concept.summary,
|
|
2182
2746
|
sources: entry.sourceFiles,
|
|
2183
2747
|
body: fullPage,
|
|
2184
|
-
sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles)
|
|
2748
|
+
sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles),
|
|
2749
|
+
schemaViolations: violations.length > 0 ? violations : void 0
|
|
2185
2750
|
});
|
|
2186
2751
|
status("?", info(`Candidate ready: ${candidate.id} (${entry.slug})`));
|
|
2187
2752
|
return { candidateId: candidate.id };
|
|
2188
2753
|
}
|
|
2754
|
+
/**
 * Generate every seed page declared by the schema, sequentially.
 *
 * @param {string} root - Project root.
 * @param {object} schema - Schema whose `seedPages` list is processed.
 * @param {{errors: Array}} generation - Result object; each truthy value
 *   returned by generateSingleSeedPage is appended to its `errors`.
 * @returns {Promise<void>}
 */
async function generateSeedPages(root, schema, generation) {
  const { seedPages } = schema;
  if (seedPages.length === 0) return;
  for (const seed of seedPages) {
    const failure = await generateSingleSeedPage(root, schema, seed);
    if (failure) generation.errors.push(failure);
  }
}
|
|
2761
|
+
/**
 * Generate one schema-declared seed page and write it under CONCEPTS_DIR.
 *
 * The body comes from callClaude, using a prompt built from the seed, its
 * kind rule, and the seed's related pages. `createdAt` is carried over from
 * an existing page when that page has a string value; `updatedAt` is the
 * current time.
 *
 * @param {string} root - Project root.
 * @param {object} schema - Schema providing `kinds[seed.kind]`.
 * @param {object} seed - Seed definition (`title`, `kind`, `summary`, optional `relatedSlugs`).
 * @returns {Promise<*>} Whatever writePageIfValid returns for the page.
 */
async function generateSingleSeedPage(root, schema, seed) {
  const { title, kind, summary } = seed;
  const pagePath = path18.join(root, CONCEPTS_DIR, `${slugify(title)}.md`);
  const relatedContent = await loadSeedRelatedPages(root, seed.relatedSlugs ?? []);
  const system = buildSeedPagePrompt(seed, schema.kinds[kind], relatedContent);
  const pageBody = await callClaude({
    system,
    messages: [{ role: "user", content: `Write the ${kind} page titled "${title}".` }]
  });
  const now = new Date().toISOString();
  const existing = await safeReadFile(pagePath);
  const previousCreatedAt = existing ? parseFrontmatter(existing).meta?.createdAt : undefined;
  const frontmatterFields = {
    title,
    summary,
    sources: [],
    kind,
    createdAt: typeof previousCreatedAt === "string" ? previousCreatedAt : now,
    updatedAt: now
  };
  addObsidianMeta(frontmatterFields, title, []);
  const frontmatter = buildFrontmatter(frontmatterFields);
  return await writePageIfValid(pagePath, `${frontmatter}\n\n${pageBody}\n`, title);
}
|
|
2791
|
+
/**
 * Read the concept pages named by `slugs`, for use as seed-page prompt context.
 *
 * @param {string[]} slugs - Slugs of pages under CONCEPTS_DIR.
 * @param {string} root - Project root.
 * @returns {Promise<string>} Contents of the pages that yielded text, joined
 *   by "\n\n---\n\n"; "" when there are none.
 */
async function loadSeedRelatedPages(root, slugs) {
  const sections = [];
  for (const slug of slugs) {
    const text = await safeReadFile(path18.join(root, CONCEPTS_DIR, `${slug}.md`));
    if (text) sections.push(text);
  }
  return sections.join("\n\n---\n\n");
}
|
|
2189
2801
|
async function extractConcepts(sourceContent, existingIndex) {
|
|
2190
2802
|
const system = buildExtractionPrompt(sourceContent, existingIndex);
|
|
2191
2803
|
const rawOutput = await callClaude({
|
|
@@ -2223,7 +2835,7 @@ async function persistSourceState(root, sourcePath, sourceFile, concepts) {
|
|
|
2223
2835
|
|
|
2224
2836
|
// src/commands/compile.ts
|
|
2225
2837
|
async function compileCommand(options = {}) {
|
|
2226
|
-
if (!
|
|
2838
|
+
if (!existsSync7(SOURCES_DIR)) {
|
|
2227
2839
|
status(
|
|
2228
2840
|
"!",
|
|
2229
2841
|
warn("No sources found. Run `llmwiki ingest <url>` first.")
|
|
@@ -2234,8 +2846,8 @@ async function compileCommand(options = {}) {
|
|
|
2234
2846
|
}
|
|
2235
2847
|
|
|
2236
2848
|
// src/commands/query.ts
|
|
2237
|
-
import { existsSync as
|
|
2238
|
-
import
|
|
2849
|
+
import { existsSync as existsSync8 } from "fs";
|
|
2850
|
+
import path19 from "path";
|
|
2239
2851
|
var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
|
|
2240
2852
|
var PAGE_SELECTION_TOOL = {
|
|
2241
2853
|
name: "select_pages",
|
|
@@ -2290,7 +2902,7 @@ async function selectRelevantPages(root, question) {
|
|
|
2290
2902
|
const { pages: rawPages2, reasoning: reasoning2 } = await selectPages(question, filteredIndex);
|
|
2291
2903
|
return { pages: rawPages2, rawPages: rawPages2, reasoning: reasoning2 };
|
|
2292
2904
|
}
|
|
2293
|
-
const indexContent = await safeReadFile(
|
|
2905
|
+
const indexContent = await safeReadFile(path19.join(root, INDEX_FILE));
|
|
2294
2906
|
const { pages: rawPages, reasoning } = await selectPages(question, indexContent);
|
|
2295
2907
|
return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning };
|
|
2296
2908
|
}
|
|
@@ -2308,7 +2920,7 @@ async function loadSelectedPages(root, slugs) {
|
|
|
2308
2920
|
for (const slug of slugs) {
|
|
2309
2921
|
let content = "";
|
|
2310
2922
|
for (const dir of PAGE_DIRS) {
|
|
2311
|
-
const candidate = await safeReadFile(
|
|
2923
|
+
const candidate = await safeReadFile(path19.join(root, dir, `${slug}.md`));
|
|
2312
2924
|
if (!candidate) continue;
|
|
2313
2925
|
const { meta } = parseFrontmatter(candidate);
|
|
2314
2926
|
if (meta.orphaned) continue;
|
|
@@ -2344,7 +2956,7 @@ function summarizeAnswer(answer) {
|
|
|
2344
2956
|
}
|
|
2345
2957
|
async function saveQueryPage(root, question, answer) {
|
|
2346
2958
|
const slug = slugify(question);
|
|
2347
|
-
const filePath =
|
|
2959
|
+
const filePath = path19.join(root, QUERIES_DIR, `${slug}.md`);
|
|
2348
2960
|
const frontmatter = buildFrontmatter({
|
|
2349
2961
|
title: question,
|
|
2350
2962
|
summary: summarizeAnswer(answer),
|
|
@@ -2370,7 +2982,7 @@ ${answer}
|
|
|
2370
2982
|
return slug;
|
|
2371
2983
|
}
|
|
2372
2984
|
async function generateAnswer(root, question, options = {}) {
|
|
2373
|
-
if (!
|
|
2985
|
+
if (!existsSync8(path19.join(root, INDEX_FILE))) {
|
|
2374
2986
|
throw new Error("Wiki index not found. Run `llmwiki compile` first.");
|
|
2375
2987
|
}
|
|
2376
2988
|
const { pages, reasoning } = await selectRelevantPages(root, question);
|
|
@@ -2387,7 +2999,7 @@ async function generateAnswer(root, question, options = {}) {
|
|
|
2387
2999
|
return { answer, selectedPages: pages, reasoning, saved };
|
|
2388
3000
|
}
|
|
2389
3001
|
async function queryCommand(root, question, options) {
|
|
2390
|
-
if (!
|
|
3002
|
+
if (!existsSync8(path19.join(root, INDEX_FILE))) {
|
|
2391
3003
|
status("!", error("Wiki index not found. Run `llmwiki compile` first."));
|
|
2392
3004
|
return;
|
|
2393
3005
|
}
|
|
@@ -2415,12 +3027,12 @@ async function queryCommand(root, question, options) {
|
|
|
2415
3027
|
|
|
2416
3028
|
// src/commands/watch.ts
|
|
2417
3029
|
import { watch as chokidarWatch } from "chokidar";
|
|
2418
|
-
import { existsSync as
|
|
2419
|
-
import
|
|
3030
|
+
import { existsSync as existsSync9 } from "fs";
|
|
3031
|
+
import path20 from "path";
|
|
2420
3032
|
var DEBOUNCE_MS = 500;
|
|
2421
3033
|
async function watchCommand() {
|
|
2422
|
-
const sourcesPath =
|
|
2423
|
-
if (!
|
|
3034
|
+
const sourcesPath = path20.resolve(SOURCES_DIR);
|
|
3035
|
+
if (!existsSync9(sourcesPath)) {
|
|
2424
3036
|
status(
|
|
2425
3037
|
"!",
|
|
2426
3038
|
warn("No sources/ directory found. Run `llmwiki ingest <url>` first.")
|
|
@@ -2454,7 +3066,7 @@ async function watchCommand() {
|
|
|
2454
3066
|
const scheduleCompile = (eventPath, event) => {
|
|
2455
3067
|
status(
|
|
2456
3068
|
"~",
|
|
2457
|
-
dim(`${event}: ${
|
|
3069
|
+
dim(`${event}: ${path20.basename(eventPath)}`)
|
|
2458
3070
|
);
|
|
2459
3071
|
if (debounceTimer) clearTimeout(debounceTimer);
|
|
2460
3072
|
debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
|
|
@@ -2468,261 +3080,30 @@ async function watchCommand() {
|
|
|
2468
3080
|
});
|
|
2469
3081
|
}
|
|
2470
3082
|
|
|
2471
|
-
// src/linter/rules.ts
|
|
2472
|
-
import { readdir as readdir8, readFile as readFile9 } from "fs/promises";
|
|
2473
|
-
import { existsSync as existsSync8 } from "fs";
|
|
2474
|
-
import path19 from "path";
|
|
2475
|
-
var MIN_BODY_LENGTH = 50;
|
|
2476
|
-
var WIKILINK_PATTERN = /\[\[([^\]]+)\]\]/g;
|
|
2477
|
-
var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
|
|
2478
|
-
/**
 * Find every match of `pattern` in `content`, line by line.
 *
 * @param {string} content - Text to scan; it is split on "\n".
 * @param {RegExp} pattern - Pattern with one capture group; it is handed to String.prototype.matchAll.
 * @returns {{captured: string, line: number}[]} First capture group of each match with its 1-based line number.
 */
function findMatchesInContent(content, pattern) {
  return content.split("\n").flatMap((text, index) =>
    Array.from(text.matchAll(pattern), (hit) => ({ captured: hit[1], line: index + 1 }))
  );
}
|
|
2489
|
-
/**
 * Read every ".md" file directly inside `dirPath`.
 *
 * A missing directory yields an empty list. Instead of a blocking existence
 * check followed by readdir (which can race with a concurrent delete), the
 * directory is listed directly and an ENOENT error is treated as "no pages".
 * Any other error is rethrown.
 *
 * @param {string} dirPath - Directory to scan (not recursive).
 * @returns {Promise<{filePath: string, content: string}[]>} Path and UTF-8 content of each markdown file.
 */
async function readMarkdownFiles(dirPath) {
  let entries;
  try {
    entries = await readdir8(dirPath);
  } catch (err) {
    if (err?.code === "ENOENT") return [];
    throw err;
  }
  const mdFiles = entries.filter((name) => name.endsWith(".md"));
  return Promise.all(
    mdFiles.map(async (fileName) => {
      const filePath = path19.join(dirPath, fileName);
      const content = await readFile9(filePath, "utf-8");
      return { filePath, content };
    })
  );
}
|
|
2502
|
-
/**
 * Collect the markdown pages from the concepts and queries directories.
 *
 * @param {string} root - Base directory containing both page directories.
 * @returns {Promise<{filePath: string, content: string}[]>} Concept pages followed by query pages.
 */
async function collectAllPages(root) {
  const fromConcepts = await readMarkdownFiles(path19.join(root, CONCEPTS_DIR));
  const fromQueries = await readMarkdownFiles(path19.join(root, QUERIES_DIR));
  return fromConcepts.concat(fromQueries);
}
|
|
2507
|
-
/**
 * Build the set of page slugs from page file names.
 *
 * A slug here is the file's base name without the ".md" extension, lower-cased.
 *
 * @param {Iterable<{filePath: string}>} pages - Pages whose filePath is inspected.
 * @returns {Set<string>} Lower-cased base names.
 */
function buildPageSlugSet(pages) {
  return new Set(
    Array.from(pages, ({ filePath }) => path19.basename(filePath, ".md").toLowerCase())
  );
}
|
|
2515
|
-
/**
 * Lint rule: report each [[wikilink]] whose slugified target matches no page file.
 *
 * @param {string} root - Base directory passed to collectAllPages.
 * @returns {Promise<object[]>} One "broken-wikilink" error per unresolved link, with its line number.
 */
async function checkBrokenWikilinks(root) {
  const pages = await collectAllPages(root);
  const knownSlugs = buildPageSlugSet(pages);
  return pages.flatMap((page) =>
    findMatchesInContent(page.content, WIKILINK_PATTERN)
      .filter(({ captured }) => !knownSlugs.has(slugify(captured)))
      .map(({ captured, line }) => ({
        rule: "broken-wikilink",
        severity: "error",
        file: page.filePath,
        message: `Broken wikilink [[${captured}]] \u2014 no matching page found`,
        line
      }))
  );
}
|
|
2535
|
-
/**
 * Lint rule: report pages whose frontmatter has `orphaned` set to exactly true.
 *
 * @param {string} root - Base directory passed to collectAllPages.
 * @returns {Promise<object[]>} One "orphaned-page" warning per flagged page.
 */
async function checkOrphanedPages(root) {
  const pages = await collectAllPages(root);
  return pages
    .filter((page) => parseFrontmatter(page.content).meta.orphaned === true)
    .map((page) => ({
      rule: "orphaned-page",
      severity: "warning",
      file: page.filePath,
      message: `Page is marked as orphaned`
    }));
}
|
|
2551
|
-
/**
 * Lint rule: report pages whose frontmatter summary is falsy or a whitespace-only string.
 *
 * @param {string} root - Base directory passed to collectAllPages.
 * @returns {Promise<object[]>} One "missing-summary" warning per page without a usable summary.
 */
async function checkMissingSummaries(root) {
  const pages = await collectAllPages(root);
  const findings = [];
  for (const { filePath, content } of pages) {
    const { summary } = parseFrontmatter(content).meta;
    const isBlankString = typeof summary === "string" && summary.trim() === "";
    // A usable summary is truthy and not a whitespace-only string.
    if (summary && !isBlankString) continue;
    findings.push({
      rule: "missing-summary",
      severity: "warning",
      file: filePath,
      message: `Page has no summary in frontmatter`
    });
  }
  return findings;
}
|
|
2569
|
-
/**
 * Lint rule: report pages that share a title, compared after lower-casing and trimming.
 *
 * Pages with no title, or a non-string title, are ignored.
 *
 * @param {string} root - Base directory passed to collectAllPages.
 * @returns {Promise<object[]>} One "duplicate-concept" error per page in each duplicate group, naming the other files.
 */
async function checkDuplicateConcepts(root) {
  const pages = await collectAllPages(root);

  // Group file paths by normalized title, in first-seen order.
  const filesByTitle = /* @__PURE__ */ new Map();
  for (const { filePath, content } of pages) {
    const { title } = parseFrontmatter(content).meta;
    if (typeof title !== "string" || title === "") continue;
    const key = title.toLowerCase().trim();
    if (!filesByTitle.has(key)) filesByTitle.set(key, []);
    filesByTitle.get(key).push(filePath);
  }

  const findings = [];
  filesByTitle.forEach((files, title) => {
    if (files.length <= 1) return;
    files.forEach((file) => {
      findings.push({
        rule: "duplicate-concept",
        severity: "error",
        file,
        message: `Duplicate title "${title}" \u2014 also in ${files.filter((f) => f !== file).join(", ")}`
      });
    });
  });
  return findings;
}
|
|
2595
|
-
/**
 * Lint rule: report titled pages whose trimmed body is shorter than MIN_BODY_LENGTH characters.
 *
 * @param {string} root - Base directory passed to collectAllPages.
 * @returns {Promise<object[]>} One "empty-page" warning per titled page with a too-short body.
 */
async function checkEmptyPages(root) {
  const pages = await collectAllPages(root);
  return pages
    .filter((page) => {
      const { meta, body } = parseFrontmatter(page.content);
      const isTitled = typeof meta.title === "string" && meta.title.trim() !== "";
      const isTooShort = body.trim().length < MIN_BODY_LENGTH;
      return isTitled && isTooShort;
    })
    .map((page) => ({
      rule: "empty-page",
      severity: "warning",
      file: page.filePath,
      message: `Page body is empty or too short (< ${MIN_BODY_LENGTH} chars)`
    }));
}
|
|
2613
|
-
/**
 * Lint rule: report pages whose provenance confidence is below LOW_CONFIDENCE_THRESHOLD.
 *
 * Pages with no confidence value are skipped.
 *
 * @param {string} root - Base directory passed to collectAllPages.
 * @returns {Promise<object[]>} One "low-confidence" warning per page under the threshold.
 */
async function checkLowConfidencePages(root) {
  const pages = await collectAllPages(root);
  const findings = [];
  pages.forEach((page) => {
    const { confidence } = parseProvenanceMetadata(parseFrontmatter(page.content).meta);
    if (confidence === void 0) return;
    if (confidence >= LOW_CONFIDENCE_THRESHOLD) return;
    findings.push({
      rule: "low-confidence",
      severity: "warning",
      file: page.filePath,
      message: `Page confidence ${confidence.toFixed(2)} is below ${LOW_CONFIDENCE_THRESHOLD}`
    });
  });
  return findings;
}
|
|
2629
|
-
/**
 * Lint rule: report pages whose provenance metadata has a non-empty contradictedBy list.
 *
 * @param {string} root - Base directory passed to collectAllPages.
 * @returns {Promise<object[]>} One "contradicted-page" warning per such page, listing the referenced slugs.
 */
async function checkContradictedPages(root) {
  const pages = await collectAllPages(root);
  const findings = [];
  pages.forEach((page) => {
    const { contradictedBy } = parseProvenanceMetadata(parseFrontmatter(page.content).meta);
    if (!contradictedBy || contradictedBy.length === 0) return;
    const slugList = contradictedBy.map((ref) => ref.slug).join(", ");
    findings.push({
      rule: "contradicted-page",
      severity: "warning",
      file: page.filePath,
      message: `Page contradicts: ${slugList}`
    });
  });
  return findings;
}
|
|
2646
|
-
/**
 * Lint rule: report pages with more uncited inferred paragraphs than allowed.
 *
 * The count is provenance.inferredParagraphs when present; otherwise it is
 * computed from the body with countUncitedProseParagraphs.
 *
 * @param {string} root - Base directory passed to collectAllPages.
 * @returns {Promise<object[]>} One "excess-inferred-paragraphs" warning per page over the limit.
 */
async function checkInferredWithoutCitations(root) {
  const pages = await collectAllPages(root);
  const findings = [];
  pages.forEach((page) => {
    const { meta, body } = parseFrontmatter(page.content);
    const { inferredParagraphs } = parseProvenanceMetadata(meta);
    const inferred = inferredParagraphs ?? countUncitedProseParagraphs(body);
    if (inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS) return;
    findings.push({
      rule: "excess-inferred-paragraphs",
      severity: "warning",
      file: page.filePath,
      message: `Page has ${inferred} inferred paragraphs without citations (max ${MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS})`
    });
  });
  return findings;
}
|
|
2663
|
-
var PROSE_PARAGRAPH_LEAD = /^[A-Za-z]/;
|
|
2664
|
-
/**
 * Count prose paragraphs in `body` that contain no citation marker.
 *
 * A paragraph is a block separated by blank lines. It counts as prose when its
 * trimmed text matches PROSE_PARAGRAPH_LEAD.
 *
 * CITATION_PATTERN is a shared global (/g) regex, so calling .test() on it
 * directly is stateful: a non-zero lastIndex left by an earlier use can make a
 * real citation look absent. Testing against a local non-global copy removes
 * that dependency and leaves the shared pattern untouched.
 *
 * @param {string} body - Page body without frontmatter.
 * @returns {number} Number of prose paragraphs without a citation.
 */
function countUncitedProseParagraphs(body) {
  const citation = new RegExp(
    CITATION_PATTERN.source,
    CITATION_PATTERN.flags.replace(/[gy]/g, "")
  );
  let count = 0;
  for (const block of body.split(/\n\s*\n/)) {
    const trimmed = block.trim();
    if (trimmed.length === 0) continue;
    if (!PROSE_PARAGRAPH_LEAD.test(trimmed)) continue;
    if (citation.test(trimmed)) continue;
    count += 1;
  }
  return count;
}
|
|
2680
|
-
/**
 * Split the inside of a citation marker into individual file names.
 *
 * @param {string} captured - Comma-separated file names.
 * @returns {string[]} Trimmed, non-empty names in their original order.
 */
function splitCitationFilenames(captured) {
  const names = [];
  for (const piece of captured.split(",")) {
    const name = piece.trim();
    if (name.length > 0) {
      names.push(name);
    }
  }
  return names;
}
|
|
2683
|
-
/**
 * Lint rule: report citations that name a file missing from the sources directory.
 *
 * Each citation marker may list several comma-separated file names; every name
 * is checked on its own.
 *
 * @param {string} root - Base directory containing the sources directory.
 * @returns {Promise<object[]>} One "broken-citation" error per missing source file, with its line number.
 */
async function checkBrokenCitations(root) {
  const pages = await collectAllPages(root);
  const sourcesDir = path19.join(root, SOURCES_DIR);
  const findings = [];
  for (const page of pages) {
    const citations = findMatchesInContent(page.content, CITATION_PATTERN);
    for (const { captured, line } of citations) {
      const missing = splitCitationFilenames(captured).filter(
        (filename) => !existsSync8(path19.join(sourcesDir, filename))
      );
      for (const filename of missing) {
        findings.push({
          rule: "broken-citation",
          severity: "error",
          file: page.filePath,
          message: `Broken citation ^[${filename}] \u2014 source file not found`,
          line
        });
      }
    }
  }
  return findings;
}
|
|
2705
|
-
|
|
2706
3083
|
// src/linter/index.ts
|
|
2707
|
-
var
|
|
3084
|
+
var RULES_WITHOUT_SCHEMA = [
|
|
2708
3085
|
checkBrokenWikilinks,
|
|
2709
3086
|
checkOrphanedPages,
|
|
2710
3087
|
checkMissingSummaries,
|
|
2711
3088
|
checkDuplicateConcepts,
|
|
2712
3089
|
checkEmptyPages,
|
|
2713
3090
|
checkBrokenCitations,
|
|
3091
|
+
checkMalformedClaimCitations,
|
|
2714
3092
|
checkLowConfidencePages,
|
|
2715
3093
|
checkContradictedPages,
|
|
2716
3094
|
checkInferredWithoutCitations
|
|
2717
3095
|
];
|
|
3096
|
+
var RULES_WITH_SCHEMA = [checkSchemaCrossLinks];
|
|
2718
3097
|
/**
 * Count the lint results that have the given severity.
 *
 * @param {{severity: string}[]} results - Lint results to tally.
 * @param {string} severity - Severity to match with strict equality.
 * @returns {number} Number of matching results.
 */
function countBySeverity(results, severity) {
  return results.reduce(
    (total, entry) => (entry.severity === severity ? total + 1 : total),
    0
  );
}
|
|
2721
3100
|
async function lint(root) {
|
|
2722
|
-
const
|
|
2723
|
-
|
|
2724
|
-
|
|
2725
|
-
|
|
3101
|
+
const schema = await loadSchema(root);
|
|
3102
|
+
const [plainResults, schemaResults] = await Promise.all([
|
|
3103
|
+
Promise.all(RULES_WITHOUT_SCHEMA.map((rule) => rule(root))),
|
|
3104
|
+
Promise.all(RULES_WITH_SCHEMA.map((rule) => rule(root, schema)))
|
|
3105
|
+
]);
|
|
3106
|
+
const results = [...plainResults.flat(), ...schemaResults.flat()];
|
|
2726
3107
|
return {
|
|
2727
3108
|
errors: countBySeverity(results, "error"),
|
|
2728
3109
|
warnings: countBySeverity(results, "warning"),
|
|
@@ -2750,6 +3131,9 @@ function printResult(result) {
|
|
|
2750
3131
|
}
|
|
2751
3132
|
async function lintCommand() {
|
|
2752
3133
|
header("Linting wiki");
|
|
3134
|
+
const schema = await loadSchema(process.cwd());
|
|
3135
|
+
const schemaSource = schema.loadedFrom ?? "defaults (no schema file)";
|
|
3136
|
+
status("i", dim(`Schema: ${schemaSource}`));
|
|
2753
3137
|
const summary = await lint(process.cwd());
|
|
2754
3138
|
for (const result of summary.results) {
|
|
2755
3139
|
printResult(result);
|
|
@@ -2766,6 +3150,36 @@ async function lintCommand() {
|
|
|
2766
3150
|
}
|
|
2767
3151
|
}
|
|
2768
3152
|
|
|
3153
|
+
// src/commands/schema.ts
|
|
3154
|
+
import { existsSync as existsSync10 } from "fs";
|
|
3155
|
+
import { mkdir as mkdir6, writeFile as writeFile5 } from "fs/promises";
|
|
3156
|
+
import path21 from "path";
|
|
3157
|
+
/**
 * CLI handler for `schema init`: write a starter schema file for the current project.
 *
 * If a file already exists at the target path, a warning is printed and
 * nothing is written. Otherwise the parent directory is created as needed and
 * the default schema's version, defaultKind, kinds and seedPages are written
 * as pretty-printed JSON with a trailing newline.
 *
 * @returns {Promise<void>}
 */
async function schemaInitCommand() {
  const root = process.cwd();
  const defaults = buildDefaultSchema();
  const targetPath = defaultSchemaInitPath(root);

  if (existsSync10(targetPath)) {
    status("!", warn(`Schema file already exists at ${targetPath}`));
    return;
  }

  await mkdir6(path21.dirname(targetPath), { recursive: true });

  // Only these four fields are written to the file.
  const { version, defaultKind, kinds, seedPages } = defaults;
  const payload = JSON.stringify({ version, defaultKind, kinds, seedPages }, null, 2);
  await writeFile5(targetPath, `${payload}\n`, "utf-8");
  status("+", success(`Wrote schema to ${targetPath}`));
}
|
|
3176
|
+
/**
 * CLI handler for `schema show`: print the schema loaded for the current directory as YAML.
 *
 * The header names the file the schema was loaded from, or a placeholder when
 * schema.loadedFrom is null or undefined.
 *
 * @returns {Promise<void>}
 */
async function schemaShowCommand() {
  const schema = await loadSchema(process.cwd());
  let origin = schema.loadedFrom;
  if (origin == null) {
    origin = "(defaults \u2014 no schema file found)";
  }
  header(`Schema (${origin})`);
  console.log(serializeSchemaToYaml(schema));
}
|
|
3182
|
+
|
|
2769
3183
|
// src/commands/review-list.ts
|
|
2770
3184
|
async function reviewListCommand() {
|
|
2771
3185
|
header("Pending review candidates");
|
|
@@ -2797,10 +3211,17 @@ async function reviewShowCommand(id) {
|
|
|
2797
3211
|
status("i", dim(`generated: ${candidate.generatedAt}`));
|
|
2798
3212
|
console.log();
|
|
2799
3213
|
console.log(candidate.body);
|
|
3214
|
+
if (candidate.schemaViolations && candidate.schemaViolations.length > 0) {
|
|
3215
|
+
console.log();
|
|
3216
|
+
header("Schema violations");
|
|
3217
|
+
for (const v of candidate.schemaViolations) {
|
|
3218
|
+
status("!", warn(`[${v.severity}] ${v.message}`));
|
|
3219
|
+
}
|
|
3220
|
+
}
|
|
2800
3221
|
}
|
|
2801
3222
|
|
|
2802
3223
|
// src/commands/review-approve.ts
|
|
2803
|
-
import
|
|
3224
|
+
import path22 from "path";
|
|
2804
3225
|
|
|
2805
3226
|
// src/commands/review-helpers.ts
|
|
2806
3227
|
async function runReviewUnderLock(id, underLock) {
|
|
@@ -2832,7 +3253,7 @@ async function approveUnderLock(root, id) {
|
|
|
2832
3253
|
process.exitCode = 1;
|
|
2833
3254
|
return;
|
|
2834
3255
|
}
|
|
2835
|
-
const pagePath =
|
|
3256
|
+
const pagePath = path22.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
|
|
2836
3257
|
await atomicWrite(pagePath, candidate.body);
|
|
2837
3258
|
status("+", success(`Approved \u2192 ${source(pagePath)}`));
|
|
2838
3259
|
await persistCandidateSourceStates(root, candidate);
|
|
@@ -2892,7 +3313,7 @@ import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js
|
|
|
2892
3313
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
2893
3314
|
|
|
2894
3315
|
// src/mcp/tools.ts
|
|
2895
|
-
import
|
|
3316
|
+
import path23 from "path";
|
|
2896
3317
|
import { z } from "zod";
|
|
2897
3318
|
|
|
2898
3319
|
// src/mcp/provider-check.ts
|
|
@@ -3022,7 +3443,7 @@ async function pickSearchSlugs(root, question) {
|
|
|
3022
3443
|
if (candidates.length > 0) return candidates.map((c) => c.slug);
|
|
3023
3444
|
} catch {
|
|
3024
3445
|
}
|
|
3025
|
-
const indexContent = await safeReadFile(
|
|
3446
|
+
const indexContent = await safeReadFile(path23.join(root, INDEX_FILE));
|
|
3026
3447
|
const { pages } = await selectPages(question, indexContent);
|
|
3027
3448
|
return pages;
|
|
3028
3449
|
}
|
|
@@ -3071,8 +3492,8 @@ function registerStatusTool(server, root) {
|
|
|
3071
3492
|
);
|
|
3072
3493
|
}
|
|
3073
3494
|
async function collectStatus(root) {
|
|
3074
|
-
const concepts = await collectPageSummaries(
|
|
3075
|
-
const queries = await collectPageSummaries(
|
|
3495
|
+
const concepts = await collectPageSummaries(path23.join(root, CONCEPTS_DIR));
|
|
3496
|
+
const queries = await collectPageSummaries(path23.join(root, QUERIES_DIR));
|
|
3076
3497
|
const state = await readState(root);
|
|
3077
3498
|
const changes = await detectChanges(root, state);
|
|
3078
3499
|
const orphans = await findOrphanedSlugs(root);
|
|
@@ -3089,7 +3510,7 @@ async function collectStatus(root) {
|
|
|
3089
3510
|
};
|
|
3090
3511
|
}
|
|
3091
3512
|
async function findOrphanedSlugs(root) {
|
|
3092
|
-
const scanned = await scanWikiPages(
|
|
3513
|
+
const scanned = await scanWikiPages(path23.join(root, CONCEPTS_DIR));
|
|
3093
3514
|
return scanned.filter(({ meta }) => meta.orphaned).map(({ slug }) => slug);
|
|
3094
3515
|
}
|
|
3095
3516
|
async function loadPageRecords(root, slugs) {
|
|
@@ -3102,7 +3523,7 @@ async function loadPageRecords(root, slugs) {
|
|
|
3102
3523
|
}
|
|
3103
3524
|
async function readPage(root, slug) {
|
|
3104
3525
|
for (const dir of PAGE_DIRS2) {
|
|
3105
|
-
const content = await safeReadFile(
|
|
3526
|
+
const content = await safeReadFile(path23.join(root, dir, `${slug}.md`));
|
|
3106
3527
|
if (!content) continue;
|
|
3107
3528
|
const { meta, body } = parseFrontmatter(content);
|
|
3108
3529
|
if (meta.orphaned) continue;
|
|
@@ -3117,7 +3538,7 @@ async function readPage(root, slug) {
|
|
|
3117
3538
|
}
|
|
3118
3539
|
|
|
3119
3540
|
// src/mcp/resources.ts
|
|
3120
|
-
import
|
|
3541
|
+
import path24 from "path";
|
|
3121
3542
|
import { readdir as readdir9 } from "fs/promises";
|
|
3122
3543
|
import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3123
3544
|
function jsonContent(uri, payload) {
|
|
@@ -3151,7 +3572,7 @@ function registerIndexResource(server, root) {
|
|
|
3151
3572
|
mimeType: "text/markdown"
|
|
3152
3573
|
},
|
|
3153
3574
|
async (uri) => {
|
|
3154
|
-
const content = await safeReadFile(
|
|
3575
|
+
const content = await safeReadFile(path24.join(root, INDEX_FILE));
|
|
3155
3576
|
return { contents: [markdownContent(uri, content)] };
|
|
3156
3577
|
}
|
|
3157
3578
|
);
|
|
@@ -3218,7 +3639,7 @@ function registerQueryResource(server, root) {
|
|
|
3218
3639
|
);
|
|
3219
3640
|
}
|
|
3220
3641
|
async function listSources(root) {
|
|
3221
|
-
const sourcesPath =
|
|
3642
|
+
const sourcesPath = path24.join(root, SOURCES_DIR);
|
|
3222
3643
|
let files;
|
|
3223
3644
|
try {
|
|
3224
3645
|
files = await readdir9(sourcesPath);
|
|
@@ -3227,14 +3648,14 @@ async function listSources(root) {
|
|
|
3227
3648
|
}
|
|
3228
3649
|
const records = [];
|
|
3229
3650
|
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
3230
|
-
const content = await safeReadFile(
|
|
3651
|
+
const content = await safeReadFile(path24.join(sourcesPath, file));
|
|
3231
3652
|
const { meta } = parseFrontmatter(content);
|
|
3232
3653
|
records.push({ filename: file, ...meta });
|
|
3233
3654
|
}
|
|
3234
3655
|
return records;
|
|
3235
3656
|
}
|
|
3236
3657
|
async function loadPageWithMeta(root, dir, slug) {
|
|
3237
|
-
const filePath =
|
|
3658
|
+
const filePath = path24.join(root, dir, `${slug}.md`);
|
|
3238
3659
|
const content = await safeReadFile(filePath);
|
|
3239
3660
|
if (!content) {
|
|
3240
3661
|
throw new Error(`Page not found: ${dir}/${slug}.md`);
|
|
@@ -3243,7 +3664,7 @@ async function loadPageWithMeta(root, dir, slug) {
|
|
|
3243
3664
|
return { slug, meta, body: body.trim() };
|
|
3244
3665
|
}
|
|
3245
3666
|
async function listPagesUnder(root, dir, scheme) {
|
|
3246
|
-
const pagesPath =
|
|
3667
|
+
const pagesPath = path24.join(root, dir);
|
|
3247
3668
|
let files;
|
|
3248
3669
|
try {
|
|
3249
3670
|
files = await readdir9(pagesPath);
|
|
@@ -3353,6 +3774,23 @@ program.command("lint").description("Run rule-based quality checks against the w
|
|
|
3353
3774
|
process.exit(1);
|
|
3354
3775
|
}
|
|
3355
3776
|
});
|
|
3777
|
+
var schemaCmd = program.command("schema").description("Inspect or initialize the project's wiki schema config");
|
|
3778
|
+
schemaCmd.command("init").description("Write a starter schema file to .llmwiki/schema.json").action(async () => {
|
|
3779
|
+
try {
|
|
3780
|
+
await schemaInitCommand();
|
|
3781
|
+
} catch (err) {
|
|
3782
|
+
console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
|
|
3783
|
+
process.exit(1);
|
|
3784
|
+
}
|
|
3785
|
+
});
|
|
3786
|
+
schemaCmd.command("show").description("Print the resolved schema for this project").action(async () => {
|
|
3787
|
+
try {
|
|
3788
|
+
await schemaShowCommand();
|
|
3789
|
+
} catch (err) {
|
|
3790
|
+
console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
|
|
3791
|
+
process.exit(1);
|
|
3792
|
+
}
|
|
3793
|
+
});
|
|
3356
3794
|
program.command("serve").description("Start an MCP server exposing wiki tools and resources over stdio").option("--root <dir>", "Project root directory", process.cwd()).action(async (options) => {
|
|
3357
3795
|
try {
|
|
3358
3796
|
await startMCPServer({ root: options.root, version });
|