llm-wiki-compiler 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -10
- package/dist/cli.js +1161 -254
- package/dist/cli.js.map +1 -1
- package/package.json +2 -1
package/dist/cli.js
CHANGED
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
// src/cli.ts
|
|
4
4
|
import "dotenv/config";
|
|
5
|
-
import { createRequire } from "module";
|
|
5
|
+
import { createRequire as createRequire2 } from "module";
|
|
6
6
|
import { Command } from "commander";
|
|
7
7
|
|
|
8
8
|
// src/commands/ingest.ts
|
|
9
|
-
import
|
|
10
|
-
import {
|
|
9
|
+
import path8 from "path";
|
|
10
|
+
import { readFile as readFile7 } from "fs/promises";
|
|
11
11
|
|
|
12
12
|
// src/utils/markdown.ts
|
|
13
13
|
import { writeFile, rename, readFile, mkdir } from "fs/promises";
|
|
@@ -22,7 +22,7 @@ var VALID_PROVENANCE_STATES = /* @__PURE__ */ new Set([
|
|
|
22
22
|
"ambiguous"
|
|
23
23
|
]);
|
|
24
24
|
function slugify(title) {
|
|
25
|
-
return title.toLowerCase().replace(/['']/g, "").replace(/[^\
|
|
25
|
+
return title.toLowerCase().replace(/['']/g, "").replace(/[^\p{L}\p{N}\s-]/gu, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
26
26
|
}
|
|
27
27
|
function buildFrontmatter(fields) {
|
|
28
28
|
const dumped = yaml.dump(fields, { lineWidth: -1, quotingType: '"' }).trimEnd();
|
|
@@ -103,16 +103,11 @@ function parseContradictedBy(raw) {
|
|
|
103
103
|
const refs = raw.map(coerceContradictionEntry).filter((ref) => ref !== null);
|
|
104
104
|
return refs.length > 0 ? refs : void 0;
|
|
105
105
|
}
|
|
106
|
-
function parseInferredParagraphs(raw) {
|
|
107
|
-
if (typeof raw !== "number" || !Number.isInteger(raw) || raw < 0) return void 0;
|
|
108
|
-
return raw;
|
|
109
|
-
}
|
|
110
106
|
function parseProvenanceMetadata(meta) {
|
|
111
107
|
return {
|
|
112
108
|
confidence: parseConfidence(meta.confidence),
|
|
113
109
|
provenanceState: parseProvenanceState(meta.provenanceState),
|
|
114
|
-
contradictedBy: parseContradictedBy(meta.contradictedBy)
|
|
115
|
-
inferredParagraphs: parseInferredParagraphs(meta.inferredParagraphs)
|
|
110
|
+
contradictedBy: parseContradictedBy(meta.contradictedBy)
|
|
116
111
|
};
|
|
117
112
|
}
|
|
118
113
|
function validateWikiPage(content) {
|
|
@@ -123,9 +118,16 @@ function validateWikiPage(content) {
|
|
|
123
118
|
return true;
|
|
124
119
|
}
|
|
125
120
|
|
|
121
|
+
// src/utils/source-writer.ts
|
|
122
|
+
import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
|
|
123
|
+
import path2 from "path";
|
|
124
|
+
import { createHash } from "crypto";
|
|
125
|
+
|
|
126
126
|
// src/utils/constants.ts
|
|
127
127
|
var MAX_SOURCE_CHARS = 1e5;
|
|
128
128
|
var MIN_SOURCE_CHARS = 50;
|
|
129
|
+
var DEFAULT_PROMPT_BUDGET_CHARS = 2e5;
|
|
130
|
+
var PROMPT_BUDGET_ENV_VAR = "LLMWIKI_PROMPT_BUDGET_CHARS";
|
|
129
131
|
var QUERY_PAGE_LIMIT = 5;
|
|
130
132
|
var COMPILE_CONCURRENCY = 5;
|
|
131
133
|
var RETRY_COUNT = 3;
|
|
@@ -169,6 +171,42 @@ var EMBEDDING_MODELS = {
|
|
|
169
171
|
ollama: "nomic-embed-text"
|
|
170
172
|
};
|
|
171
173
|
|
|
174
|
+
// src/utils/source-writer.ts
|
|
175
|
+
var COLLISION_HASH_LEN = 8;
|
|
176
|
+
function shortHashOfSource(source2) {
|
|
177
|
+
return createHash("sha256").update(source2).digest("hex").slice(0, COLLISION_HASH_LEN);
|
|
178
|
+
}
|
|
179
|
+
async function resolveCollisionFreeFilename(slug, source2) {
|
|
180
|
+
const candidate = `${slug}.md`;
|
|
181
|
+
const candidatePath2 = path2.join(SOURCES_DIR, candidate);
|
|
182
|
+
let existing;
|
|
183
|
+
try {
|
|
184
|
+
existing = await readFile2(candidatePath2, "utf-8");
|
|
185
|
+
} catch (err) {
|
|
186
|
+
const e = err;
|
|
187
|
+
if (e.code === "ENOENT") return candidate;
|
|
188
|
+
throw err;
|
|
189
|
+
}
|
|
190
|
+
const { meta } = parseFrontmatter(existing);
|
|
191
|
+
if (typeof meta.source === "string" && meta.source === source2) {
|
|
192
|
+
return candidate;
|
|
193
|
+
}
|
|
194
|
+
return `${slug}-${shortHashOfSource(source2)}.md`;
|
|
195
|
+
}
|
|
196
|
+
async function saveSource(title, document, source2) {
|
|
197
|
+
const slug = slugify(title);
|
|
198
|
+
if (!slug) {
|
|
199
|
+
throw new Error(
|
|
200
|
+
`Could not derive a filename from title "${title}". The title contains no letter or number characters. Rename the source file to one with at least one letter or digit.`
|
|
201
|
+
);
|
|
202
|
+
}
|
|
203
|
+
await mkdir2(SOURCES_DIR, { recursive: true });
|
|
204
|
+
const filename = await resolveCollisionFreeFilename(slug, source2);
|
|
205
|
+
const destPath = path2.join(SOURCES_DIR, filename);
|
|
206
|
+
await writeFile2(destPath, document, "utf-8");
|
|
207
|
+
return destPath;
|
|
208
|
+
}
|
|
209
|
+
|
|
172
210
|
// src/utils/output.ts
|
|
173
211
|
var RESET = "\x1B[0m";
|
|
174
212
|
var BOLD = "\x1B[1m";
|
|
@@ -244,13 +282,13 @@ async function ingestWeb(url) {
|
|
|
244
282
|
}
|
|
245
283
|
|
|
246
284
|
// src/ingest/file.ts
|
|
247
|
-
import { readFile as
|
|
248
|
-
import
|
|
285
|
+
import { readFile as readFile3 } from "fs/promises";
|
|
286
|
+
import path4 from "path";
|
|
249
287
|
|
|
250
288
|
// src/ingest/shared.ts
|
|
251
|
-
import
|
|
289
|
+
import path3 from "path";
|
|
252
290
|
function titleFromFilename(filePath) {
|
|
253
|
-
const basename =
|
|
291
|
+
const basename = path3.basename(filePath, path3.extname(filePath));
|
|
254
292
|
return basename.replace(/[-_]+/g, " ").trim();
|
|
255
293
|
}
|
|
256
294
|
|
|
@@ -262,20 +300,20 @@ ${text}
|
|
|
262
300
|
\`\`\``;
|
|
263
301
|
}
|
|
264
302
|
async function ingestFile(filePath) {
|
|
265
|
-
const ext =
|
|
303
|
+
const ext = path4.extname(filePath).toLowerCase();
|
|
266
304
|
if (!SUPPORTED_EXTENSIONS.has(ext)) {
|
|
267
305
|
throw new Error(
|
|
268
306
|
`Unsupported file type "${ext}". Only .md and .txt files are supported.`
|
|
269
307
|
);
|
|
270
308
|
}
|
|
271
|
-
const raw = await
|
|
309
|
+
const raw = await readFile3(filePath, "utf-8");
|
|
272
310
|
const title = titleFromFilename(filePath);
|
|
273
311
|
const content = ext === ".md" ? raw : wrapPlainText(raw);
|
|
274
312
|
return { title, content };
|
|
275
313
|
}
|
|
276
314
|
|
|
277
315
|
// src/ingest/pdf.ts
|
|
278
|
-
import { readFile as
|
|
316
|
+
import { readFile as readFile4 } from "fs/promises";
|
|
279
317
|
function resolveTitle(filePath, info2) {
|
|
280
318
|
if (info2 && typeof info2 === "object") {
|
|
281
319
|
const titleField = info2["Title"];
|
|
@@ -287,7 +325,7 @@ function resolveTitle(filePath, info2) {
|
|
|
287
325
|
}
|
|
288
326
|
async function ingestPdf(filePath) {
|
|
289
327
|
const { PDFParse } = await import("pdf-parse");
|
|
290
|
-
const buffer = await
|
|
328
|
+
const buffer = await readFile4(filePath);
|
|
291
329
|
const parser = new PDFParse({ data: new Uint8Array(buffer) });
|
|
292
330
|
try {
|
|
293
331
|
const textResult = await parser.getText();
|
|
@@ -301,8 +339,8 @@ async function ingestPdf(filePath) {
|
|
|
301
339
|
}
|
|
302
340
|
|
|
303
341
|
// src/ingest/image.ts
|
|
304
|
-
import { readFile as
|
|
305
|
-
import
|
|
342
|
+
import { readFile as readFile5 } from "fs/promises";
|
|
343
|
+
import path6 from "path";
|
|
306
344
|
import Anthropic2 from "@anthropic-ai/sdk";
|
|
307
345
|
|
|
308
346
|
// src/providers/anthropic.ts
|
|
@@ -419,7 +457,7 @@ var AnthropicProvider = class {
|
|
|
419
457
|
// src/utils/claude-settings.ts
|
|
420
458
|
import { readFileSync } from "fs";
|
|
421
459
|
import { homedir } from "os";
|
|
422
|
-
import
|
|
460
|
+
import path5 from "path";
|
|
423
461
|
var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
|
|
424
462
|
function isRecord(value) {
|
|
425
463
|
return typeof value === "object" && value !== null;
|
|
@@ -430,7 +468,7 @@ function normalize(value) {
|
|
|
430
468
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
431
469
|
}
|
|
432
470
|
function resolveClaudeSettingsPath(env) {
|
|
433
|
-
return env[CLAUDE_SETTINGS_PATH_ENV] ??
|
|
471
|
+
return env[CLAUDE_SETTINGS_PATH_ENV] ?? path5.join(homedir(), ".claude", "settings.json");
|
|
434
472
|
}
|
|
435
473
|
function readClaudeSettingsFile(settingsPath) {
|
|
436
474
|
try {
|
|
@@ -563,9 +601,9 @@ async function ingestImage(filePath) {
|
|
|
563
601
|
`Image ingest requires the Anthropic provider (vision). Current provider: "${providerName}". Set LLMWIKI_PROVIDER=anthropic and ANTHROPIC_API_KEY to use image ingest.`
|
|
564
602
|
);
|
|
565
603
|
}
|
|
566
|
-
const ext =
|
|
604
|
+
const ext = path6.extname(filePath).toLowerCase();
|
|
567
605
|
const mimeType = mimeTypeForExtension(ext);
|
|
568
|
-
const imageBuffer = await
|
|
606
|
+
const imageBuffer = await readFile5(filePath);
|
|
569
607
|
const imageData = imageBuffer.toString("base64");
|
|
570
608
|
const client = buildClient();
|
|
571
609
|
const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
|
|
@@ -575,8 +613,8 @@ async function ingestImage(filePath) {
|
|
|
575
613
|
}
|
|
576
614
|
|
|
577
615
|
// src/ingest/transcript.ts
|
|
578
|
-
import { readFile as
|
|
579
|
-
import
|
|
616
|
+
import { readFile as readFile6 } from "fs/promises";
|
|
617
|
+
import path7 from "path";
|
|
580
618
|
import { YoutubeTranscript } from "youtube-transcript";
|
|
581
619
|
var YOUTUBE_URL_PATTERN = /^https?:\/\/(www\.)?(youtube\.com\/watch|youtu\.be\/)/;
|
|
582
620
|
var SRT_SEQUENCE_PATTERN = /^\d+$/;
|
|
@@ -661,8 +699,8 @@ async function ingestTranscript(source2) {
|
|
|
661
699
|
if (isYoutubeUrl(source2)) {
|
|
662
700
|
return fetchYoutubeTranscript(source2);
|
|
663
701
|
}
|
|
664
|
-
const ext =
|
|
665
|
-
const raw = await
|
|
702
|
+
const ext = path7.extname(source2).toLowerCase();
|
|
703
|
+
const raw = await readFile6(source2, "utf-8");
|
|
666
704
|
if (ext === ".vtt") return parseVtt(raw, source2);
|
|
667
705
|
if (ext === ".srt") return parseSrt(raw, source2);
|
|
668
706
|
if (ext === ".txt") return parsePlainTranscript(raw, source2);
|
|
@@ -701,7 +739,7 @@ function hasSpeakerDialoguePattern(sample) {
|
|
|
701
739
|
return hasEnoughSpeakers && hasRepeatedSpeaker;
|
|
702
740
|
}
|
|
703
741
|
async function looksLikeTxtTranscript(filePath) {
|
|
704
|
-
const raw = await
|
|
742
|
+
const raw = await readFile7(filePath, "utf-8");
|
|
705
743
|
const sample = raw.slice(0, TXT_SNIFF_BYTES);
|
|
706
744
|
if (hasSpeakerDialoguePattern(sample)) return true;
|
|
707
745
|
const timestampMatches = sample.match(new RegExp(TIMESTAMP_PATTERN2.source, "gm"));
|
|
@@ -741,7 +779,7 @@ function enforceMinContent(content) {
|
|
|
741
779
|
}
|
|
742
780
|
async function detectSourceType(source2) {
|
|
743
781
|
if (!isUrl(source2)) {
|
|
744
|
-
const ext =
|
|
782
|
+
const ext = path8.extname(source2).toLowerCase();
|
|
745
783
|
if (ext === ".pdf") return "pdf";
|
|
746
784
|
if (IMAGE_EXTENSIONS.has(ext)) return "image";
|
|
747
785
|
if (TRANSCRIPT_EXTENSIONS.has(ext)) return "transcript";
|
|
@@ -787,13 +825,6 @@ async function fetchContent(source2, sourceType) {
|
|
|
787
825
|
return ingestFile(source2);
|
|
788
826
|
}
|
|
789
827
|
}
|
|
790
|
-
async function saveSource(title, document) {
|
|
791
|
-
const filename = `${slugify(title)}.md`;
|
|
792
|
-
const destPath = path7.join(SOURCES_DIR, filename);
|
|
793
|
-
await mkdir2(SOURCES_DIR, { recursive: true });
|
|
794
|
-
await writeFile2(destPath, document, "utf-8");
|
|
795
|
-
return destPath;
|
|
796
|
-
}
|
|
797
828
|
async function ingestSource(source2) {
|
|
798
829
|
const sourceType = await detectSourceType(source2);
|
|
799
830
|
status("*", info(`Ingesting [${sourceType}]: ${source2}`));
|
|
@@ -801,9 +832,9 @@ async function ingestSource(source2) {
|
|
|
801
832
|
const result = enforceCharLimit(content);
|
|
802
833
|
enforceMinContent(result.content);
|
|
803
834
|
const document = buildDocument(title, source2, result, sourceType);
|
|
804
|
-
const savedPath = await saveSource(title, document);
|
|
835
|
+
const savedPath = await saveSource(title, document, source2);
|
|
805
836
|
return {
|
|
806
|
-
filename:
|
|
837
|
+
filename: path8.basename(savedPath),
|
|
807
838
|
charCount: result.content.length,
|
|
808
839
|
truncated: result.truncated,
|
|
809
840
|
source: source2,
|
|
@@ -812,7 +843,7 @@ async function ingestSource(source2) {
|
|
|
812
843
|
}
|
|
813
844
|
async function ingest(source2) {
|
|
814
845
|
const result = await ingestSource(source2);
|
|
815
|
-
const savedPath =
|
|
846
|
+
const savedPath = path8.join(SOURCES_DIR, result.filename);
|
|
816
847
|
status(
|
|
817
848
|
"+",
|
|
818
849
|
success(`Saved ${bold(result.filename)} \u2192 ${source(savedPath)}`)
|
|
@@ -820,27 +851,390 @@ async function ingest(source2) {
|
|
|
820
851
|
status("\u2192", dim("Next: llmwiki compile"));
|
|
821
852
|
}
|
|
822
853
|
|
|
854
|
+
// src/commands/ingest-session.ts
|
|
855
|
+
import path12 from "path";
|
|
856
|
+
import { readdir, stat } from "fs/promises";
|
|
857
|
+
|
|
858
|
+
// src/adapters/claude.ts
|
|
859
|
+
import { readFile as readFile8 } from "fs/promises";
|
|
860
|
+
import path9 from "path";
|
|
861
|
+
|
|
862
|
+
// src/adapters/utils.ts
|
|
863
|
+
var MAX_TITLE_CHARS = 80;
|
|
864
|
+
function truncateTitle(text) {
|
|
865
|
+
const trimmed = text.trim();
|
|
866
|
+
return trimmed.length > MAX_TITLE_CHARS ? trimmed.slice(0, MAX_TITLE_CHARS).trimEnd() + "\u2026" : trimmed;
|
|
867
|
+
}
|
|
868
|
+
function resolveSessionTitle(rawTitle, firstUserContent, defaultTitle) {
|
|
869
|
+
if (rawTitle && rawTitle.trim().length > 0) return truncateTitle(rawTitle);
|
|
870
|
+
if (firstUserContent) {
|
|
871
|
+
const firstLine = firstUserContent.split("\n")[0];
|
|
872
|
+
if (firstLine.trim().length > 0) return truncateTitle(firstLine);
|
|
873
|
+
}
|
|
874
|
+
return defaultTitle;
|
|
875
|
+
}
|
|
876
|
+
function parseJsonOrThrow(raw, filePath) {
|
|
877
|
+
try {
|
|
878
|
+
return JSON.parse(raw);
|
|
879
|
+
} catch {
|
|
880
|
+
throw new Error(`Invalid JSON in session file: ${filePath}`);
|
|
881
|
+
}
|
|
882
|
+
}
|
|
883
|
+
|
|
884
|
+
// src/adapters/claude.ts
|
|
885
|
+
var CLAUDE_EXTENSION = ".jsonl";
|
|
886
|
+
var CLAUDE_TYPE_MARKERS = /* @__PURE__ */ new Set(["user", "assistant", "system", "tool_use", "tool_result"]);
|
|
887
|
+
function extractText(content) {
|
|
888
|
+
if (typeof content === "string") return content;
|
|
889
|
+
return content.filter((b) => b.type === "text" && typeof b.text === "string").map((b) => b.text).join("\n");
|
|
890
|
+
}
|
|
891
|
+
function titleFromFirstUserMessage(turns) {
|
|
892
|
+
const firstUser = turns.find((t) => t.role === "user" && t.content.trim().length > 0);
|
|
893
|
+
return resolveSessionTitle(void 0, firstUser?.content, "Claude Session");
|
|
894
|
+
}
|
|
895
|
+
function parseLine(line) {
|
|
896
|
+
try {
|
|
897
|
+
return JSON.parse(line);
|
|
898
|
+
} catch {
|
|
899
|
+
return null;
|
|
900
|
+
}
|
|
901
|
+
}
|
|
902
|
+
function eventToTurn(event) {
|
|
903
|
+
if (!event.message || !event.message.role) return null;
|
|
904
|
+
const role = event.message.role;
|
|
905
|
+
if (role !== "user" && role !== "assistant") return null;
|
|
906
|
+
const content = extractText(event.message.content);
|
|
907
|
+
if (content.trim().length === 0) return null;
|
|
908
|
+
return { role, content, timestamp: event.timestamp };
|
|
909
|
+
}
|
|
910
|
+
var claudeAdapter = {
|
|
911
|
+
name: "claude",
|
|
912
|
+
async detect(filePath) {
|
|
913
|
+
if (path9.extname(filePath).toLowerCase() !== CLAUDE_EXTENSION) return false;
|
|
914
|
+
const raw = await readFile8(filePath, "utf-8").catch(() => "");
|
|
915
|
+
const firstLine = raw.split("\n")[0].trim();
|
|
916
|
+
if (!firstLine.startsWith("{")) return false;
|
|
917
|
+
try {
|
|
918
|
+
const obj = JSON.parse(firstLine);
|
|
919
|
+
return typeof obj.type === "string" && CLAUDE_TYPE_MARKERS.has(obj.type);
|
|
920
|
+
} catch {
|
|
921
|
+
return false;
|
|
922
|
+
}
|
|
923
|
+
},
|
|
924
|
+
async parse(filePath) {
|
|
925
|
+
const raw = await readFile8(filePath, "utf-8");
|
|
926
|
+
const lines = raw.split("\n").filter((l) => l.trim().length > 0);
|
|
927
|
+
if (lines.length === 0) {
|
|
928
|
+
throw new Error(`Claude session file is empty: ${filePath}`);
|
|
929
|
+
}
|
|
930
|
+
const turns = [];
|
|
931
|
+
const timestamps = [];
|
|
932
|
+
for (const [index, line] of lines.entries()) {
|
|
933
|
+
const event = parseLine(line);
|
|
934
|
+
if (event === null) {
|
|
935
|
+
throw new Error(
|
|
936
|
+
`Malformed JSON on line ${index + 1} of Claude session: ${filePath}`
|
|
937
|
+
);
|
|
938
|
+
}
|
|
939
|
+
if (event.timestamp) timestamps.push(event.timestamp);
|
|
940
|
+
const turn = eventToTurn(event);
|
|
941
|
+
if (turn) turns.push(turn);
|
|
942
|
+
}
|
|
943
|
+
const title = titleFromFirstUserMessage(turns);
|
|
944
|
+
return {
|
|
945
|
+
title,
|
|
946
|
+
adapter: "claude",
|
|
947
|
+
startedAt: timestamps[0],
|
|
948
|
+
endedAt: timestamps[timestamps.length - 1],
|
|
949
|
+
participantIdentity: "Claude Code",
|
|
950
|
+
turns
|
|
951
|
+
};
|
|
952
|
+
}
|
|
953
|
+
};
|
|
954
|
+
|
|
955
|
+
// src/adapters/codex.ts
|
|
956
|
+
import { readFile as readFile9 } from "fs/promises";
|
|
957
|
+
import path10 from "path";
|
|
958
|
+
var CODEX_EXTENSION = ".json";
|
|
959
|
+
function unixToIso(ts) {
|
|
960
|
+
return new Date(ts * 1e3).toISOString();
|
|
961
|
+
}
|
|
962
|
+
function extractTurns(mapping) {
|
|
963
|
+
const turns = [];
|
|
964
|
+
for (const node of Object.values(mapping)) {
|
|
965
|
+
const msg = node.message;
|
|
966
|
+
if (!msg) continue;
|
|
967
|
+
const role = msg.author?.role;
|
|
968
|
+
if (role !== "user" && role !== "assistant") continue;
|
|
969
|
+
const content = (msg.content?.parts ?? []).join("\n").trim();
|
|
970
|
+
if (content.length === 0) continue;
|
|
971
|
+
turns.push({
|
|
972
|
+
role,
|
|
973
|
+
content,
|
|
974
|
+
timestamp: msg.create_time != null ? unixToIso(msg.create_time) : void 0
|
|
975
|
+
});
|
|
976
|
+
}
|
|
977
|
+
turns.sort((a, b) => {
|
|
978
|
+
if (!a.timestamp || !b.timestamp) return 0;
|
|
979
|
+
return a.timestamp.localeCompare(b.timestamp);
|
|
980
|
+
});
|
|
981
|
+
return turns;
|
|
982
|
+
}
|
|
983
|
+
function isCodexExport(value) {
|
|
984
|
+
return Array.isArray(value) && value.length > 0 && typeof value[0].mapping === "object";
|
|
985
|
+
}
|
|
986
|
+
var codexAdapter = {
|
|
987
|
+
name: "codex",
|
|
988
|
+
async detect(filePath) {
|
|
989
|
+
if (path10.extname(filePath).toLowerCase() !== CODEX_EXTENSION) return false;
|
|
990
|
+
const raw = await readFile9(filePath, "utf-8").catch(() => "");
|
|
991
|
+
if (raw.trimStart()[0] !== "[") return false;
|
|
992
|
+
try {
|
|
993
|
+
return isCodexExport(JSON.parse(raw));
|
|
994
|
+
} catch {
|
|
995
|
+
return false;
|
|
996
|
+
}
|
|
997
|
+
},
|
|
998
|
+
async parse(filePath) {
|
|
999
|
+
const raw = await readFile9(filePath, "utf-8");
|
|
1000
|
+
const parsed = parseJsonOrThrow(raw, filePath);
|
|
1001
|
+
if (!isCodexExport(parsed)) {
|
|
1002
|
+
throw new Error(
|
|
1003
|
+
`Codex session file does not contain a conversation array: ${filePath}`
|
|
1004
|
+
);
|
|
1005
|
+
}
|
|
1006
|
+
const conv = parsed[0];
|
|
1007
|
+
const turns = extractTurns(conv.mapping ?? {});
|
|
1008
|
+
const firstUser = turns.find((t) => t.role === "user");
|
|
1009
|
+
return {
|
|
1010
|
+
title: resolveSessionTitle(conv.title, firstUser?.content, "Codex Session"),
|
|
1011
|
+
adapter: "codex",
|
|
1012
|
+
startedAt: conv.create_time != null ? unixToIso(conv.create_time) : void 0,
|
|
1013
|
+
endedAt: conv.update_time != null ? unixToIso(conv.update_time) : void 0,
|
|
1014
|
+
participantIdentity: "OpenAI Codex",
|
|
1015
|
+
turns
|
|
1016
|
+
};
|
|
1017
|
+
}
|
|
1018
|
+
};
|
|
1019
|
+
|
|
1020
|
+
// src/adapters/cursor.ts
|
|
1021
|
+
import { readFile as readFile10 } from "fs/promises";
|
|
1022
|
+
import path11 from "path";
|
|
1023
|
+
var CURSOR_EXTENSION = ".json";
|
|
1024
|
+
function isTabsExport(value) {
|
|
1025
|
+
return typeof value === "object" && value !== null && "tabs" in value && Array.isArray(value.tabs);
|
|
1026
|
+
}
|
|
1027
|
+
function isFlatExport(value) {
|
|
1028
|
+
return typeof value === "object" && value !== null && "messages" in value && Array.isArray(value.messages);
|
|
1029
|
+
}
|
|
1030
|
+
function extractMessagesAndTitle(data) {
|
|
1031
|
+
if (isTabsExport(data)) {
|
|
1032
|
+
const tab = data.tabs[0];
|
|
1033
|
+
return { messages: tab?.messages ?? [], title: tab?.title };
|
|
1034
|
+
}
|
|
1035
|
+
return { messages: data.messages, title: data.title };
|
|
1036
|
+
}
|
|
1037
|
+
function toTurns(messages) {
|
|
1038
|
+
const turns = [];
|
|
1039
|
+
for (const msg of messages) {
|
|
1040
|
+
const role = msg.role;
|
|
1041
|
+
if (role !== "user" && role !== "assistant") continue;
|
|
1042
|
+
const content = (msg.content ?? "").trim();
|
|
1043
|
+
if (content.length === 0) continue;
|
|
1044
|
+
turns.push({ role, content, timestamp: msg.timestamp });
|
|
1045
|
+
}
|
|
1046
|
+
return turns;
|
|
1047
|
+
}
|
|
1048
|
+
var cursorAdapter = {
|
|
1049
|
+
name: "cursor",
|
|
1050
|
+
async detect(filePath) {
|
|
1051
|
+
if (path11.extname(filePath).toLowerCase() !== CURSOR_EXTENSION) return false;
|
|
1052
|
+
const raw = await readFile10(filePath, "utf-8").catch(() => "");
|
|
1053
|
+
if (raw.trimStart()[0] !== "{") return false;
|
|
1054
|
+
try {
|
|
1055
|
+
const parsed = JSON.parse(raw);
|
|
1056
|
+
return isTabsExport(parsed) || isFlatExport(parsed);
|
|
1057
|
+
} catch {
|
|
1058
|
+
return false;
|
|
1059
|
+
}
|
|
1060
|
+
},
|
|
1061
|
+
async parse(filePath) {
|
|
1062
|
+
const raw = await readFile10(filePath, "utf-8");
|
|
1063
|
+
const parsed = parseJsonOrThrow(raw, filePath);
|
|
1064
|
+
if (!isTabsExport(parsed) && !isFlatExport(parsed)) {
|
|
1065
|
+
throw new Error(
|
|
1066
|
+
`Cursor session file does not match a known Cursor export schema: ${filePath}`
|
|
1067
|
+
);
|
|
1068
|
+
}
|
|
1069
|
+
const { messages, title: rawTitle } = extractMessagesAndTitle(parsed);
|
|
1070
|
+
const turns = toTurns(messages);
|
|
1071
|
+
const firstUser = turns.find((t) => t.role === "user");
|
|
1072
|
+
const timestamps = turns.filter((t) => t.timestamp != null).map((t) => t.timestamp);
|
|
1073
|
+
return {
|
|
1074
|
+
title: resolveSessionTitle(rawTitle, firstUser?.content, "Cursor Session"),
|
|
1075
|
+
adapter: "cursor",
|
|
1076
|
+
startedAt: timestamps[0],
|
|
1077
|
+
endedAt: timestamps[timestamps.length - 1],
|
|
1078
|
+
participantIdentity: "Cursor AI",
|
|
1079
|
+
turns
|
|
1080
|
+
};
|
|
1081
|
+
}
|
|
1082
|
+
};
|
|
1083
|
+
|
|
1084
|
+
// src/adapters/registry.ts
|
|
1085
|
+
var ADAPTERS = [claudeAdapter, codexAdapter, cursorAdapter];
|
|
1086
|
+
async function detectAdapter(filePath) {
|
|
1087
|
+
for (const adapter of ADAPTERS) {
|
|
1088
|
+
if (await adapter.detect(filePath)) return adapter;
|
|
1089
|
+
}
|
|
1090
|
+
return null;
|
|
1091
|
+
}
|
|
1092
|
+
async function parseSessionFile(filePath) {
|
|
1093
|
+
const adapter = await detectAdapter(filePath);
|
|
1094
|
+
if (!adapter) {
|
|
1095
|
+
throw new Error(
|
|
1096
|
+
`No session adapter recognised the file: ${filePath}
|
|
1097
|
+
Supported formats: ${ADAPTERS.map((a) => a.name).join(", ")}`
|
|
1098
|
+
);
|
|
1099
|
+
}
|
|
1100
|
+
const session = await adapter.parse(filePath);
|
|
1101
|
+
assertSessionHasUsableTurns(session, filePath);
|
|
1102
|
+
return session;
|
|
1103
|
+
}
|
|
1104
|
+
function assertSessionHasUsableTurns(session, filePath) {
|
|
1105
|
+
const hasUsableTurn = session.turns.some(
|
|
1106
|
+
(t) => (t.role === "user" || t.role === "assistant") && t.content.trim().length > 0
|
|
1107
|
+
);
|
|
1108
|
+
if (!hasUsableTurn) {
|
|
1109
|
+
throw new Error(
|
|
1110
|
+
`${session.adapter} session has no usable turns: ${filePath}
|
|
1111
|
+
The file matches the ${session.adapter} export shape, but no user or assistant message with content was found. Re-export the session or delete the file if it is empty.`
|
|
1112
|
+
);
|
|
1113
|
+
}
|
|
1114
|
+
}
|
|
1115
|
+
function formatSessionAsMarkdown(session) {
|
|
1116
|
+
const lines = [];
|
|
1117
|
+
for (const turn of session.turns) {
|
|
1118
|
+
const label = turn.role === "user" ? "User" : session.participantIdentity ?? "Assistant";
|
|
1119
|
+
const heading = turn.timestamp ? `### ${label} _(${turn.timestamp})_` : `### ${label}`;
|
|
1120
|
+
lines.push(heading);
|
|
1121
|
+
lines.push("");
|
|
1122
|
+
lines.push(turn.content);
|
|
1123
|
+
lines.push("");
|
|
1124
|
+
}
|
|
1125
|
+
return lines.join("\n").trimEnd();
|
|
1126
|
+
}
|
|
1127
|
+
|
|
1128
|
+
// src/commands/ingest-session.ts
|
|
1129
|
+
function buildSessionFrontmatter(session, sourcePath) {
|
|
1130
|
+
const meta = {
|
|
1131
|
+
title: session.title,
|
|
1132
|
+
source: sourcePath,
|
|
1133
|
+
adapter: session.adapter,
|
|
1134
|
+
ingestedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
1135
|
+
};
|
|
1136
|
+
if (session.startedAt) meta.sessionStartedAt = session.startedAt;
|
|
1137
|
+
if (session.endedAt) meta.sessionEndedAt = session.endedAt;
|
|
1138
|
+
if (session.participantIdentity) meta.participant = session.participantIdentity;
|
|
1139
|
+
return buildFrontmatter(meta);
|
|
1140
|
+
}
|
|
1141
|
+
async function saveSessionSource(session, sourcePath) {
|
|
1142
|
+
const frontmatter = buildSessionFrontmatter(session, sourcePath);
|
|
1143
|
+
const body = formatSessionAsMarkdown(session);
|
|
1144
|
+
const document = `${frontmatter}
|
|
1145
|
+
|
|
1146
|
+
${body}
|
|
1147
|
+
`;
|
|
1148
|
+
return saveSource(session.title, document, sourcePath);
|
|
1149
|
+
}
|
|
1150
|
+
async function ingestSessionFile(filePath) {
|
|
1151
|
+
status("*", info(`Ingesting session: ${filePath}`));
|
|
1152
|
+
const session = await parseSessionFile(filePath);
|
|
1153
|
+
const savedPath = await saveSessionSource(session, filePath);
|
|
1154
|
+
status(
|
|
1155
|
+
"+",
|
|
1156
|
+
success(
|
|
1157
|
+
`Saved ${bold(path12.basename(savedPath))} [${session.adapter}] \u2192 ${source(savedPath)}`
|
|
1158
|
+
)
|
|
1159
|
+
);
|
|
1160
|
+
return {
|
|
1161
|
+
filename: path12.basename(savedPath),
|
|
1162
|
+
adapter: session.adapter,
|
|
1163
|
+
title: session.title,
|
|
1164
|
+
source: filePath
|
|
1165
|
+
};
|
|
1166
|
+
}
|
|
1167
|
+
async function listDirectoryFiles(dirPath) {
|
|
1168
|
+
const entries = await readdir(dirPath);
|
|
1169
|
+
const files = [];
|
|
1170
|
+
for (const entry of entries) {
|
|
1171
|
+
const full = path12.join(dirPath, entry);
|
|
1172
|
+
const info2 = await stat(full);
|
|
1173
|
+
if (info2.isFile()) files.push(full);
|
|
1174
|
+
}
|
|
1175
|
+
return files;
|
|
1176
|
+
}
|
|
1177
|
+
async function ingestDirectory(dirPath) {
|
|
1178
|
+
const files = await listDirectoryFiles(dirPath);
|
|
1179
|
+
if (files.length === 0) {
|
|
1180
|
+
throw new Error(`No files found in directory: ${dirPath}`);
|
|
1181
|
+
}
|
|
1182
|
+
status("*", info(`Scanning ${files.length} file(s) in: ${dirPath}`));
|
|
1183
|
+
let imported = 0;
|
|
1184
|
+
let skipped = 0;
|
|
1185
|
+
for (const file of files) {
|
|
1186
|
+
try {
|
|
1187
|
+
await ingestSessionFile(file);
|
|
1188
|
+
imported++;
|
|
1189
|
+
} catch (err) {
|
|
1190
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
1191
|
+
status("!", warn(`Skipped ${path12.basename(file)}: ${message}`));
|
|
1192
|
+
skipped++;
|
|
1193
|
+
}
|
|
1194
|
+
}
|
|
1195
|
+
if (imported === 0) {
|
|
1196
|
+
throw new Error(
|
|
1197
|
+
`No sessions imported from ${dirPath} (${skipped} file(s) skipped). Check that at least one file is in a supported session format.`
|
|
1198
|
+
);
|
|
1199
|
+
}
|
|
1200
|
+
status(
|
|
1201
|
+
"\u2192",
|
|
1202
|
+
dim(`Imported ${imported} session(s), skipped ${skipped}.`)
|
|
1203
|
+
);
|
|
1204
|
+
}
|
|
1205
|
+
async function ingestSession(targetPath) {
|
|
1206
|
+
const info2 = await stat(targetPath).catch(() => {
|
|
1207
|
+
throw new Error(`Path not found: ${targetPath}`);
|
|
1208
|
+
});
|
|
1209
|
+
if (info2.isDirectory()) {
|
|
1210
|
+
await ingestDirectory(targetPath);
|
|
1211
|
+
} else {
|
|
1212
|
+
await ingestSessionFile(targetPath);
|
|
1213
|
+
}
|
|
1214
|
+
status("\u2192", dim("Next: llmwiki compile"));
|
|
1215
|
+
}
|
|
1216
|
+
|
|
823
1217
|
// src/commands/compile.ts
|
|
824
1218
|
import { existsSync as existsSync7 } from "fs";
|
|
825
1219
|
|
|
826
1220
|
// src/compiler/index.ts
|
|
827
|
-
import { readFile as
|
|
828
|
-
import
|
|
1221
|
+
import { readFile as readFile18 } from "fs/promises";
|
|
1222
|
+
import path26 from "path";
|
|
829
1223
|
|
|
830
1224
|
// src/utils/state.ts
|
|
831
|
-
import { readFile as
|
|
1225
|
+
import { readFile as readFile11, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
|
|
832
1226
|
import { existsSync } from "fs";
|
|
833
|
-
import
|
|
1227
|
+
import path13 from "path";
|
|
834
1228
|
function emptyState() {
|
|
835
1229
|
return { version: 1, indexHash: "", sources: {} };
|
|
836
1230
|
}
|
|
837
1231
|
async function readState(root) {
|
|
838
|
-
const filePath =
|
|
1232
|
+
const filePath = path13.join(root, STATE_FILE);
|
|
839
1233
|
if (!existsSync(filePath)) {
|
|
840
1234
|
return emptyState();
|
|
841
1235
|
}
|
|
842
1236
|
try {
|
|
843
|
-
const raw = await
|
|
1237
|
+
const raw = await readFile11(filePath, "utf-8");
|
|
844
1238
|
return JSON.parse(raw);
|
|
845
1239
|
} catch {
|
|
846
1240
|
const bakPath = filePath + ".bak";
|
|
@@ -850,9 +1244,9 @@ async function readState(root) {
|
|
|
850
1244
|
}
|
|
851
1245
|
}
|
|
852
1246
|
async function writeState(root, state) {
|
|
853
|
-
const dir =
|
|
1247
|
+
const dir = path13.join(root, LLMWIKI_DIR);
|
|
854
1248
|
await mkdir3(dir, { recursive: true });
|
|
855
|
-
const filePath =
|
|
1249
|
+
const filePath = path13.join(root, STATE_FILE);
|
|
856
1250
|
const tmpPath = filePath + ".tmp";
|
|
857
1251
|
await writeFile3(tmpPath, JSON.stringify(state, null, 2), "utf-8");
|
|
858
1252
|
await rename2(tmpPath, filePath);
|
|
@@ -869,18 +1263,18 @@ async function removeSourceState(root, sourceFile) {
|
|
|
869
1263
|
}
|
|
870
1264
|
|
|
871
1265
|
// src/compiler/source-state.ts
|
|
872
|
-
import
|
|
1266
|
+
import path15 from "path";
|
|
873
1267
|
|
|
874
1268
|
// src/compiler/hasher.ts
|
|
875
|
-
import { createHash } from "crypto";
|
|
876
|
-
import { readFile as
|
|
877
|
-
import
|
|
1269
|
+
import { createHash as createHash2 } from "crypto";
|
|
1270
|
+
import { readFile as readFile12, readdir as readdir2 } from "fs/promises";
|
|
1271
|
+
import path14 from "path";
|
|
878
1272
|
async function hashFile(filePath) {
|
|
879
|
-
const content = await
|
|
880
|
-
return
|
|
1273
|
+
const content = await readFile12(filePath, "utf-8");
|
|
1274
|
+
return createHash2("sha256").update(content).digest("hex");
|
|
881
1275
|
}
|
|
882
1276
|
async function detectChanges(root, prevState) {
|
|
883
|
-
const sourcesPath =
|
|
1277
|
+
const sourcesPath = path14.join(root, SOURCES_DIR);
|
|
884
1278
|
const currentFiles = await listSourceFiles(sourcesPath);
|
|
885
1279
|
const changes = [];
|
|
886
1280
|
for (const file of currentFiles) {
|
|
@@ -893,14 +1287,14 @@ async function detectChanges(root, prevState) {
|
|
|
893
1287
|
}
|
|
894
1288
|
async function listSourceFiles(sourcesPath) {
|
|
895
1289
|
try {
|
|
896
|
-
const entries = await
|
|
1290
|
+
const entries = await readdir2(sourcesPath);
|
|
897
1291
|
return entries.filter((f) => f.endsWith(".md"));
|
|
898
1292
|
} catch {
|
|
899
1293
|
return [];
|
|
900
1294
|
}
|
|
901
1295
|
}
|
|
902
1296
|
async function classifyFile(root, file, prevState) {
|
|
903
|
-
const filePath =
|
|
1297
|
+
const filePath = path14.join(root, SOURCES_DIR, file);
|
|
904
1298
|
const hash = await hashFile(filePath);
|
|
905
1299
|
const prev = prevState.sources[file];
|
|
906
1300
|
if (!prev) return "new";
|
|
@@ -923,7 +1317,7 @@ async function buildExtractionSourceStates(root, extractions) {
|
|
|
923
1317
|
return snapshot;
|
|
924
1318
|
}
|
|
925
1319
|
async function buildEntry(root, result, compiledAt) {
|
|
926
|
-
const filePath =
|
|
1320
|
+
const filePath = path15.join(root, SOURCES_DIR, result.sourceFile);
|
|
927
1321
|
const hash = await hashFile(filePath);
|
|
928
1322
|
return {
|
|
929
1323
|
hash,
|
|
@@ -1161,8 +1555,8 @@ async function callClaude(options) {
|
|
|
1161
1555
|
}
|
|
1162
1556
|
|
|
1163
1557
|
// src/utils/lock.ts
|
|
1164
|
-
import { open, readFile as
|
|
1165
|
-
import
|
|
1558
|
+
import { open, readFile as readFile13, unlink, mkdir as mkdir4 } from "fs/promises";
|
|
1559
|
+
import path16 from "path";
|
|
1166
1560
|
var RECLAIM_SUFFIX = ".reclaim";
|
|
1167
1561
|
var MAX_ACQUIRE_ATTEMPTS = 2;
|
|
1168
1562
|
function isProcessAlive(pid) {
|
|
@@ -1174,8 +1568,8 @@ function isProcessAlive(pid) {
|
|
|
1174
1568
|
}
|
|
1175
1569
|
}
|
|
1176
1570
|
async function acquireLock(root) {
|
|
1177
|
-
const lockPath =
|
|
1178
|
-
await mkdir4(
|
|
1571
|
+
const lockPath = path16.join(root, LOCK_FILE);
|
|
1572
|
+
await mkdir4(path16.join(root, LLMWIKI_DIR), { recursive: true });
|
|
1179
1573
|
for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
|
|
1180
1574
|
const created = await tryCreateLock(lockPath);
|
|
1181
1575
|
if (created) return true;
|
|
@@ -1238,7 +1632,7 @@ async function tryCreateLock(lockPath) {
|
|
|
1238
1632
|
}
|
|
1239
1633
|
async function isLockStale(lockPath) {
|
|
1240
1634
|
try {
|
|
1241
|
-
const content = await
|
|
1635
|
+
const content = await readFile13(lockPath, "utf-8");
|
|
1242
1636
|
const pid = parseInt(content.trim(), 10);
|
|
1243
1637
|
if (isNaN(pid)) return true;
|
|
1244
1638
|
return !isProcessAlive(pid);
|
|
@@ -1247,14 +1641,32 @@ async function isLockStale(lockPath) {
|
|
|
1247
1641
|
}
|
|
1248
1642
|
}
|
|
1249
1643
|
async function releaseLock(root) {
|
|
1250
|
-
const lockPath =
|
|
1644
|
+
const lockPath = path16.join(root, LOCK_FILE);
|
|
1251
1645
|
try {
|
|
1252
1646
|
await unlink(lockPath);
|
|
1253
1647
|
} catch {
|
|
1254
1648
|
}
|
|
1255
1649
|
}
|
|
1256
1650
|
|
|
1651
|
+
// src/utils/output-language.ts
|
|
1652
|
+
var LANG_ENV_VAR = "LLMWIKI_OUTPUT_LANG";
|
|
1653
|
+
function getOutputLanguage() {
|
|
1654
|
+
const raw = process.env[LANG_ENV_VAR];
|
|
1655
|
+
if (!raw) return null;
|
|
1656
|
+
const trimmed = raw.trim();
|
|
1657
|
+
return trimmed.length > 0 ? trimmed : null;
|
|
1658
|
+
}
|
|
1659
|
+
function languageDirective() {
|
|
1660
|
+
const lang = getOutputLanguage();
|
|
1661
|
+
if (!lang) return "";
|
|
1662
|
+
return `Write the output in ${lang}.`;
|
|
1663
|
+
}
|
|
1664
|
+
|
|
1257
1665
|
// src/compiler/prompts.ts
|
|
1666
|
+
function withLangLine(...lines) {
|
|
1667
|
+
const lang = languageDirective();
|
|
1668
|
+
return lang ? [...lines, lang] : lines;
|
|
1669
|
+
}
|
|
1258
1670
|
var PROVENANCE_STATE_VALUES = [
|
|
1259
1671
|
"extracted",
|
|
1260
1672
|
"merged",
|
|
@@ -1309,10 +1721,6 @@ var CONCEPT_EXTRACTION_TOOL = {
|
|
|
1309
1721
|
required: ["slug"]
|
|
1310
1722
|
},
|
|
1311
1723
|
description: "Slugs of other concepts whose evidence contradicts this one."
|
|
1312
|
-
},
|
|
1313
|
-
inferred_paragraphs: {
|
|
1314
|
-
type: "integer",
|
|
1315
|
-
description: "Estimated number of paragraphs in the page that will be inferred rather than directly cited."
|
|
1316
1724
|
}
|
|
1317
1725
|
},
|
|
1318
1726
|
required: ["concept", "summary", "is_new"]
|
|
@@ -1329,11 +1737,13 @@ Here is the existing wiki index \u2014 avoid duplicating concepts already covere
|
|
|
1329
1737
|
|
|
1330
1738
|
${existingIndex}` : "\n\nNo existing wiki pages yet.";
|
|
1331
1739
|
return [
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1740
|
+
...withLangLine(
|
|
1741
|
+
"You are a knowledge extraction engine. Analyze the following source document",
|
|
1742
|
+
"and identify 3-8 distinct, meaningful concepts worth documenting as wiki pages.",
|
|
1743
|
+
"Each concept should be a standalone topic that someone might look up.",
|
|
1744
|
+
"Focus on key ideas, techniques, patterns, or entities \u2014 not trivial details.",
|
|
1745
|
+
"Use the extract_concepts tool to return your findings."
|
|
1746
|
+
),
|
|
1337
1747
|
"",
|
|
1338
1748
|
"For every concept, emit provenance metadata so downstream tools can reason",
|
|
1339
1749
|
"about reliability:",
|
|
@@ -1343,8 +1753,6 @@ ${existingIndex}` : "\n\nNo existing wiki pages yet.";
|
|
|
1343
1753
|
" or 'ambiguous' if the source is contradictory or unclear.",
|
|
1344
1754
|
" - contradicted_by: slugs of other concepts (in this batch or the index)",
|
|
1345
1755
|
" whose evidence conflicts with this one.",
|
|
1346
|
-
" - inferred_paragraphs: estimated number of paragraphs in the resulting",
|
|
1347
|
-
" page that will be inferred rather than directly citable.",
|
|
1348
1756
|
indexSection,
|
|
1349
1757
|
"\n\n--- SOURCE DOCUMENT ---\n\n",
|
|
1350
1758
|
sourceContent
|
|
@@ -1362,11 +1770,13 @@ Related wiki pages for cross-referencing:
|
|
|
1362
1770
|
|
|
1363
1771
|
${relatedPages}` : "";
|
|
1364
1772
|
return [
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1773
|
+
...withLangLine(
|
|
1774
|
+
`You are a wiki author. Write a clear, well-structured markdown page about "${concept}".`,
|
|
1775
|
+
"Draw facts only from the provided source material.",
|
|
1776
|
+
"Include a ## Sources section at the end listing the source document.",
|
|
1777
|
+
"Suggest [[wikilinks]] to related concepts where appropriate.",
|
|
1778
|
+
"Write in a neutral, informative tone. Be concise but thorough."
|
|
1779
|
+
),
|
|
1370
1780
|
"",
|
|
1371
1781
|
"Source attribution: at the end of each prose paragraph, append a citation",
|
|
1372
1782
|
"marker showing which source file(s) the paragraph drew from.",
|
|
@@ -1383,7 +1793,7 @@ ${relatedPages}` : "";
|
|
|
1383
1793
|
"",
|
|
1384
1794
|
"If a paragraph is your inference rather than a direct extraction, leave it",
|
|
1385
1795
|
"uncited \u2014 downstream lint rules will count uncited paragraphs as 'inferred'",
|
|
1386
|
-
"
|
|
1796
|
+
"so lint can surface excess-inferred-paragraphs warnings on review.",
|
|
1387
1797
|
existingSection,
|
|
1388
1798
|
relatedSection,
|
|
1389
1799
|
"\n\n--- SOURCE MATERIAL ---\n\n",
|
|
@@ -1415,20 +1825,21 @@ function mapRawConcept(c) {
|
|
|
1415
1825
|
tags: Array.isArray(c.tags) ? c.tags : void 0,
|
|
1416
1826
|
confidence: typeof c.confidence === "number" ? c.confidence : void 0,
|
|
1417
1827
|
provenanceState: provenance,
|
|
1418
|
-
contradictedBy: coerceContradictedBy(c.contradicted_by)
|
|
1419
|
-
inferredParagraphs: typeof c.inferred_paragraphs === "number" && Number.isInteger(c.inferred_paragraphs) && c.inferred_paragraphs >= 0 ? c.inferred_paragraphs : void 0
|
|
1828
|
+
contradictedBy: coerceContradictedBy(c.contradicted_by)
|
|
1420
1829
|
};
|
|
1421
1830
|
}
|
|
1422
1831
|
function buildSeedPagePrompt(seed, rule, relatedPagesContent) {
|
|
1423
1832
|
const minLinks = rule.minWikilinks;
|
|
1424
1833
|
const linkExpectation = minLinks > 0 ? `Include at least ${minLinks} [[wikilinks]] to related pages.` : "Use [[wikilinks]] when referencing other pages.";
|
|
1425
1834
|
return [
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1835
|
+
...withLangLine(
|
|
1836
|
+
`You are a wiki author. Write a ${seed.kind} page titled "${seed.title}".`,
|
|
1837
|
+
`Page-kind guidance: ${rule.description}`,
|
|
1838
|
+
`Summary line for context: ${seed.summary}`,
|
|
1839
|
+
"Draw facts only from the related wiki pages provided below.",
|
|
1840
|
+
linkExpectation,
|
|
1841
|
+
"Write in a neutral, informative tone. Be concise but thorough."
|
|
1842
|
+
),
|
|
1432
1843
|
"\n\n--- RELATED PAGES ---\n\n",
|
|
1433
1844
|
relatedPagesContent
|
|
1434
1845
|
].join("\n");
|
|
@@ -1490,8 +1901,8 @@ function buildDefaultSchema() {
|
|
|
1490
1901
|
|
|
1491
1902
|
// src/schema/loader.ts
|
|
1492
1903
|
import { existsSync as existsSync2 } from "fs";
|
|
1493
|
-
import { readFile as
|
|
1494
|
-
import
|
|
1904
|
+
import { readFile as readFile14 } from "fs/promises";
|
|
1905
|
+
import path17 from "path";
|
|
1495
1906
|
import yaml2 from "js-yaml";
|
|
1496
1907
|
var SCHEMA_CANDIDATE_PATHS = [
|
|
1497
1908
|
".llmwiki/schema.json",
|
|
@@ -1502,7 +1913,7 @@ var SCHEMA_CANDIDATE_PATHS = [
|
|
|
1502
1913
|
];
|
|
1503
1914
|
function findSchemaPath(root) {
|
|
1504
1915
|
for (const candidate of SCHEMA_CANDIDATE_PATHS) {
|
|
1505
|
-
const absolute =
|
|
1916
|
+
const absolute = path17.join(root, candidate);
|
|
1506
1917
|
if (existsSync2(absolute)) return absolute;
|
|
1507
1918
|
}
|
|
1508
1919
|
return null;
|
|
@@ -1555,12 +1966,12 @@ async function loadSchema(root) {
|
|
|
1555
1966
|
const defaults = buildDefaultSchema();
|
|
1556
1967
|
const schemaPath = findSchemaPath(root);
|
|
1557
1968
|
if (!schemaPath) return defaults;
|
|
1558
|
-
const raw = await
|
|
1969
|
+
const raw = await readFile14(schemaPath, "utf-8");
|
|
1559
1970
|
const parsed = parseSchemaFile(schemaPath, raw);
|
|
1560
1971
|
return applyOverrides(defaults, parsed, schemaPath);
|
|
1561
1972
|
}
|
|
1562
1973
|
function defaultSchemaInitPath(root) {
|
|
1563
|
-
return
|
|
1974
|
+
return path17.join(root, SCHEMA_CANDIDATE_PATHS[0]);
|
|
1564
1975
|
}
|
|
1565
1976
|
|
|
1566
1977
|
// src/schema/helpers.ts
|
|
@@ -1732,7 +2143,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
|
|
|
1732
2143
|
}
|
|
1733
2144
|
|
|
1734
2145
|
// src/compiler/orphan.ts
|
|
1735
|
-
import
|
|
2146
|
+
import path18 from "path";
|
|
1736
2147
|
async function markOrphaned(root, sourceFile, state) {
|
|
1737
2148
|
const sourceEntry = state.sources[sourceFile];
|
|
1738
2149
|
if (!sourceEntry) return;
|
|
@@ -1758,7 +2169,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
|
|
|
1758
2169
|
}
|
|
1759
2170
|
}
|
|
1760
2171
|
async function orphanPage(root, slug, reason) {
|
|
1761
|
-
const pagePath =
|
|
2172
|
+
const pagePath = path18.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
1762
2173
|
const content = await safeReadFile(pagePath);
|
|
1763
2174
|
if (!content) return;
|
|
1764
2175
|
const { meta } = parseFrontmatter(content);
|
|
@@ -1769,18 +2180,18 @@ async function orphanPage(root, slug, reason) {
|
|
|
1769
2180
|
}
|
|
1770
2181
|
|
|
1771
2182
|
// src/compiler/resolver.ts
|
|
1772
|
-
import { readdir as
|
|
1773
|
-
import
|
|
2183
|
+
import { readdir as readdir3, readFile as readFile15 } from "fs/promises";
|
|
2184
|
+
import path19 from "path";
|
|
1774
2185
|
import { existsSync as existsSync3 } from "fs";
|
|
1775
2186
|
async function buildTitleIndex(root) {
|
|
1776
|
-
const conceptsDir =
|
|
2187
|
+
const conceptsDir = path19.join(root, CONCEPTS_DIR);
|
|
1777
2188
|
if (!existsSync3(conceptsDir)) return [];
|
|
1778
|
-
const files = await
|
|
2189
|
+
const files = await readdir3(conceptsDir);
|
|
1779
2190
|
const pages = [];
|
|
1780
2191
|
for (const file of files) {
|
|
1781
2192
|
if (!file.endsWith(".md")) continue;
|
|
1782
|
-
const filePath =
|
|
1783
|
-
const content = await
|
|
2193
|
+
const filePath = path19.join(conceptsDir, file);
|
|
2194
|
+
const content = await readFile15(filePath, "utf-8");
|
|
1784
2195
|
const { meta } = parseFrontmatter(content);
|
|
1785
2196
|
if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
|
|
1786
2197
|
pages.push({
|
|
@@ -1866,7 +2277,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
|
|
|
1866
2277
|
let count = 0;
|
|
1867
2278
|
for (const page of titleIndex) {
|
|
1868
2279
|
if (newSlugs.includes(page.slug)) continue;
|
|
1869
|
-
const content = await
|
|
2280
|
+
const content = await readFile15(page.filePath, "utf-8");
|
|
1870
2281
|
const { body } = parseFrontmatter(content);
|
|
1871
2282
|
const linked = addWikilinks(body, newTitles, page.title);
|
|
1872
2283
|
if (linked !== body) {
|
|
@@ -1878,7 +2289,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
|
|
|
1878
2289
|
return count;
|
|
1879
2290
|
}
|
|
1880
2291
|
async function linkPage(page, titleIndex) {
|
|
1881
|
-
const content = await
|
|
2292
|
+
const content = await readFile15(page.filePath, "utf-8");
|
|
1882
2293
|
const { body } = parseFrontmatter(content);
|
|
1883
2294
|
const linked = addWikilinks(body, titleIndex, page.title);
|
|
1884
2295
|
if (linked === body) return false;
|
|
@@ -1888,18 +2299,18 @@ async function linkPage(page, titleIndex) {
|
|
|
1888
2299
|
}
|
|
1889
2300
|
|
|
1890
2301
|
// src/compiler/indexgen.ts
|
|
1891
|
-
import { readdir as
|
|
1892
|
-
import
|
|
2302
|
+
import { readdir as readdir4 } from "fs/promises";
|
|
2303
|
+
import path20 from "path";
|
|
1893
2304
|
async function generateIndex(root) {
|
|
1894
2305
|
status("*", info("Generating index..."));
|
|
1895
|
-
const conceptsPath =
|
|
1896
|
-
const queriesPath =
|
|
2306
|
+
const conceptsPath = path20.join(root, CONCEPTS_DIR);
|
|
2307
|
+
const queriesPath = path20.join(root, QUERIES_DIR);
|
|
1897
2308
|
const concepts = await collectPageSummaries(conceptsPath);
|
|
1898
2309
|
const queries = await collectPageSummaries(queriesPath);
|
|
1899
2310
|
concepts.sort((a, b) => a.title.localeCompare(b.title));
|
|
1900
2311
|
queries.sort((a, b) => a.title.localeCompare(b.title));
|
|
1901
2312
|
const indexContent = buildIndexContent(concepts, queries);
|
|
1902
|
-
const indexPath =
|
|
2313
|
+
const indexPath = path20.join(root, INDEX_FILE);
|
|
1903
2314
|
await atomicWrite(indexPath, indexContent);
|
|
1904
2315
|
const total = concepts.length + queries.length;
|
|
1905
2316
|
status("+", success(`Index updated with ${total} pages.`));
|
|
@@ -1907,13 +2318,13 @@ async function generateIndex(root) {
|
|
|
1907
2318
|
async function scanWikiPages(dirPath) {
|
|
1908
2319
|
let files;
|
|
1909
2320
|
try {
|
|
1910
|
-
files = await
|
|
2321
|
+
files = await readdir4(dirPath);
|
|
1911
2322
|
} catch {
|
|
1912
2323
|
return [];
|
|
1913
2324
|
}
|
|
1914
2325
|
const scanned = [];
|
|
1915
2326
|
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
1916
|
-
const content = await safeReadFile(
|
|
2327
|
+
const content = await safeReadFile(path20.join(dirPath, file));
|
|
1917
2328
|
const { meta } = parseFrontmatter(content);
|
|
1918
2329
|
scanned.push({ slug: file.replace(/\.md$/, ""), meta });
|
|
1919
2330
|
}
|
|
@@ -1948,9 +2359,45 @@ function buildIndexContent(concepts, queries) {
|
|
|
1948
2359
|
return lines.join("\n");
|
|
1949
2360
|
}
|
|
1950
2361
|
|
|
2362
|
+
// src/compiler/prompt-budget.ts
|
|
2363
|
+
var TRUNCATION_MARKER = "\n\n[\u2026truncated for prompt budget \u2014 see #39\u2026]";
|
|
2364
|
+
function resolvePromptBudgetChars() {
|
|
2365
|
+
const raw = process.env[PROMPT_BUDGET_ENV_VAR];
|
|
2366
|
+
if (!raw) return DEFAULT_PROMPT_BUDGET_CHARS;
|
|
2367
|
+
const parsed = Number.parseInt(raw, 10);
|
|
2368
|
+
if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_PROMPT_BUDGET_CHARS;
|
|
2369
|
+
return parsed;
|
|
2370
|
+
}
|
|
2371
|
+
function buildBudgetedCombinedContent(concept, slices) {
|
|
2372
|
+
const budget = resolvePromptBudgetChars();
|
|
2373
|
+
const totalRaw = slices.reduce((sum, s) => sum + s.content.length, 0);
|
|
2374
|
+
if (totalRaw <= budget) {
|
|
2375
|
+
return formatSlices(slices);
|
|
2376
|
+
}
|
|
2377
|
+
const perSource = Math.max(1, Math.floor(budget / slices.length));
|
|
2378
|
+
warnTruncation(concept, totalRaw, slices.length, perSource, budget);
|
|
2379
|
+
const trimmed = slices.map(
|
|
2380
|
+
(s) => s.content.length > perSource ? { ...s, content: s.content.slice(0, perSource) + TRUNCATION_MARKER } : s
|
|
2381
|
+
);
|
|
2382
|
+
return formatSlices(trimmed);
|
|
2383
|
+
}
|
|
2384
|
+
function formatSlices(slices) {
|
|
2385
|
+
return slices.map((s) => `--- SOURCE: ${s.file} ---
|
|
2386
|
+
|
|
2387
|
+
${s.content}`).join("\n\n");
|
|
2388
|
+
}
|
|
2389
|
+
function warnTruncation(concept, totalRaw, sourceCount, perSource, budget) {
|
|
2390
|
+
status(
|
|
2391
|
+
"!",
|
|
2392
|
+
warn(
|
|
2393
|
+
`Combined source content for "${concept}" (${totalRaw.toLocaleString()} chars across ${sourceCount} sources) exceeds the ${budget.toLocaleString()}-char prompt budget; truncating each source to ~${perSource.toLocaleString()} chars. Raise via ${PROMPT_BUDGET_ENV_VAR} when running against larger-context models.`
|
|
2394
|
+
)
|
|
2395
|
+
);
|
|
2396
|
+
}
|
|
2397
|
+
|
|
1951
2398
|
// src/compiler/obsidian.ts
|
|
1952
|
-
import { readdir as
|
|
1953
|
-
import
|
|
2399
|
+
import { readdir as readdir5 } from "fs/promises";
|
|
2400
|
+
import path21 from "path";
|
|
1954
2401
|
var ABBREVIATION_MIN_WORDS = 3;
|
|
1955
2402
|
var SWAP_CONJUNCTIONS = [" and ", " or "];
|
|
1956
2403
|
function addObsidianMeta(frontmatter, conceptTitle, tags) {
|
|
@@ -1992,23 +2439,23 @@ function generateAbbreviation(title) {
|
|
|
1992
2439
|
return abbreviation;
|
|
1993
2440
|
}
|
|
1994
2441
|
async function generateMOC(root) {
|
|
1995
|
-
const conceptsPath =
|
|
2442
|
+
const conceptsPath = path21.join(root, CONCEPTS_DIR);
|
|
1996
2443
|
const pages = await loadConceptPages(conceptsPath);
|
|
1997
2444
|
const tagGroups = groupPagesByTag(pages);
|
|
1998
2445
|
const content = buildMOCContent(tagGroups);
|
|
1999
|
-
await atomicWrite(
|
|
2446
|
+
await atomicWrite(path21.join(root, MOC_FILE), content);
|
|
2000
2447
|
}
|
|
2001
2448
|
async function loadConceptPages(conceptsPath) {
|
|
2002
2449
|
let files;
|
|
2003
2450
|
try {
|
|
2004
|
-
files = await
|
|
2451
|
+
files = await readdir5(conceptsPath);
|
|
2005
2452
|
} catch {
|
|
2006
2453
|
return [];
|
|
2007
2454
|
}
|
|
2008
2455
|
const pages = [];
|
|
2009
2456
|
for (const file of files) {
|
|
2010
2457
|
if (!file.endsWith(".md")) continue;
|
|
2011
|
-
const content = await safeReadFile(
|
|
2458
|
+
const content = await safeReadFile(path21.join(conceptsPath, file));
|
|
2012
2459
|
if (!content) continue;
|
|
2013
2460
|
const { meta } = parseFrontmatter(content);
|
|
2014
2461
|
if (meta.orphaned) continue;
|
|
@@ -2059,14 +2506,14 @@ function buildMOCContent(tagGroups) {
|
|
|
2059
2506
|
}
|
|
2060
2507
|
|
|
2061
2508
|
// src/utils/embeddings.ts
|
|
2062
|
-
import { readFile as
|
|
2509
|
+
import { readFile as readFile16, readdir as readdir6 } from "fs/promises";
|
|
2063
2510
|
import { existsSync as existsSync4 } from "fs";
|
|
2064
|
-
import
|
|
2511
|
+
import path22 from "path";
|
|
2065
2512
|
|
|
2066
2513
|
// src/utils/retrieval.ts
|
|
2067
|
-
import { createHash as
|
|
2514
|
+
import { createHash as createHash3 } from "crypto";
|
|
2068
2515
|
function hashChunkText(text) {
|
|
2069
|
-
return
|
|
2516
|
+
return createHash3("sha256").update(text, "utf8").digest("hex").slice(0, 16);
|
|
2070
2517
|
}
|
|
2071
2518
|
function splitIntoChunks(body) {
|
|
2072
2519
|
const paragraphs = extractParagraphs(body);
|
|
@@ -2226,13 +2673,13 @@ function findTopKChunks(queryVec, chunks, k) {
|
|
|
2226
2673
|
return scored.slice(0, k);
|
|
2227
2674
|
}
|
|
2228
2675
|
async function readEmbeddingStore(root) {
|
|
2229
|
-
const filePath =
|
|
2676
|
+
const filePath = path22.join(root, EMBEDDINGS_FILE);
|
|
2230
2677
|
if (!existsSync4(filePath)) return null;
|
|
2231
|
-
const raw = await
|
|
2678
|
+
const raw = await readFile16(filePath, "utf-8");
|
|
2232
2679
|
return JSON.parse(raw);
|
|
2233
2680
|
}
|
|
2234
2681
|
async function writeEmbeddingStore(root, store) {
|
|
2235
|
-
const filePath =
|
|
2682
|
+
const filePath = path22.join(root, EMBEDDINGS_FILE);
|
|
2236
2683
|
await atomicWrite(filePath, JSON.stringify(store, null, 2));
|
|
2237
2684
|
}
|
|
2238
2685
|
async function findRelevantPages(root, question) {
|
|
@@ -2264,10 +2711,10 @@ async function loadActiveStore(root, hasContent) {
|
|
|
2264
2711
|
async function collectPageRecords(root) {
|
|
2265
2712
|
const records = [];
|
|
2266
2713
|
for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
|
|
2267
|
-
const absDir =
|
|
2714
|
+
const absDir = path22.join(root, dir);
|
|
2268
2715
|
let files;
|
|
2269
2716
|
try {
|
|
2270
|
-
files = await
|
|
2717
|
+
files = await readdir6(absDir);
|
|
2271
2718
|
} catch {
|
|
2272
2719
|
continue;
|
|
2273
2720
|
}
|
|
@@ -2279,7 +2726,7 @@ async function collectPageRecords(root) {
|
|
|
2279
2726
|
return records;
|
|
2280
2727
|
}
|
|
2281
2728
|
async function readPageRecord(absDir, file) {
|
|
2282
|
-
const content = await safeReadFile(
|
|
2729
|
+
const content = await safeReadFile(path22.join(absDir, file));
|
|
2283
2730
|
const { meta, body } = parseFrontmatter(content);
|
|
2284
2731
|
if (meta.orphaned || typeof meta.title !== "string") return null;
|
|
2285
2732
|
return {
|
|
@@ -2441,9 +2888,9 @@ function shouldRunEmbedding(modelChanged, toEmbed, previousEntries, previousChun
|
|
|
2441
2888
|
}
|
|
2442
2889
|
|
|
2443
2890
|
// src/compiler/candidates.ts
|
|
2444
|
-
import { readdir as
|
|
2891
|
+
import { readdir as readdir7, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
|
|
2445
2892
|
import { existsSync as existsSync5 } from "fs";
|
|
2446
|
-
import
|
|
2893
|
+
import path23 from "path";
|
|
2447
2894
|
import { randomBytes } from "crypto";
|
|
2448
2895
|
var ID_SUFFIX_BYTES = 4;
|
|
2449
2896
|
var CANDIDATE_EXT = ".json";
|
|
@@ -2452,10 +2899,10 @@ function buildCandidateId(slug) {
|
|
|
2452
2899
|
return `${slug}-${suffix}`;
|
|
2453
2900
|
}
|
|
2454
2901
|
function candidatePath(root, id) {
|
|
2455
|
-
return
|
|
2902
|
+
return path23.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
|
|
2456
2903
|
}
|
|
2457
2904
|
function archivePath(root, id) {
|
|
2458
|
-
return
|
|
2905
|
+
return path23.join(root, CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_EXT}`);
|
|
2459
2906
|
}
|
|
2460
2907
|
async function writeCandidate(root, draft) {
|
|
2461
2908
|
const candidate = {
|
|
@@ -2467,7 +2914,8 @@ async function writeCandidate(root, draft) {
|
|
|
2467
2914
|
body: draft.body,
|
|
2468
2915
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2469
2916
|
...draft.sourceStates ? { sourceStates: draft.sourceStates } : {},
|
|
2470
|
-
...draft.schemaViolations ? { schemaViolations: draft.schemaViolations } : {}
|
|
2917
|
+
...draft.schemaViolations ? { schemaViolations: draft.schemaViolations } : {},
|
|
2918
|
+
...draft.provenanceViolations ? { provenanceViolations: draft.provenanceViolations } : {}
|
|
2471
2919
|
};
|
|
2472
2920
|
await atomicWrite(candidatePath(root, candidate.id), JSON.stringify(candidate, null, 2));
|
|
2473
2921
|
return candidate;
|
|
@@ -2506,9 +2954,9 @@ function isValidCandidate(value) {
|
|
|
2506
2954
|
return typeof candidate.id === "string" && typeof candidate.title === "string" && typeof candidate.slug === "string" && typeof candidate.body === "string" && Array.isArray(candidate.sources);
|
|
2507
2955
|
}
|
|
2508
2956
|
async function listCandidates(root) {
|
|
2509
|
-
const dir =
|
|
2957
|
+
const dir = path23.join(root, CANDIDATES_DIR);
|
|
2510
2958
|
if (!existsSync5(dir)) return [];
|
|
2511
|
-
const entries = await
|
|
2959
|
+
const entries = await readdir7(dir, { withFileTypes: true });
|
|
2512
2960
|
const candidates = [];
|
|
2513
2961
|
for (const entry of entries) {
|
|
2514
2962
|
if (!entry.isFile() || !entry.name.endsWith(CANDIDATE_EXT)) continue;
|
|
@@ -2533,7 +2981,7 @@ async function archiveCandidate(root, id) {
|
|
|
2533
2981
|
const sourcePath = candidatePath(root, id);
|
|
2534
2982
|
if (!existsSync5(sourcePath)) return false;
|
|
2535
2983
|
const target = archivePath(root, id);
|
|
2536
|
-
await mkdir5(
|
|
2984
|
+
await mkdir5(path23.dirname(target), { recursive: true });
|
|
2537
2985
|
try {
|
|
2538
2986
|
await rename3(sourcePath, target);
|
|
2539
2987
|
} catch {
|
|
@@ -2545,9 +2993,9 @@ async function archiveCandidate(root, id) {
|
|
|
2545
2993
|
}
|
|
2546
2994
|
|
|
2547
2995
|
// src/linter/rules.ts
|
|
2548
|
-
import { readdir as
|
|
2996
|
+
import { readdir as readdir8, readFile as readFile17 } from "fs/promises";
|
|
2549
2997
|
import { existsSync as existsSync6 } from "fs";
|
|
2550
|
-
import
|
|
2998
|
+
import path24 from "path";
|
|
2551
2999
|
var MIN_BODY_LENGTH = 50;
|
|
2552
3000
|
var WIKILINK_PATTERN2 = /\[\[([^\]]+)\]\]/g;
|
|
2553
3001
|
var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
|
|
@@ -2564,26 +3012,26 @@ function findMatchesInContent(content, pattern) {
|
|
|
2564
3012
|
}
|
|
2565
3013
|
async function readMarkdownFiles(dirPath) {
|
|
2566
3014
|
if (!existsSync6(dirPath)) return [];
|
|
2567
|
-
const entries = await
|
|
3015
|
+
const entries = await readdir8(dirPath);
|
|
2568
3016
|
const mdFiles = entries.filter((f) => f.endsWith(".md"));
|
|
2569
3017
|
const results = await Promise.all(
|
|
2570
3018
|
mdFiles.map(async (fileName) => {
|
|
2571
|
-
const filePath =
|
|
2572
|
-
const content = await
|
|
3019
|
+
const filePath = path24.join(dirPath, fileName);
|
|
3020
|
+
const content = await readFile17(filePath, "utf-8");
|
|
2573
3021
|
return { filePath, content };
|
|
2574
3022
|
})
|
|
2575
3023
|
);
|
|
2576
3024
|
return results;
|
|
2577
3025
|
}
|
|
2578
3026
|
async function collectAllPages(root) {
|
|
2579
|
-
const conceptPages = await readMarkdownFiles(
|
|
2580
|
-
const queryPages = await readMarkdownFiles(
|
|
3027
|
+
const conceptPages = await readMarkdownFiles(path24.join(root, CONCEPTS_DIR));
|
|
3028
|
+
const queryPages = await readMarkdownFiles(path24.join(root, QUERIES_DIR));
|
|
2581
3029
|
return [...conceptPages, ...queryPages];
|
|
2582
3030
|
}
|
|
2583
3031
|
function buildPageSlugSet(pages) {
|
|
2584
3032
|
const slugs = /* @__PURE__ */ new Set();
|
|
2585
3033
|
for (const page of pages) {
|
|
2586
|
-
const baseName =
|
|
3034
|
+
const baseName = path24.basename(page.filePath, ".md");
|
|
2587
3035
|
slugs.add(baseName.toLowerCase());
|
|
2588
3036
|
}
|
|
2589
3037
|
return slugs;
|
|
@@ -2730,9 +3178,8 @@ async function checkInferredWithoutCitations(root) {
|
|
|
2730
3178
|
const pages = await collectAllPages(root);
|
|
2731
3179
|
const results = [];
|
|
2732
3180
|
for (const page of pages) {
|
|
2733
|
-
const {
|
|
2734
|
-
const
|
|
2735
|
-
const inferred = provenance.inferredParagraphs ?? countUncitedProseParagraphs(body);
|
|
3181
|
+
const { body } = parseFrontmatter(page.content);
|
|
3182
|
+
const inferred = countUncitedProseParagraphs(body);
|
|
2736
3183
|
if (inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS) continue;
|
|
2737
3184
|
results.push({
|
|
2738
3185
|
rule: "excess-inferred-paragraphs",
|
|
@@ -2743,7 +3190,7 @@ async function checkInferredWithoutCitations(root) {
|
|
|
2743
3190
|
}
|
|
2744
3191
|
return results;
|
|
2745
3192
|
}
|
|
2746
|
-
var PROSE_PARAGRAPH_LEAD =
|
|
3193
|
+
var PROSE_PARAGRAPH_LEAD = new RegExp("^\\p{L}", "u");
|
|
2747
3194
|
function countUncitedProseParagraphs(body) {
|
|
2748
3195
|
const paragraphs = body.split(/\n\s*\n/);
|
|
2749
3196
|
let count = 0;
|
|
@@ -2766,18 +3213,7 @@ async function checkSchemaCrossLinks(root, schema) {
|
|
|
2766
3213
|
const pages = await collectAllPages(root);
|
|
2767
3214
|
const results = [];
|
|
2768
3215
|
for (const page of pages) {
|
|
2769
|
-
|
|
2770
|
-
const kind = resolvePageKind(meta.kind, schema);
|
|
2771
|
-
const rule = schema.kinds[kind];
|
|
2772
|
-
if (rule.minWikilinks <= 0) continue;
|
|
2773
|
-
const linkCount = countWikilinks(body);
|
|
2774
|
-
if (linkCount >= rule.minWikilinks) continue;
|
|
2775
|
-
results.push({
|
|
2776
|
-
rule: "schema-cross-link-minimum",
|
|
2777
|
-
severity: "warning",
|
|
2778
|
-
file: page.filePath,
|
|
2779
|
-
message: `Page kind "${kind}" requires at least ${rule.minWikilinks} [[wikilinks]] but only ${linkCount} found.`
|
|
2780
|
-
});
|
|
3216
|
+
results.push(...checkPageCrossLinks(page.content, page.filePath, schema));
|
|
2781
3217
|
}
|
|
2782
3218
|
return results;
|
|
2783
3219
|
}
|
|
@@ -2818,13 +3254,24 @@ function countLines(content) {
|
|
|
2818
3254
|
}
|
|
2819
3255
|
async function checkBrokenCitations(root) {
|
|
2820
3256
|
const pages = await collectAllPages(root);
|
|
2821
|
-
const sourcesDir =
|
|
3257
|
+
const sourcesDir = path24.join(root, SOURCES_DIR);
|
|
2822
3258
|
const results = [];
|
|
2823
3259
|
const lineCountCache = /* @__PURE__ */ new Map();
|
|
2824
3260
|
for (const page of pages) {
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
3261
|
+
const pageFindings = await checkPageBrokenCitations(
|
|
3262
|
+
page.content,
|
|
3263
|
+
page.filePath,
|
|
3264
|
+
sourcesDir,
|
|
3265
|
+
lineCountCache
|
|
3266
|
+
);
|
|
3267
|
+
results.push(...pageFindings);
|
|
3268
|
+
}
|
|
3269
|
+
return results;
|
|
3270
|
+
}
|
|
3271
|
+
async function checkPageBrokenCitations(content, filePath, sourcesDir, lineCountCache = /* @__PURE__ */ new Map()) {
|
|
3272
|
+
const results = [];
|
|
3273
|
+
for (const { captured, line } of findMatchesInContent(content, CITATION_PATTERN)) {
|
|
3274
|
+
await collectBrokenForMarker(captured, line, filePath, sourcesDir, lineCountCache, results);
|
|
2828
3275
|
}
|
|
2829
3276
|
return results;
|
|
2830
3277
|
}
|
|
@@ -2833,7 +3280,7 @@ async function collectBrokenForMarker(captured, line, pageFile, sourcesDir, line
|
|
|
2833
3280
|
const trimmed = part.trim();
|
|
2834
3281
|
if (trimmed.length === 0) continue;
|
|
2835
3282
|
const filename = stripSpanSuffix(trimmed);
|
|
2836
|
-
const citedPath =
|
|
3283
|
+
const citedPath = path24.join(sourcesDir, filename);
|
|
2837
3284
|
if (!existsSync6(citedPath)) {
|
|
2838
3285
|
out.push({
|
|
2839
3286
|
rule: "broken-citation",
|
|
@@ -2869,25 +3316,30 @@ async function checkMalformedClaimCitations(root) {
|
|
|
2869
3316
|
const pages = await collectAllPages(root);
|
|
2870
3317
|
const results = [];
|
|
2871
3318
|
for (const page of pages) {
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
2877
|
-
|
|
2878
|
-
|
|
2879
|
-
|
|
2880
|
-
|
|
2881
|
-
|
|
2882
|
-
|
|
3319
|
+
results.push(...checkPageMalformedCitations(page.content, page.filePath));
|
|
3320
|
+
}
|
|
3321
|
+
return results;
|
|
3322
|
+
}
|
|
3323
|
+
function checkPageMalformedCitations(content, filePath) {
|
|
3324
|
+
const results = [];
|
|
3325
|
+
for (const { captured, line } of findMatchesInContent(content, CITATION_PATTERN)) {
|
|
3326
|
+
for (const part of captured.split(",")) {
|
|
3327
|
+
if (!isMalformedCitationEntry(part)) continue;
|
|
3328
|
+
results.push({
|
|
3329
|
+
rule: "malformed-claim-citation",
|
|
3330
|
+
severity: "error",
|
|
3331
|
+
file: filePath,
|
|
3332
|
+
message: `Malformed claim citation ^[${captured}] \u2014 expected file.md, file.md:N-N, or file.md#LN-LN`,
|
|
3333
|
+
line
|
|
3334
|
+
});
|
|
2883
3335
|
}
|
|
2884
3336
|
}
|
|
2885
3337
|
return results;
|
|
2886
3338
|
}
|
|
2887
3339
|
|
|
2888
3340
|
// src/compiler/page-renderer.ts
|
|
2889
|
-
import { readdir as
|
|
2890
|
-
import
|
|
3341
|
+
import { readdir as readdir9 } from "fs/promises";
|
|
3342
|
+
import path25 from "path";
|
|
2891
3343
|
|
|
2892
3344
|
// src/compiler/provenance.ts
|
|
2893
3345
|
function addProvenanceMeta(fields, concept) {
|
|
@@ -2900,9 +3352,6 @@ function addProvenanceMeta(fields, concept) {
|
|
|
2900
3352
|
if (concept.contradictedBy && concept.contradictedBy.length > 0) {
|
|
2901
3353
|
fields.contradictedBy = concept.contradictedBy;
|
|
2902
3354
|
}
|
|
2903
|
-
if (typeof concept.inferredParagraphs === "number") {
|
|
2904
|
-
fields.inferredParagraphs = concept.inferredParagraphs;
|
|
2905
|
-
}
|
|
2906
3355
|
}
|
|
2907
3356
|
function reportContradictionWarnings(conceptTitle, concept) {
|
|
2908
3357
|
const refs = concept.contradictedBy;
|
|
@@ -2917,7 +3366,7 @@ function reportContradictionWarnings(conceptTitle, concept) {
|
|
|
2917
3366
|
// src/compiler/page-renderer.ts
|
|
2918
3367
|
var RELATED_PAGE_CONTEXT_LIMIT = 5;
|
|
2919
3368
|
async function renderMergedPageContent(root, entry, schema) {
|
|
2920
|
-
const pagePath =
|
|
3369
|
+
const pagePath = path25.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
|
|
2921
3370
|
const existingPage = await safeReadFile(pagePath);
|
|
2922
3371
|
const relatedPages = await loadRelatedPages(root, entry.slug);
|
|
2923
3372
|
const system = buildPagePrompt(
|
|
@@ -2956,17 +3405,17 @@ function buildMergedFrontmatter(entry, existingPage, schema) {
|
|
|
2956
3405
|
return buildFrontmatter(frontmatterFields);
|
|
2957
3406
|
}
|
|
2958
3407
|
async function loadRelatedPages(root, excludeSlug) {
|
|
2959
|
-
const conceptsPath =
|
|
3408
|
+
const conceptsPath = path25.join(root, CONCEPTS_DIR);
|
|
2960
3409
|
let files;
|
|
2961
3410
|
try {
|
|
2962
|
-
files = await
|
|
3411
|
+
files = await readdir9(conceptsPath);
|
|
2963
3412
|
} catch {
|
|
2964
3413
|
return "";
|
|
2965
3414
|
}
|
|
2966
3415
|
const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, RELATED_PAGE_CONTEXT_LIMIT);
|
|
2967
3416
|
const contents = [];
|
|
2968
3417
|
for (const f of related) {
|
|
2969
|
-
const content = await safeReadFile(
|
|
3418
|
+
const content = await safeReadFile(path25.join(conceptsPath, f));
|
|
2970
3419
|
if (!content) continue;
|
|
2971
3420
|
const { meta } = parseFrontmatter(content);
|
|
2972
3421
|
if (meta.orphaned) continue;
|
|
@@ -3020,7 +3469,7 @@ async function generatePagesPhase(root, extractions, frozenSlugs, schema, option
|
|
|
3020
3469
|
return entry;
|
|
3021
3470
|
}))
|
|
3022
3471
|
);
|
|
3023
|
-
return { pages, errors, candidates };
|
|
3472
|
+
return { pages, errors, candidates, seedSlugs: [] };
|
|
3024
3473
|
}
|
|
3025
3474
|
async function persistExtractionStates(root, extractions) {
|
|
3026
3475
|
for (const result of extractions) {
|
|
@@ -3046,12 +3495,13 @@ function summarizeCompile(buckets, generation, extractions, options) {
|
|
|
3046
3495
|
errors.push(`No concepts extracted from ${result.sourceFile}`);
|
|
3047
3496
|
}
|
|
3048
3497
|
}
|
|
3498
|
+
const conceptSlugs = generation.pages.map((entry) => entry.slug);
|
|
3049
3499
|
const baseResult = {
|
|
3050
3500
|
compiled: buckets.toCompile.length,
|
|
3051
3501
|
skipped: buckets.unchanged.length,
|
|
3052
3502
|
deleted: buckets.deleted.length,
|
|
3053
3503
|
concepts: generation.pages.map((entry) => entry.concept.concept),
|
|
3054
|
-
pages: generation.
|
|
3504
|
+
pages: [...conceptSlugs, ...generation.seedSlugs],
|
|
3055
3505
|
errors
|
|
3056
3506
|
};
|
|
3057
3507
|
if (options.review) {
|
|
@@ -3069,12 +3519,21 @@ async function runCompilePipeline(root, options) {
|
|
|
3069
3519
|
if (buckets.toCompile.length === 0 && buckets.deleted.length === 0) {
|
|
3070
3520
|
status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
|
|
3071
3521
|
if (!options.review) {
|
|
3072
|
-
const emptyGeneration = {
|
|
3522
|
+
const emptyGeneration = {
|
|
3523
|
+
pages: [],
|
|
3524
|
+
errors: [],
|
|
3525
|
+
candidates: [],
|
|
3526
|
+
seedSlugs: []
|
|
3527
|
+
};
|
|
3073
3528
|
await generateSeedPages(root, schema, emptyGeneration);
|
|
3074
|
-
await finalizeWiki(root, emptyGeneration.pages);
|
|
3529
|
+
await finalizeWiki(root, emptyGeneration.pages, emptyGeneration.seedSlugs);
|
|
3075
3530
|
return {
|
|
3076
3531
|
...emptyCompileResult(),
|
|
3077
3532
|
skipped: buckets.unchanged.length,
|
|
3533
|
+
// Surface seed-page slugs alongside any errors so downstream
|
|
3534
|
+
// consumers (MCP, embeddings, programmatic callers) can see what
|
|
3535
|
+
// landed even on the no-source-changes early-return path.
|
|
3536
|
+
pages: [...emptyGeneration.seedSlugs],
|
|
3078
3537
|
errors: emptyGeneration.errors
|
|
3079
3538
|
};
|
|
3080
3539
|
}
|
|
@@ -3098,7 +3557,7 @@ async function runCompilePipeline(root, options) {
|
|
|
3098
3557
|
}
|
|
3099
3558
|
await persistFrozenSlugs(root, frozenSlugs, extractions);
|
|
3100
3559
|
await generateSeedPages(root, schema, generation);
|
|
3101
|
-
await finalizeWiki(root, generation.pages);
|
|
3560
|
+
await finalizeWiki(root, generation.pages, generation.seedSlugs);
|
|
3102
3561
|
}
|
|
3103
3562
|
return summarizeCompile(buckets, generation, extractions, options);
|
|
3104
3563
|
}
|
|
@@ -3135,9 +3594,11 @@ async function runExtractionPhases(root, toCompile, state, allChanges) {
|
|
|
3135
3594
|
}
|
|
3136
3595
|
return extractions;
|
|
3137
3596
|
}
|
|
3138
|
-
async function finalizeWiki(root, pages) {
|
|
3139
|
-
const
|
|
3140
|
-
const
|
|
3597
|
+
async function finalizeWiki(root, pages, seedSlugs = []) {
|
|
3598
|
+
const conceptChangedSlugs = pages.map((entry) => entry.slug);
|
|
3599
|
+
const conceptNewSlugs = pages.filter((entry) => entry.concept.is_new).map((entry) => entry.slug);
|
|
3600
|
+
const allChangedSlugs = [...conceptChangedSlugs, ...seedSlugs];
|
|
3601
|
+
const allNewSlugs = [...conceptNewSlugs, ...seedSlugs];
|
|
3141
3602
|
if (allChangedSlugs.length > 0) {
|
|
3142
3603
|
status("\u{1F517}", info("Resolving interlinks..."));
|
|
3143
3604
|
await resolveLinks(root, allChangedSlugs, allNewSlugs);
|
|
@@ -3167,9 +3628,9 @@ function printChangesSummary(changes) {
|
|
|
3167
3628
|
}
|
|
3168
3629
|
async function extractForSource(root, sourceFile) {
|
|
3169
3630
|
status("*", info(`Extracting: ${sourceFile}`));
|
|
3170
|
-
const sourcePath =
|
|
3171
|
-
const sourceContent = await
|
|
3172
|
-
const existingIndex = await safeReadFile(
|
|
3631
|
+
const sourcePath = path26.join(root, SOURCES_DIR, sourceFile);
|
|
3632
|
+
const sourceContent = await readFile18(sourcePath, "utf-8");
|
|
3633
|
+
const existingIndex = await safeReadFile(path26.join(root, INDEX_FILE));
|
|
3173
3634
|
const concepts = await extractConcepts(sourceContent, existingIndex);
|
|
3174
3635
|
if (concepts.length > 0) {
|
|
3175
3636
|
const names = concepts.map((c) => c.concept).join(", ");
|
|
@@ -3192,13 +3653,11 @@ function reconcileConceptMetadata(existing, incoming) {
|
|
|
3192
3653
|
}
|
|
3193
3654
|
}
|
|
3194
3655
|
reconciled.contradictedBy = refs.length > 0 ? refs : void 0;
|
|
3195
|
-
if (typeof incoming.inferredParagraphs === "number") {
|
|
3196
|
-
reconciled.inferredParagraphs = typeof existing.inferredParagraphs === "number" ? Math.max(existing.inferredParagraphs, incoming.inferredParagraphs) : incoming.inferredParagraphs;
|
|
3197
|
-
}
|
|
3198
3656
|
return reconciled;
|
|
3199
3657
|
}
|
|
3200
3658
|
function mergeExtractions(extractions, frozenSlugs) {
|
|
3201
3659
|
const bySlug = /* @__PURE__ */ new Map();
|
|
3660
|
+
const slicesBySlug = /* @__PURE__ */ new Map();
|
|
3202
3661
|
for (const result of extractions) {
|
|
3203
3662
|
if (result.concepts.length === 0) continue;
|
|
3204
3663
|
for (const concept of result.concepts) {
|
|
@@ -3208,23 +3667,28 @@ function mergeExtractions(extractions, frozenSlugs) {
|
|
|
3208
3667
|
if (existing) {
|
|
3209
3668
|
existing.concept = reconcileConceptMetadata(existing.concept, concept);
|
|
3210
3669
|
existing.sourceFiles.push(result.sourceFile);
|
|
3211
|
-
existing.combinedContent += `
|
|
3212
|
-
|
|
3213
|
-
--- SOURCE: ${result.sourceFile} ---
|
|
3214
|
-
|
|
3215
|
-
${result.sourceContent}`;
|
|
3216
3670
|
} else {
|
|
3217
3671
|
bySlug.set(slug, {
|
|
3218
3672
|
slug,
|
|
3219
3673
|
concept,
|
|
3220
3674
|
sourceFiles: [result.sourceFile],
|
|
3221
|
-
combinedContent:
|
|
3222
|
-
|
|
3223
|
-
${result.sourceContent}`
|
|
3675
|
+
combinedContent: ""
|
|
3224
3676
|
});
|
|
3677
|
+
slicesBySlug.set(slug, []);
|
|
3225
3678
|
}
|
|
3679
|
+
slicesBySlug.get(slug).push({
|
|
3680
|
+
file: result.sourceFile,
|
|
3681
|
+
content: result.sourceContent
|
|
3682
|
+
});
|
|
3226
3683
|
}
|
|
3227
3684
|
}
|
|
3685
|
+
for (const merged of bySlug.values()) {
|
|
3686
|
+
const slices = slicesBySlug.get(merged.slug) ?? [];
|
|
3687
|
+
merged.combinedContent = buildBudgetedCombinedContent(
|
|
3688
|
+
merged.concept.concept,
|
|
3689
|
+
slices
|
|
3690
|
+
);
|
|
3691
|
+
}
|
|
3228
3692
|
return Array.from(bySlug.values());
|
|
3229
3693
|
}
|
|
3230
3694
|
async function generateMergedPage(root, entry, schema, options, sourceStates) {
|
|
@@ -3232,13 +3696,18 @@ async function generateMergedPage(root, entry, schema, options, sourceStates) {
|
|
|
3232
3696
|
if (options.review) {
|
|
3233
3697
|
return await persistReviewCandidate(root, entry, fullPage, sourceStates, schema);
|
|
3234
3698
|
}
|
|
3235
|
-
const pagePath =
|
|
3699
|
+
const pagePath = path26.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
|
|
3236
3700
|
const error2 = await writePageIfValid(pagePath, fullPage, entry.concept.concept);
|
|
3237
3701
|
return { error: error2 ?? void 0 };
|
|
3238
3702
|
}
|
|
3239
3703
|
async function persistReviewCandidate(root, entry, fullPage, sourceStates, schema) {
|
|
3240
3704
|
const virtualPath = `wiki/concepts/${entry.slug}.md`;
|
|
3241
|
-
const
|
|
3705
|
+
const schemaViolations = checkPageCrossLinks(fullPage, virtualPath, schema);
|
|
3706
|
+
const provenanceViolations = await collectCandidateProvenanceViolations(
|
|
3707
|
+
root,
|
|
3708
|
+
fullPage,
|
|
3709
|
+
virtualPath
|
|
3710
|
+
);
|
|
3242
3711
|
const candidate = await writeCandidate(root, {
|
|
3243
3712
|
title: entry.concept.concept,
|
|
3244
3713
|
slug: entry.slug,
|
|
@@ -3246,21 +3715,35 @@ async function persistReviewCandidate(root, entry, fullPage, sourceStates, schem
|
|
|
3246
3715
|
sources: entry.sourceFiles,
|
|
3247
3716
|
body: fullPage,
|
|
3248
3717
|
sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles),
|
|
3249
|
-
schemaViolations:
|
|
3718
|
+
schemaViolations: schemaViolations.length > 0 ? schemaViolations : void 0,
|
|
3719
|
+
provenanceViolations: provenanceViolations.length > 0 ? provenanceViolations : void 0
|
|
3250
3720
|
});
|
|
3251
3721
|
status("?", info(`Candidate ready: ${candidate.id} (${entry.slug})`));
|
|
3252
3722
|
return { candidateId: candidate.id };
|
|
3253
3723
|
}
|
|
3724
|
+
/**
 * Gather provenance lint findings for a review candidate's page text.
 *
 * Runs the malformed-citation check, then the broken-citation check against
 * the project's sources directory, and returns both result sets in that order.
 *
 * @param {string} root - Project root directory.
 * @param {string} fullPage - Complete candidate page text.
 * @param {string} virtualPath - Path recorded on each finding.
 * @returns {Promise<Array<object>>} Malformed findings followed by broken ones.
 */
async function collectCandidateProvenanceViolations(root, fullPage, virtualPath) {
  const malformedFindings = checkPageMalformedCitations(fullPage, virtualPath);
  const sourcesDir = path26.join(root, SOURCES_DIR);
  const brokenFindings = await checkPageBrokenCitations(fullPage, virtualPath, sourcesDir);
  const combined = [];
  combined.push(...malformedFindings, ...brokenFindings);
  return combined;
}
|
|
3254
3733
|
async function generateSeedPages(root, schema, generation) {
|
|
3255
3734
|
if (schema.seedPages.length === 0) return;
|
|
3256
3735
|
for (const seed of schema.seedPages) {
|
|
3257
|
-
const
|
|
3258
|
-
if (
|
|
3736
|
+
const result = await generateSingleSeedPage(root, schema, seed);
|
|
3737
|
+
if (result.error) {
|
|
3738
|
+
generation.errors.push(result.error);
|
|
3739
|
+
continue;
|
|
3740
|
+
}
|
|
3741
|
+
generation.seedSlugs.push(result.slug);
|
|
3259
3742
|
}
|
|
3260
3743
|
}
|
|
3261
3744
|
async function generateSingleSeedPage(root, schema, seed) {
|
|
3262
3745
|
const slug = slugify(seed.title);
|
|
3263
|
-
const pagePath =
|
|
3746
|
+
const pagePath = path26.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
3264
3747
|
const relatedContent = await loadSeedRelatedPages(root, seed.relatedSlugs ?? []);
|
|
3265
3748
|
const rule = schema.kinds[seed.kind];
|
|
3266
3749
|
const system = buildSeedPagePrompt(seed, rule, relatedContent);
|
|
@@ -3283,16 +3766,17 @@ async function generateSingleSeedPage(root, schema, seed) {
|
|
|
3283
3766
|
const frontmatterFields = { ...typedFields };
|
|
3284
3767
|
addObsidianMeta(frontmatterFields, seed.title, []);
|
|
3285
3768
|
const frontmatter = buildFrontmatter(frontmatterFields);
|
|
3286
|
-
|
|
3769
|
+
const error2 = await writePageIfValid(pagePath, `${frontmatter}
|
|
3287
3770
|
|
|
3288
3771
|
${pageBody}
|
|
3289
3772
|
`, seed.title);
|
|
3773
|
+
return error2 ? { slug, error: error2 } : { slug };
|
|
3290
3774
|
}
|
|
3291
3775
|
async function loadSeedRelatedPages(root, slugs) {
|
|
3292
3776
|
if (slugs.length === 0) return "";
|
|
3293
3777
|
const contents = [];
|
|
3294
3778
|
for (const slug of slugs) {
|
|
3295
|
-
const pagePath =
|
|
3779
|
+
const pagePath = path26.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
3296
3780
|
const content = await safeReadFile(pagePath);
|
|
3297
3781
|
if (content) contents.push(content);
|
|
3298
3782
|
}
|
|
@@ -3347,7 +3831,7 @@ async function compileCommand(options = {}) {
|
|
|
3347
3831
|
|
|
3348
3832
|
// src/commands/query.ts
|
|
3349
3833
|
import { existsSync as existsSync8 } from "fs";
|
|
3350
|
-
import
|
|
3834
|
+
import path27 from "path";
|
|
3351
3835
|
var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
|
|
3352
3836
|
var PAGE_SELECTION_TOOL = {
|
|
3353
3837
|
name: "select_pages",
|
|
@@ -3404,7 +3888,7 @@ async function selectRelevantPages(root, question, debug) {
|
|
|
3404
3888
|
const { pages: rawPages2, reasoning: reasoning2 } = await selectPages(question, filteredIndex);
|
|
3405
3889
|
return { pages: rawPages2, rawPages: rawPages2, reasoning: reasoning2, chunks: [] };
|
|
3406
3890
|
}
|
|
3407
|
-
const indexContent = await safeReadFile(
|
|
3891
|
+
const indexContent = await safeReadFile(path27.join(root, INDEX_FILE));
|
|
3408
3892
|
const { pages: rawPages, reasoning } = await selectPages(question, indexContent);
|
|
3409
3893
|
return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning, chunks: [] };
|
|
3410
3894
|
}
|
|
@@ -3496,7 +3980,7 @@ async function loadSelectedPages(root, slugs) {
|
|
|
3496
3980
|
for (const slug of slugs) {
|
|
3497
3981
|
let content = "";
|
|
3498
3982
|
for (const dir of PAGE_DIRS) {
|
|
3499
|
-
const candidate = await safeReadFile(
|
|
3983
|
+
const candidate = await safeReadFile(path27.join(root, dir, `${slug}.md`));
|
|
3500
3984
|
if (!candidate) continue;
|
|
3501
3985
|
const { meta } = parseFrontmatter(candidate);
|
|
3502
3986
|
if (meta.orphaned) continue;
|
|
@@ -3512,7 +3996,11 @@ ${content}`);
|
|
|
3512
3996
|
}
|
|
3513
3997
|
return sections.join("\n\n");
|
|
3514
3998
|
}
|
|
3515
|
-
var
|
|
3999
|
+
var ANSWER_SYSTEM_PROMPT_BASE = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";
|
|
4000
|
+
/**
 * Build the system prompt for the answer-generation call.
 *
 * @returns {string} ANSWER_SYSTEM_PROMPT_BASE, followed by a space and the
 *   value of languageDirective() when that value is truthy.
 */
function buildAnswerSystemPrompt() {
  const directive = languageDirective();
  if (!directive) {
    return ANSWER_SYSTEM_PROMPT_BASE;
  }
  return `${ANSWER_SYSTEM_PROMPT_BASE} ${directive}`;
}
|
|
3516
4004
|
async function callAnswerLLM(question, pagesContent, chunks, onToken) {
|
|
3517
4005
|
const provenance = chunks.length > 0 ? buildChunkProvenance(chunks) : "";
|
|
3518
4006
|
const userMessage = `Question: ${question}
|
|
@@ -3520,7 +4008,7 @@ async function callAnswerLLM(question, pagesContent, chunks, onToken) {
|
|
|
3520
4008
|
Relevant wiki pages:
|
|
3521
4009
|
${pagesContent}${provenance}`;
|
|
3522
4010
|
return callClaude({
|
|
3523
|
-
system:
|
|
4011
|
+
system: buildAnswerSystemPrompt(),
|
|
3524
4012
|
messages: [{ role: "user", content: userMessage }],
|
|
3525
4013
|
stream: Boolean(onToken),
|
|
3526
4014
|
onToken
|
|
@@ -3543,7 +4031,7 @@ function summarizeAnswer(answer) {
|
|
|
3543
4031
|
}
|
|
3544
4032
|
async function saveQueryPage(root, question, answer) {
|
|
3545
4033
|
const slug = slugify(question);
|
|
3546
|
-
const filePath =
|
|
4034
|
+
const filePath = path27.join(root, QUERIES_DIR, `${slug}.md`);
|
|
3547
4035
|
const frontmatter = buildFrontmatter({
|
|
3548
4036
|
title: question,
|
|
3549
4037
|
summary: summarizeAnswer(answer),
|
|
@@ -3569,7 +4057,7 @@ ${answer}
|
|
|
3569
4057
|
return slug;
|
|
3570
4058
|
}
|
|
3571
4059
|
async function generateAnswer(root, question, options = {}) {
|
|
3572
|
-
if (!existsSync8(
|
|
4060
|
+
if (!existsSync8(path27.join(root, INDEX_FILE))) {
|
|
3573
4061
|
throw new Error("Wiki index not found. Run `llmwiki compile` first.");
|
|
3574
4062
|
}
|
|
3575
4063
|
const selection = await selectRelevantPages(root, question, Boolean(options.debug));
|
|
@@ -3597,7 +4085,7 @@ function buildEmptyResult(selection) {
|
|
|
3597
4085
|
};
|
|
3598
4086
|
}
|
|
3599
4087
|
async function queryCommand(root, question, options) {
|
|
3600
|
-
if (!existsSync8(
|
|
4088
|
+
if (!existsSync8(path27.join(root, INDEX_FILE))) {
|
|
3601
4089
|
status("!", error("Wiki index not found. Run `llmwiki compile` first."));
|
|
3602
4090
|
return;
|
|
3603
4091
|
}
|
|
@@ -3648,10 +4136,10 @@ var DEBUG_CHUNK_PREVIEW_CHARS = 120;
|
|
|
3648
4136
|
// src/commands/watch.ts
|
|
3649
4137
|
import { watch as chokidarWatch } from "chokidar";
|
|
3650
4138
|
import { existsSync as existsSync9 } from "fs";
|
|
3651
|
-
import
|
|
4139
|
+
import path28 from "path";
|
|
3652
4140
|
var DEBOUNCE_MS = 500;
|
|
3653
4141
|
async function watchCommand() {
|
|
3654
|
-
const sourcesPath =
|
|
4142
|
+
const sourcesPath = path28.resolve(SOURCES_DIR);
|
|
3655
4143
|
if (!existsSync9(sourcesPath)) {
|
|
3656
4144
|
status(
|
|
3657
4145
|
"!",
|
|
@@ -3686,7 +4174,7 @@ async function watchCommand() {
|
|
|
3686
4174
|
const scheduleCompile = (eventPath, event) => {
|
|
3687
4175
|
status(
|
|
3688
4176
|
"~",
|
|
3689
|
-
dim(`${event}: ${
|
|
4177
|
+
dim(`${event}: ${path28.basename(eventPath)}`)
|
|
3690
4178
|
);
|
|
3691
4179
|
if (debounceTimer) clearTimeout(debounceTimer);
|
|
3692
4180
|
debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
|
|
@@ -3770,10 +4258,388 @@ async function lintCommand() {
|
|
|
3770
4258
|
}
|
|
3771
4259
|
}
|
|
3772
4260
|
|
|
4261
|
+
// src/commands/export.ts
|
|
4262
|
+
import path30 from "path";
|
|
4263
|
+
import { createRequire } from "module";
|
|
4264
|
+
|
|
4265
|
+
// src/export/collect.ts
|
|
4266
|
+
import { readdir as readdir10, readFile as readFile19 } from "fs/promises";
|
|
4267
|
+
import path29 from "path";
|
|
4268
|
+
var WIKILINK_RE = /\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g;
|
|
4269
|
+
/**
 * Collect the unique slugs of every [[wikilink]] target in a page body.
 *
 * The link target (the text before an optional `|alias`) is trimmed and
 * passed through slugify. Slugs come back in first-seen order.
 *
 * Uses matchAll rather than a `while (re.exec(...))` loop: matchAll iterates
 * over a private copy of the regex, so the shared global WIKILINK_RE never
 * has its `lastIndex` advanced. With exec, an exception thrown mid-scan would
 * leave `lastIndex` non-zero and the next call would silently skip links.
 *
 * @param {string} body - Markdown body of a page.
 * @returns {string[]} De-duplicated slugs of the linked pages.
 */
function extractWikilinkSlugs(body) {
  const slugs = new Set();
  for (const [, target] of String(body).matchAll(WIKILINK_RE)) {
    slugs.add(slugify(target.trim()));
  }
  return [...slugs];
}
|
|
4277
|
+
/**
 * Read one wiki page from disk and turn it into an export record.
 *
 * Returns null when the file cannot be read, when the frontmatter has no
 * non-empty string `title`, or when the page is marked `orphaned: true`.
 * Missing or non-string fields fall back to defaults: an empty summary,
 * empty lists, and the current time for the timestamps.
 *
 * @param {string} filePath - Absolute or relative path of the markdown file.
 * @param {string} slug - Slug to record for the page.
 * @param {string} pageDirectory - Directory label stored on the record.
 * @returns {Promise<object|null>} The export record, or null if skipped.
 */
async function parsePageFile(filePath, slug, pageDirectory) {
  let fileText;
  try {
    fileText = await readFile19(filePath, "utf-8");
  } catch {
    // Unreadable files are skipped rather than failing the whole export.
    return null;
  }
  const { meta, body } = parseFrontmatter(fileText);
  const hasTitle = typeof meta.title === "string" && meta.title !== "";
  if (!hasTitle || meta.orphaned === true) {
    return null;
  }
  const onlyStrings = (value) => Array.isArray(value) ? value.filter((item) => typeof item === "string") : [];
  const timestampOrNow = (value) => typeof value === "string" ? value : (/* @__PURE__ */ new Date()).toISOString();
  // Key order is kept stable because the record is serialised as-is by the JSON export.
  return {
    title: meta.title,
    slug,
    pageDirectory,
    summary: typeof meta.summary === "string" ? meta.summary : "",
    sources: onlyStrings(meta.sources),
    tags: onlyStrings(meta.tags),
    createdAt: timestampOrNow(meta.createdAt),
    updatedAt: timestampOrNow(meta.updatedAt),
    links: extractWikilinkSlugs(body),
    body
  };
}
|
|
4300
|
+
/**
 * Parse every `.md` file directly inside a directory into export records.
 *
 * Files are processed one at a time in the order readdir returns them.
 * Files that parsePageFile rejects are omitted. A directory that cannot be
 * listed yields an empty array.
 *
 * @param {string} dirPath - Directory to scan.
 * @param {string} pageDirectory - Directory label passed on to each record.
 * @returns {Promise<object[]>} Parsed page records.
 */
async function collectFromDir(dirPath, pageDirectory) {
  let entries;
  try {
    entries = await readdir10(dirPath);
  } catch {
    return [];
  }
  const markdownFiles = entries.filter((name) => name.endsWith(".md"));
  const collected = [];
  for (const fileName of markdownFiles) {
    const pageSlug = fileName.replace(/\.md$/, "");
    const parsed = await parsePageFile(path29.join(dirPath, fileName), pageSlug, pageDirectory);
    if (parsed) {
      collected.push(parsed);
    }
  }
  return collected;
}
|
|
4315
|
+
/**
 * Load all exportable pages (concepts and saved queries) for a project.
 *
 * Both directories are scanned concurrently. The combined list is sorted by
 * title using localeCompare.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} Page records sorted by title.
 */
async function collectExportPages(root) {
  const conceptsDir = path29.join(root, CONCEPTS_DIR);
  const queriesDir = path29.join(root, QUERIES_DIR);
  const [conceptPages, queryPages] = await Promise.all([
    collectFromDir(conceptsDir, "concepts"),
    collectFromDir(queriesDir, "queries")
  ]);
  const byTitle = (left, right) => left.title.localeCompare(right.title);
  return [...conceptPages, ...queryPages].sort(byTitle);
}
|
|
4326
|
+
|
|
4327
|
+
// src/export/llms-txt.ts
|
|
4328
|
+
/**
 * Build the wiki-relative markdown path for a page record.
 *
 * @param {{pageDirectory: string, slug: string}} page - Page record.
 * @returns {string} Path of the form `wiki/<pageDirectory>/<slug>.md`.
 */
function pageRelativePath(page) {
  const { pageDirectory, slug } = page;
  return `wiki/${pageDirectory}/${slug}.md`;
}
|
|
4331
|
+
/**
 * Compose the one-line note shown after a page link in llms.txt.
 *
 * Segments are joined with " | " in a fixed order: summary (if non-empty),
 * tags (if any), sources (if any), then the created and updated timestamps,
 * which are always present.
 *
 * @param {object} page - Page record with summary, tags, sources, createdAt, updatedAt.
 * @returns {string} The joined note.
 */
function buildEntryNote(page) {
  const { summary, tags, sources, createdAt, updatedAt } = page;
  const candidates = [
    summary ? summary : null,
    tags.length > 0 ? `tags: ${tags.join(", ")}` : null,
    sources.length > 0 ? `sources: ${sources.join(", ")}` : null,
    `created: ${createdAt}`,
    `updated: ${updatedAt}`
  ];
  return candidates.filter((segment) => segment !== null).join(" | ");
}
|
|
4340
|
+
/**
 * Render a page as a single markdown list item for llms.txt.
 *
 * @param {object} page - Page record.
 * @returns {string} `- [title](relative path): note`.
 */
function formatPageEntry(page) {
  const note = buildEntryNote(page);
  const link = `[${page.title}](${pageRelativePath(page)})`;
  return `- ${link}: ${note}`;
}
|
|
4344
|
+
/**
 * Build the lines of one llms.txt section.
 *
 * @param {string} heading - Section heading text (rendered as `## heading`).
 * @param {object[]} pages - Pages listed in the section.
 * @returns {string[]} Heading, blank line, one entry per page and a trailing
 *   blank line, or an empty array when there are no pages.
 */
function buildSection(heading, pages) {
  if (pages.length === 0) {
    return [];
  }
  const entries = pages.map((page) => formatPageEntry(page));
  const lines = [`## ${heading}`, ""];
  lines.push(...entries, "");
  return lines;
}
|
|
4348
|
+
/**
 * Render the llms.txt index: a title, a page-count/timestamp line, then a
 * "Concepts" section and a "Saved Queries" section (each omitted when empty).
 *
 * @param {object[]} pages - All export page records.
 * @param {string} projectTitle - Title used for the top-level heading.
 * @returns {string} The document text, lines joined with "\n".
 */
function buildLlmsTxt(pages, projectTitle) {
  const inDirectory = (directory) => pages.filter((page) => page.pageDirectory === directory);
  const conceptPages = inDirectory("concepts");
  const queryPages = inDirectory("queries");
  const exportedAt = (/* @__PURE__ */ new Date()).toISOString();
  const header = [
    `# ${projectTitle}`,
    "",
    `> ${pages.length} pages \u2014 exported ${exportedAt}`,
    ""
  ];
  const body = [
    ...buildSection("Concepts", conceptPages),
    ...buildSection("Saved Queries", queryPages)
  ];
  return [...header, ...body].join("\n");
}
|
|
4361
|
+
/**
 * Render llms-full.txt: the llms.txt index followed by every page's full
 * body, each preceded by a small header (title, summary, optional tags and
 * sources, timestamps).
 *
 * @param {object[]} pages - All export page records.
 * @param {string} projectTitle - Title used for the index heading.
 * @returns {string} The document text, sections joined with "\n".
 */
function buildLlmsFullTxt(pages, projectTitle) {
  const renderPage = (page) => {
    const tagLine = page.tags.length > 0 ? `\nTags: ${page.tags.join(", ")}` : "";
    const sourceLine = page.sources.length > 0 ? `\nSources: ${page.sources.join(", ")}` : "";
    const headerLines = [
      "---",
      `## ${page.title}`,
      `> ${page.summary}${tagLine}${sourceLine}`,
      `Created: ${page.createdAt} | Updated: ${page.updatedAt}`,
      ""
    ];
    return `${headerLines.join("\n")}\n${page.body.trim()}\n`;
  };
  const index = buildLlmsTxt(pages, projectTitle);
  const pageSections = pages.map((page) => renderPage(page));
  return [index, ...pageSections].join("\n");
}
|
|
4381
|
+
|
|
4382
|
+
// src/export/json-export.ts
|
|
4383
|
+
/**
 * Serialise the page list as a pretty-printed JSON document.
 *
 * @param {object[]} pages - Page records to embed verbatim.
 * @returns {string} JSON text (2-space indent) with `exportedAt`,
 *   `pageCount` and `pages`, in that key order.
 */
function buildJsonExport(pages) {
  const exportedAt = (/* @__PURE__ */ new Date()).toISOString();
  const pageCount = pages.length;
  return JSON.stringify({ exportedAt, pageCount, pages }, null, 2);
}
|
|
4391
|
+
|
|
4392
|
+
// src/export/json-ld.ts
|
|
4393
|
+
// Prefix for the local, non-resolvable identifiers used in the JSON-LD graph.
var LOCAL_BASE = "urn:llmwiki:";
/**
 * Build the JSON-LD identifier for a page.
 *
 * @param {string} slug - Page slug.
 * @returns {string} LOCAL_BASE followed by the slug.
 */
function pageIri(slug) {
  return LOCAL_BASE.concat(slug);
}
|
|
4397
|
+
/**
 * Convert a page record into a JSON-LD node of type "Article".
 *
 * `keywords`, `isBasedOn` and `mentions` are only present when the page has
 * tags, sources and outgoing links respectively. Each link becomes an
 * `{ "@id": ... }` reference built with pageIri.
 *
 * @param {object} page - Page record.
 * @returns {object} The JSON-LD node.
 */
function pageToJsonLd(page) {
  const { tags, sources, links } = page;
  return {
    "@id": pageIri(page.slug),
    "@type": "Article",
    name: page.title,
    description: page.summary,
    dateCreated: page.createdAt,
    dateModified: page.updatedAt,
    ...(tags.length > 0 ? { keywords: tags } : {}),
    ...(sources.length > 0 ? { isBasedOn: sources } : {}),
    ...(links.length > 0 ? { mentions: links.map((slug) => ({ "@id": pageIri(slug) })) } : {})
  };
}
|
|
4417
|
+
/**
 * Serialise all pages as a JSON-LD document with a schema.org context.
 *
 * @param {object[]} pages - Page records.
 * @returns {string} Pretty-printed JSON (2-space indent) with `@context` and
 *   `@graph`.
 */
function buildJsonLd(pages) {
  const graph = pages.map((page) => pageToJsonLd(page));
  return JSON.stringify({ "@context": "https://schema.org", "@graph": graph }, null, 2);
}
|
|
4424
|
+
|
|
4425
|
+
// src/export/graphml.ts
|
|
4426
|
+
// The five XML predefined entities. Each reserved character must map to its
// entity reference, not to itself; otherwise page text containing &, <, >
// or quotes produces malformed GraphML.
var XML_ESCAPES = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&apos;"
};
/**
 * Escape a string for use in XML text content or attribute values.
 *
 * @param {string} value - Raw text.
 * @returns {string} The text with &, <, >, " and ' replaced by entity references.
 */
function escapeXml(value) {
  return value.replace(/[&<>"']/g, (ch) => XML_ESCAPES[ch] ?? ch);
}
|
|
4436
|
+
var KEY_DEFS = [
|
|
4437
|
+
'<key id="title" for="node" attr.name="title" attr.type="string"/>',
|
|
4438
|
+
'<key id="summary" for="node" attr.name="summary" attr.type="string"/>',
|
|
4439
|
+
'<key id="tags" for="node" attr.name="tags" attr.type="string"/>',
|
|
4440
|
+
'<key id="sources" for="node" attr.name="sources" attr.type="string"/>',
|
|
4441
|
+
'<key id="createdAt" for="node" attr.name="createdAt" attr.type="string"/>',
|
|
4442
|
+
'<key id="updatedAt" for="node" attr.name="updatedAt" attr.type="string"/>'
|
|
4443
|
+
].join("\n ");
|
|
4444
|
+
/**
 * Render a page as a GraphML `<node>` element with one `<data>` child per
 * attribute (title, summary, tags, sources, createdAt, updatedAt).
 *
 * Tags and sources are flattened to comma-separated strings. All values and
 * the node id are XML-escaped.
 *
 * @param {object} page - Page record.
 * @returns {string} The node markup, lines joined with "\n".
 */
function pageToNode(page) {
  const fields = [
    ["title", page.title],
    ["summary", page.summary],
    ["tags", page.tags.join(", ")],
    ["sources", page.sources.join(", ")],
    ["createdAt", page.createdAt],
    ["updatedAt", page.updatedAt]
  ];
  const openTag = ` <node id="${escapeXml(page.slug)}">`;
  const dataLines = fields.map(([key, value]) => ` <data key="${key}">${escapeXml(value)}</data>`);
  return [openTag, ...dataLines, ` </node>`].join("\n");
}
|
|
4458
|
+
/**
 * Render a page's outgoing links as GraphML `<edge>` elements.
 *
 * Links whose target slug is not in `knownSlugs` are dropped so that the
 * graph never references a node that was not exported.
 *
 * @param {object} page - Page record (uses `slug` and `links`).
 * @param {Set<string>} knownSlugs - Slugs of all exported pages.
 * @returns {string[]} One edge line per resolvable link.
 */
function pageToEdges(page, knownSlugs) {
  const edges = [];
  for (const targetSlug of page.links) {
    if (!knownSlugs.has(targetSlug)) continue;
    edges.push(` <edge source="${escapeXml(page.slug)}" target="${escapeXml(targetSlug)}"/>`);
  }
  return edges;
}
|
|
4463
|
+
/**
 * Render the whole wiki as a directed GraphML document: one node per page
 * and one edge per wikilink that points at another exported page.
 *
 * @param {object[]} pages - Page records.
 * @returns {string} GraphML text ending with a newline.
 */
function buildGraphml(pages) {
  const knownSlugs = new Set(pages.map((page) => page.slug));
  const nodeMarkup = pages.map((page) => pageToNode(page)).join("\n");
  const edgeMarkup = pages.flatMap((page) => pageToEdges(page, knownSlugs)).join("\n");
  const prolog = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<graphml xmlns="http://graphml.graphdrawing.org/graphml"',
    ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
    ' xsi:schemaLocation="http://graphml.graphdrawing.org/graphml',
    ' http://graphml.graphdrawing.org/graphml/1.0/graphml.xsd">',
    ` ${KEY_DEFS}`,
    ' <graph id="wiki" edgedefault="directed">'
  ];
  // The final "" makes the joined output end with a newline.
  const epilog = [" </graph>", "</graphml>", ""];
  return [...prolog, nodeMarkup, edgeMarkup, ...epilog].join("\n");
}
|
|
4482
|
+
|
|
4483
|
+
// src/export/marp.ts
|
|
4484
|
+
// Maximum number of UTF-16 code units of body text shown on a slide.
var SLIDE_BODY_MAX_CHARS = 300;
/**
 * Extract a short plain-text excerpt from a page body for a slide.
 *
 * Takes the first blank-line-delimited block, strips leading heading markers
 * (`#`..`######`) and list bullets (`-`, `*`, `+`) from each line, and
 * truncates to SLIDE_BODY_MAX_CHARS with a trailing ellipsis.
 *
 * The cut never lands inside a surrogate pair: if the last kept code unit is
 * a high surrogate, it is dropped so the excerpt does not end in a lone
 * (invalid) surrogate.
 *
 * @param {string} body - Markdown body of a page.
 * @returns {string} The excerpt, at most SLIDE_BODY_MAX_CHARS code units plus "\u2026".
 */
function extractFirstParagraph(body) {
  const trimmed = body.trim();
  const firstBlock = trimmed.split(/\n\s*\n/)[0] ?? "";
  const stripped = firstBlock.replace(/^#{1,6}\s+/gm, "").replace(/^[-*+]\s+/gm, "").trim();
  if (stripped.length <= SLIDE_BODY_MAX_CHARS) return stripped;
  let cut = SLIDE_BODY_MAX_CHARS;
  const lastUnit = stripped.charCodeAt(cut - 1);
  // 0xD800-0xDBFF is the high-surrogate range; keeping one at the end would
  // split an astral character such as an emoji in half.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
  return `${stripped.slice(0, cut)}\u2026`;
}
|
|
4492
|
+
/**
 * Build the HTML comment holding a slide's provenance note.
 *
 * @param {object} page - Page record (uses createdAt, updatedAt, sources).
 * @returns {string} `<!-- created: … | updated: … [| sources: …] -->`.
 */
function buildSpeakerNotes(page) {
  const { createdAt, updatedAt, sources } = page;
  const sourceNote = sources.length > 0 ? ` | sources: ${sources.join(", ")}` : "";
  return `<!-- created: ${createdAt} | updated: ${updatedAt}${sourceNote} -->`;
}
|
|
4497
|
+
/**
 * Render a page as the markdown of a single Marp slide: a heading, the
 * summary as a blockquote (with an italic tag line when the page has tags),
 * a short excerpt of the body, and the provenance comment.
 *
 * @param {object} page - Page record.
 * @returns {string} Slide markdown, without the leading `---` separator.
 */
function pageToSlide(page) {
  const tagSuffix = page.tags.length > 0 ? `\n_Tags: ${page.tags.join(", ")}_` : "";
  const excerpt = extractFirstParagraph(page.body);
  const notes = buildSpeakerNotes(page);
  const heading = `## ${page.title}`;
  const quote = `> ${page.summary}${tagSuffix}`;
  return `${heading}\n\n${quote}\n\n${excerpt}\n\n${notes}`;
}
|
|
4512
|
+
/**
 * Restrict pages to one page directory.
 *
 * @param {object[]} pages - Page records.
 * @param {string} source2 - "all" or a `pageDirectory` value to keep.
 * @returns {object[]} The same array for "all", otherwise a filtered copy.
 */
function filterBySource(pages, source2) {
  return source2 === "all" ? pages : pages.filter((page) => page.pageDirectory === source2);
}
|
|
4516
|
+
/**
 * Render the wiki as a Marp slide deck: YAML front matter, a title slide,
 * then one slide per page, separated by `---`.
 *
 * The front-matter title is emitted as a properly escaped double-quoted YAML
 * scalar. Interpolating the raw title between quotes would yield invalid
 * YAML for titles containing `"`, `\` or a newline. For titles without such
 * characters the output is unchanged.
 *
 * @param {object[]} pages - Page records.
 * @param {string} projectTitle - Deck title.
 * @param {string} [source2="all"] - "all", or the page directory to include.
 * @returns {string} Marp markdown.
 */
function buildMarp(pages, projectTitle, source2 = "all") {
  const filtered = filterBySource(pages, source2);
  // JSON string syntax is a valid YAML double-quoted scalar, so JSON.stringify
  // gives correct escaping for quotes, backslashes and control characters.
  const yamlTitle = JSON.stringify(String(projectTitle));
  const frontmatter = [
    "---",
    "marp: true",
    "theme: default",
    "paginate: true",
    `title: ${yamlTitle}`,
    "---"
  ].join("\n");
  const titleSlide = [
    "",
    `# ${projectTitle}`,
    "",
    `${filtered.length} pages | ${(/* @__PURE__ */ new Date()).toISOString()}`
  ].join("\n");
  const slides = filtered.map((p) => `---\n\n${pageToSlide(p)}`);
  return [frontmatter, titleSlide, ...slides, ""].join("\n\n");
}
|
|
4537
|
+
|
|
4538
|
+
// src/export/types.ts
|
|
4539
|
+
var MARP_SOURCES = ["concepts", "queries", "all"];
|
|
4540
|
+
var EXPORT_TARGETS = [
|
|
4541
|
+
"llms-txt",
|
|
4542
|
+
"llms-full-txt",
|
|
4543
|
+
"json",
|
|
4544
|
+
"json-ld",
|
|
4545
|
+
"graphml",
|
|
4546
|
+
"marp"
|
|
4547
|
+
];
|
|
4548
|
+
|
|
4549
|
+
// src/commands/export.ts
|
|
4550
|
+
var require2 = createRequire(import.meta.url);
|
|
4551
|
+
var EXPORT_DIR = "dist/exports";
|
|
4552
|
+
var TARGET_FILENAMES = {
|
|
4553
|
+
"llms-txt": "llms.txt",
|
|
4554
|
+
"llms-full-txt": "llms-full.txt",
|
|
4555
|
+
json: "wiki.json",
|
|
4556
|
+
"json-ld": "wiki.jsonld",
|
|
4557
|
+
graphml: "wiki.graphml",
|
|
4558
|
+
marp: "wiki.md"
|
|
4559
|
+
};
|
|
4560
|
+
/**
 * Determine the title used in exports.
 *
 * Uses the `name` field of `<root>/package.json` when it can be loaded and
 * is a string. Otherwise falls back to "Knowledge Wiki".
 *
 * @param {string} root - Project root directory.
 * @returns {string} The project title.
 */
function resolveProjectTitle(root) {
  try {
    const { name } = require2(path30.join(root, "package.json"));
    if (typeof name === "string") {
      return name;
    }
  } catch {
    // A missing or unreadable package.json is expected; use the default title.
  }
  return "Knowledge Wiki";
}
|
|
4568
|
+
/**
 * Check whether a value names a supported export target.
 *
 * @param {unknown} value - Candidate target name.
 * @returns {boolean} True when the value is listed in EXPORT_TARGETS.
 */
function isValidTarget(value) {
  const known = EXPORT_TARGETS.includes(value);
  return known;
}
|
|
4571
|
+
/**
 * Check whether a value is an accepted `--source` option for the Marp export.
 *
 * @param {unknown} value - Candidate source name.
 * @returns {boolean} True when the value is listed in MARP_SOURCES.
 */
function isValidMarpSource(value) {
  const known = MARP_SOURCES.includes(value);
  return known;
}
|
|
4574
|
+
/**
 * Normalise the raw `--source` option for the Marp export.
 *
 * @param {string|undefined} rawSource - Value supplied by the caller, if any.
 * @returns {string} "all" when no value was given, otherwise the validated value.
 * @throws {Error} When the value is not one of MARP_SOURCES.
 */
function resolveMarpSource(rawSource) {
  if (!rawSource) {
    return "all";
  }
  if (isValidMarpSource(rawSource)) {
    return rawSource;
  }
  throw new Error(
    `Unknown --source value "${rawSource}". Valid values: ${MARP_SOURCES.join(", ")}`
  );
}
|
|
4583
|
+
/**
 * Renders the export payload for a single target by dispatching to the
 * matching format builder.
 *
 * @param {string} target - Export target identifier (one of EXPORT_TARGETS).
 * @param {Array} pages - Collected wiki pages handed to the builder.
 * @param {string} projectTitle - Title passed to the builders that take one.
 * @param {string} marpSource - Page subset, forwarded only to the Marp builder.
 * @returns {*} Whatever the selected builder returns.
 * @throws {Error} When `target` is not a recognized export target.
 */
function buildContent(target, pages, projectTitle, marpSource) {
  switch (target) {
    case "llms-txt":
      return buildLlmsTxt(pages, projectTitle);
    case "llms-full-txt":
      return buildLlmsFullTxt(pages, projectTitle);
    case "json":
      return buildJsonExport(pages);
    case "json-ld":
      return buildJsonLd(pages);
    case "graphml":
      return buildGraphml(pages);
    case "marp":
      return buildMarp(pages, projectTitle, marpSource);
    default:
      // Fail loudly instead of returning undefined, which the caller would
      // otherwise try to write out as the file content.
      throw new Error(`Unsupported export target "${target}"`);
  }
}
|
|
4599
|
+
/**
 * Computes the page count shown in the export summary.
 *
 * When the only target is "marp" and a specific source subset was chosen,
 * only pages whose `pageDirectory` equals that subset are counted; in every
 * other case all pages are counted.
 *
 * @param {Array<{pageDirectory?: string}>} pages - Collected wiki pages.
 * @param {string[]} targets - Targets being exported in this run.
 * @param {string} marpSource - Resolved Marp source subset.
 * @returns {number} Number of pages to report.
 */
function computeReportedPageCount(pages, targets, marpSource) {
  const isMarpOnly = targets.length === 1 && targets[0] === "marp";
  if (!isMarpOnly || marpSource === "all") {
    return pages.length;
  }
  let matching = 0;
  for (const page of pages) {
    if (page.pageDirectory === marpSource) {
      matching += 1;
    }
  }
  return matching;
}
|
|
4606
|
+
/**
 * Exports the wiki under `root` to every requested target format.
 *
 * @param {string} root - Project root directory.
 * @param {{target?: string, source?: string}} [options] - Optional single
 *   target to export and Marp page subset.
 * @returns {Promise<{written: string[], pageCount: number}>} Paths of the
 *   files written and the page count to report.
 * @throws {Error} When `options.target` or `options.source` is not recognized.
 */
async function runExport(root, options = {}) {
  // Validate the options first so an invalid --target/--source is rejected
  // before any time is spent scanning the wiki.
  const targets = resolveTargets(options.target);
  const marpSource = resolveMarpSource(options.source);
  const pages = await collectExportPages(root);
  const projectTitle = resolveProjectTitle(root);
  const written = [];
  // Targets are written one at a time so status lines appear in target order.
  for (const target of targets) {
    const content = buildContent(target, pages, projectTitle, marpSource);
    const outPath = path30.join(root, EXPORT_DIR, TARGET_FILENAMES[target]);
    await atomicWrite(outPath, content);
    written.push(outPath);
    status("+", success(`Exported ${target} \u2192 ${source(outPath)}`));
  }
  return { written, pageCount: computeReportedPageCount(pages, targets, marpSource) };
}
|
|
4621
|
+
/**
 * Turns the raw `target` option into the list of targets to export.
 *
 * @param {string | undefined} rawTarget - Requested target, if any.
 * @returns {string[]} A copy of EXPORT_TARGETS when no target was given,
 *   otherwise a one-element list holding the requested target.
 * @throws {Error} When a non-empty target is not a supported export target.
 */
function resolveTargets(rawTarget) {
  if (rawTarget && !isValidTarget(rawTarget)) {
    const allowed = EXPORT_TARGETS.join(", ");
    throw new Error(`Unknown export target "${rawTarget}". Valid targets: ${allowed}`);
  }
  return rawTarget ? [rawTarget] : EXPORT_TARGETS.slice();
}
|
|
4630
|
+
/**
 * CLI entry point for `export`: prints a header, runs the export, and then
 * prints a one-line summary of pages and files.
 *
 * @param {string} root - Project root directory.
 * @param {{target?: string, source?: string}} options - Parsed CLI options,
 *   forwarded unchanged to runExport.
 * @returns {Promise<void>}
 */
async function exportCommand(root, options) {
  header("Exporting wiki");
  const result = await runExport(root, options);
  const summary = `Done \u2014 ${result.pageCount} pages exported to ${result.written.length} file(s).`;
  status("\u2713", success(summary));
}
|
|
4638
|
+
|
|
3773
4639
|
// src/commands/schema.ts
|
|
3774
4640
|
import { existsSync as existsSync10 } from "fs";
|
|
3775
4641
|
import { mkdir as mkdir6, writeFile as writeFile5 } from "fs/promises";
|
|
3776
|
-
import
|
|
4642
|
+
import path31 from "path";
|
|
3777
4643
|
async function schemaInitCommand() {
|
|
3778
4644
|
const root = process.cwd();
|
|
3779
4645
|
const defaults = buildDefaultSchema();
|
|
@@ -3782,7 +4648,7 @@ async function schemaInitCommand() {
|
|
|
3782
4648
|
status("!", warn(`Schema file already exists at ${targetPath}`));
|
|
3783
4649
|
return;
|
|
3784
4650
|
}
|
|
3785
|
-
await mkdir6(
|
|
4651
|
+
await mkdir6(path31.dirname(targetPath), { recursive: true });
|
|
3786
4652
|
const serializable = {
|
|
3787
4653
|
version: defaults.version,
|
|
3788
4654
|
defaultKind: defaults.defaultKind,
|
|
@@ -3838,10 +4704,17 @@ async function reviewShowCommand(id) {
|
|
|
3838
4704
|
status("!", warn(`[${v.severity}] ${v.message}`));
|
|
3839
4705
|
}
|
|
3840
4706
|
}
|
|
4707
|
+
if (candidate.provenanceViolations && candidate.provenanceViolations.length > 0) {
|
|
4708
|
+
console.log();
|
|
4709
|
+
header("Provenance violations");
|
|
4710
|
+
for (const v of candidate.provenanceViolations) {
|
|
4711
|
+
status("!", warn(`[${v.severity}] ${v.message}`));
|
|
4712
|
+
}
|
|
4713
|
+
}
|
|
3841
4714
|
}
|
|
3842
4715
|
|
|
3843
4716
|
// src/commands/review-approve.ts
|
|
3844
|
-
import
|
|
4717
|
+
import path32 from "path";
|
|
3845
4718
|
|
|
3846
4719
|
// src/commands/review-helpers.ts
|
|
3847
4720
|
async function runReviewUnderLock(id, underLock) {
|
|
@@ -3873,7 +4746,7 @@ async function approveUnderLock(root, id) {
|
|
|
3873
4746
|
process.exitCode = 1;
|
|
3874
4747
|
return;
|
|
3875
4748
|
}
|
|
3876
|
-
const pagePath =
|
|
4749
|
+
const pagePath = path32.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
|
|
3877
4750
|
await atomicWrite(pagePath, candidate.body);
|
|
3878
4751
|
status("+", success(`Approved \u2192 ${source(pagePath)}`));
|
|
3879
4752
|
await persistCandidateSourceStates(root, candidate);
|
|
@@ -3933,7 +4806,7 @@ import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js
|
|
|
3933
4806
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
3934
4807
|
|
|
3935
4808
|
// src/mcp/tools.ts
|
|
3936
|
-
import
|
|
4809
|
+
import path33 from "path";
|
|
3937
4810
|
import { z } from "zod";
|
|
3938
4811
|
|
|
3939
4812
|
// src/mcp/provider-check.ts
|
|
@@ -4069,7 +4942,7 @@ async function pickSearchSlugs(root, question) {
|
|
|
4069
4942
|
if (candidates.length > 0) return candidates.map((c) => c.slug);
|
|
4070
4943
|
} catch {
|
|
4071
4944
|
}
|
|
4072
|
-
const indexContent = await safeReadFile(
|
|
4945
|
+
const indexContent = await safeReadFile(path33.join(root, INDEX_FILE));
|
|
4073
4946
|
const { pages } = await selectPages(question, indexContent);
|
|
4074
4947
|
return pages;
|
|
4075
4948
|
}
|
|
@@ -4128,8 +5001,8 @@ function registerStatusTool(server, root) {
|
|
|
4128
5001
|
);
|
|
4129
5002
|
}
|
|
4130
5003
|
async function collectStatus(root) {
|
|
4131
|
-
const concepts = await collectPageSummaries(
|
|
4132
|
-
const queries = await collectPageSummaries(
|
|
5004
|
+
const concepts = await collectPageSummaries(path33.join(root, CONCEPTS_DIR));
|
|
5005
|
+
const queries = await collectPageSummaries(path33.join(root, QUERIES_DIR));
|
|
4133
5006
|
const state = await readState(root);
|
|
4134
5007
|
const changes = await detectChanges(root, state);
|
|
4135
5008
|
const orphans = await findOrphanedSlugs(root);
|
|
@@ -4146,7 +5019,7 @@ async function collectStatus(root) {
|
|
|
4146
5019
|
};
|
|
4147
5020
|
}
|
|
4148
5021
|
async function findOrphanedSlugs(root) {
|
|
4149
|
-
const scanned = await scanWikiPages(
|
|
5022
|
+
const scanned = await scanWikiPages(path33.join(root, CONCEPTS_DIR));
|
|
4150
5023
|
return scanned.filter(({ meta }) => meta.orphaned).map(({ slug }) => slug);
|
|
4151
5024
|
}
|
|
4152
5025
|
async function loadPageRecords(root, slugs) {
|
|
@@ -4159,7 +5032,7 @@ async function loadPageRecords(root, slugs) {
|
|
|
4159
5032
|
}
|
|
4160
5033
|
async function readPage(root, slug) {
|
|
4161
5034
|
for (const dir of PAGE_DIRS2) {
|
|
4162
|
-
const content = await safeReadFile(
|
|
5035
|
+
const content = await safeReadFile(path33.join(root, dir, `${slug}.md`));
|
|
4163
5036
|
if (!content) continue;
|
|
4164
5037
|
const { meta, body } = parseFrontmatter(content);
|
|
4165
5038
|
if (meta.orphaned) continue;
|
|
@@ -4174,8 +5047,8 @@ async function readPage(root, slug) {
|
|
|
4174
5047
|
}
|
|
4175
5048
|
|
|
4176
5049
|
// src/mcp/resources.ts
|
|
4177
|
-
import
|
|
4178
|
-
import { readdir as
|
|
5050
|
+
import path34 from "path";
|
|
5051
|
+
import { readdir as readdir11 } from "fs/promises";
|
|
4179
5052
|
import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
4180
5053
|
function jsonContent(uri, payload) {
|
|
4181
5054
|
return {
|
|
@@ -4208,7 +5081,7 @@ function registerIndexResource(server, root) {
|
|
|
4208
5081
|
mimeType: "text/markdown"
|
|
4209
5082
|
},
|
|
4210
5083
|
async (uri) => {
|
|
4211
|
-
const content = await safeReadFile(
|
|
5084
|
+
const content = await safeReadFile(path34.join(root, INDEX_FILE));
|
|
4212
5085
|
return { contents: [markdownContent(uri, content)] };
|
|
4213
5086
|
}
|
|
4214
5087
|
);
|
|
@@ -4275,23 +5148,23 @@ function registerQueryResource(server, root) {
|
|
|
4275
5148
|
);
|
|
4276
5149
|
}
|
|
4277
5150
|
async function listSources(root) {
|
|
4278
|
-
const sourcesPath =
|
|
5151
|
+
const sourcesPath = path34.join(root, SOURCES_DIR);
|
|
4279
5152
|
let files;
|
|
4280
5153
|
try {
|
|
4281
|
-
files = await
|
|
5154
|
+
files = await readdir11(sourcesPath);
|
|
4282
5155
|
} catch {
|
|
4283
5156
|
return [];
|
|
4284
5157
|
}
|
|
4285
5158
|
const records = [];
|
|
4286
5159
|
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
4287
|
-
const content = await safeReadFile(
|
|
5160
|
+
const content = await safeReadFile(path34.join(sourcesPath, file));
|
|
4288
5161
|
const { meta } = parseFrontmatter(content);
|
|
4289
5162
|
records.push({ filename: file, ...meta });
|
|
4290
5163
|
}
|
|
4291
5164
|
return records;
|
|
4292
5165
|
}
|
|
4293
5166
|
async function loadPageWithMeta(root, dir, slug) {
|
|
4294
|
-
const filePath =
|
|
5167
|
+
const filePath = path34.join(root, dir, `${slug}.md`);
|
|
4295
5168
|
const content = await safeReadFile(filePath);
|
|
4296
5169
|
if (!content) {
|
|
4297
5170
|
throw new Error(`Page not found: ${dir}/${slug}.md`);
|
|
@@ -4300,10 +5173,10 @@ async function loadPageWithMeta(root, dir, slug) {
|
|
|
4300
5173
|
return { slug, meta, body: body.trim() };
|
|
4301
5174
|
}
|
|
4302
5175
|
async function listPagesUnder(root, dir, scheme) {
|
|
4303
|
-
const pagesPath =
|
|
5176
|
+
const pagesPath = path34.join(root, dir);
|
|
4304
5177
|
let files;
|
|
4305
5178
|
try {
|
|
4306
|
-
files = await
|
|
5179
|
+
files = await readdir11(pagesPath);
|
|
4307
5180
|
} catch {
|
|
4308
5181
|
return { resources: [] };
|
|
4309
5182
|
}
|
|
@@ -4327,8 +5200,8 @@ async function startMCPServer(options) {
|
|
|
4327
5200
|
}
|
|
4328
5201
|
|
|
4329
5202
|
// src/cli.ts
|
|
4330
|
-
var
|
|
4331
|
-
var { version } =
|
|
5203
|
+
var require3 = createRequire2(import.meta.url);
|
|
5204
|
+
var { version } = require3("../package.json");
|
|
4332
5205
|
var program = new Command();
|
|
4333
5206
|
program.name("llmwiki").description("The knowledge compiler \u2014 raw sources in, interlinked wiki out").version(version);
|
|
4334
5207
|
program.command("ingest <source>").description("Ingest a URL or local file into sources/").action(async (source2) => {
|
|
@@ -4339,11 +5212,23 @@ program.command("ingest <source>").description("Ingest a URL or local file into
|
|
|
4339
5212
|
process.exit(1);
|
|
4340
5213
|
}
|
|
4341
5214
|
});
|
|
5215
|
+
// `ingest-session <path>`: ingest a coding-agent session export into sources/.
program
  .command("ingest-session <path>")
  .description("Ingest a coding-agent session export (Claude, Codex, Cursor) into sources/")
  .action(async (sessionPath) => {
    try {
      await ingestSession(sessionPath);
    } catch (error) {
      // Print a red "Error:" prefix and exit non-zero.
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
4342
5223
|
program.command("compile").description("Compile sources/ into an interlinked wiki").option(
|
|
4343
5224
|
"--review",
|
|
4344
5225
|
"Write generated pages as review candidates under .llmwiki/candidates/ instead of mutating wiki/. Orphan-marking for deleted sources is deferred until the next non-review compile."
|
|
5226
|
+
).option(
|
|
5227
|
+
"--lang <code>",
|
|
5228
|
+
'Target language for generated wiki content (e.g. "Chinese", "ja", "zh-CN"). Equivalent to setting LLMWIKI_OUTPUT_LANG.'
|
|
4345
5229
|
).action(async (options) => {
|
|
4346
5230
|
try {
|
|
5231
|
+
applyLanguageOption(options.lang);
|
|
4347
5232
|
requireProvider();
|
|
4348
5233
|
await compileCommand({ review: options.review });
|
|
4349
5234
|
} catch (err) {
|
|
@@ -4384,15 +5269,21 @@ reviewCommand.command("reject <id>").description("Reject a candidate and archive
|
|
|
4384
5269
|
process.exit(1);
|
|
4385
5270
|
}
|
|
4386
5271
|
});
|
|
4387
|
-
program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").option("--debug", "Print which pages and chunks were selected and their scores").
|
|
4388
|
-
|
|
4389
|
-
|
|
4390
|
-
|
|
4391
|
-
|
|
4392
|
-
|
|
4393
|
-
|
|
5272
|
+
program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").option("--debug", "Print which pages and chunks were selected and their scores").option(
|
|
5273
|
+
"--lang <code>",
|
|
5274
|
+
'Target language for the answer (e.g. "Chinese", "ja", "zh-CN"). Equivalent to setting LLMWIKI_OUTPUT_LANG.'
|
|
5275
|
+
).action(
|
|
5276
|
+
async (question, options) => {
|
|
5277
|
+
try {
|
|
5278
|
+
applyLanguageOption(options.lang);
|
|
5279
|
+
requireProvider();
|
|
5280
|
+
await queryCommand(process.cwd(), question, options);
|
|
5281
|
+
} catch (err) {
|
|
5282
|
+
console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
|
|
5283
|
+
process.exit(1);
|
|
5284
|
+
}
|
|
4394
5285
|
}
|
|
4395
|
-
|
|
5286
|
+
);
|
|
4396
5287
|
program.command("watch").description("Watch sources/ and auto-recompile on changes").action(async () => {
|
|
4397
5288
|
try {
|
|
4398
5289
|
requireProvider();
|
|
@@ -4427,6 +5318,17 @@ schemaCmd.command("show").description("Print the resolved schema for this projec
|
|
|
4427
5318
|
process.exit(1);
|
|
4428
5319
|
}
|
|
4429
5320
|
});
|
|
5321
|
+
// `export`: write the wiki out in portable formats for the current directory.
program
  .command("export")
  .description("Export wiki content to portable formats (llms.txt, JSON, GraphML, Marp, \u2026)")
  .option("--target <name>", "Limit export to a single target format")
  .option(
    "--source <kind>",
    "For marp target: which pages to include \u2014 concepts, queries, or all (default: all)"
  )
  .action(async (cliOptions) => {
    try {
      await exportCommand(process.cwd(), cliOptions);
    } catch (error) {
      // Print a red "Error:" prefix and exit non-zero.
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
4430
5332
|
program.command("serve").description("Start an MCP server exposing wiki tools and resources over stdio").option("--root <dir>", "Project root directory", process.cwd()).action(async (options) => {
|
|
4431
5333
|
try {
|
|
4432
5334
|
await startMCPServer({ root: options.root, version });
|
|
@@ -4435,6 +5337,11 @@ program.command("serve").description("Start an MCP server exposing wiki tools an
|
|
|
4435
5337
|
process.exit(1);
|
|
4436
5338
|
}
|
|
4437
5339
|
});
|
|
5340
|
+
/**
 * Copies a `--lang` CLI value into the LLMWIKI_OUTPUT_LANG environment
 * variable. Missing or whitespace-only values leave the environment untouched.
 *
 * @param {string | undefined} lang - Raw `--lang` option value.
 * @returns {void}
 */
function applyLanguageOption(lang) {
  if (!lang) {
    return;
  }
  const trimmed = lang.trim();
  if (trimmed.length === 0) {
    return;
  }
  process.env.LLMWIKI_OUTPUT_LANG = trimmed;
}
|
|
4438
5345
|
var PROVIDER_KEY_VARS2 = {
|
|
4439
5346
|
anthropic: "ANTHROPIC_API_KEY",
|
|
4440
5347
|
openai: "OPENAI_API_KEY",
|