llm-wiki-compiler 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -10
- package/dist/cli.js +1162 -256
- package/dist/cli.js.map +1 -1
- package/package.json +3 -2
package/dist/cli.js
CHANGED
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
// src/cli.ts
|
|
4
4
|
import "dotenv/config";
|
|
5
|
-
import { createRequire } from "module";
|
|
5
|
+
import { createRequire as createRequire2 } from "module";
|
|
6
6
|
import { Command } from "commander";
|
|
7
7
|
|
|
8
8
|
// src/commands/ingest.ts
|
|
9
|
-
import
|
|
10
|
-
import {
|
|
9
|
+
import path8 from "path";
|
|
10
|
+
import { readFile as readFile7 } from "fs/promises";
|
|
11
11
|
|
|
12
12
|
// src/utils/markdown.ts
|
|
13
13
|
import { writeFile, rename, readFile, mkdir } from "fs/promises";
|
|
@@ -22,7 +22,7 @@ var VALID_PROVENANCE_STATES = /* @__PURE__ */ new Set([
|
|
|
22
22
|
"ambiguous"
|
|
23
23
|
]);
|
|
24
24
|
function slugify(title) {
|
|
25
|
-
return title.toLowerCase().replace(/['']/g, "").replace(/[^\
|
|
25
|
+
return title.toLowerCase().replace(/['']/g, "").replace(/[^\p{L}\p{N}\s-]/gu, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
26
26
|
}
|
|
27
27
|
function buildFrontmatter(fields) {
|
|
28
28
|
const dumped = yaml.dump(fields, { lineWidth: -1, quotingType: '"' }).trimEnd();
|
|
@@ -103,16 +103,11 @@ function parseContradictedBy(raw) {
|
|
|
103
103
|
const refs = raw.map(coerceContradictionEntry).filter((ref) => ref !== null);
|
|
104
104
|
return refs.length > 0 ? refs : void 0;
|
|
105
105
|
}
|
|
106
|
-
function parseInferredParagraphs(raw) {
|
|
107
|
-
if (typeof raw !== "number" || !Number.isInteger(raw) || raw < 0) return void 0;
|
|
108
|
-
return raw;
|
|
109
|
-
}
|
|
110
106
|
function parseProvenanceMetadata(meta) {
|
|
111
107
|
return {
|
|
112
108
|
confidence: parseConfidence(meta.confidence),
|
|
113
109
|
provenanceState: parseProvenanceState(meta.provenanceState),
|
|
114
|
-
contradictedBy: parseContradictedBy(meta.contradictedBy)
|
|
115
|
-
inferredParagraphs: parseInferredParagraphs(meta.inferredParagraphs)
|
|
110
|
+
contradictedBy: parseContradictedBy(meta.contradictedBy)
|
|
116
111
|
};
|
|
117
112
|
}
|
|
118
113
|
function validateWikiPage(content) {
|
|
@@ -123,9 +118,16 @@ function validateWikiPage(content) {
|
|
|
123
118
|
return true;
|
|
124
119
|
}
|
|
125
120
|
|
|
121
|
+
// src/utils/source-writer.ts
|
|
122
|
+
import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
|
|
123
|
+
import path2 from "path";
|
|
124
|
+
import { createHash } from "crypto";
|
|
125
|
+
|
|
126
126
|
// src/utils/constants.ts
|
|
127
127
|
var MAX_SOURCE_CHARS = 1e5;
|
|
128
128
|
var MIN_SOURCE_CHARS = 50;
|
|
129
|
+
var DEFAULT_PROMPT_BUDGET_CHARS = 2e5;
|
|
130
|
+
var PROMPT_BUDGET_ENV_VAR = "LLMWIKI_PROMPT_BUDGET_CHARS";
|
|
129
131
|
var QUERY_PAGE_LIMIT = 5;
|
|
130
132
|
var COMPILE_CONCURRENCY = 5;
|
|
131
133
|
var RETRY_COUNT = 3;
|
|
@@ -169,6 +171,42 @@ var EMBEDDING_MODELS = {
|
|
|
169
171
|
ollama: "nomic-embed-text"
|
|
170
172
|
};
|
|
171
173
|
|
|
174
|
+
// src/utils/source-writer.ts
|
|
175
|
+
var COLLISION_HASH_LEN = 8;
|
|
176
|
+
function shortHashOfSource(source2) {
|
|
177
|
+
return createHash("sha256").update(source2).digest("hex").slice(0, COLLISION_HASH_LEN);
|
|
178
|
+
}
|
|
179
|
+
async function resolveCollisionFreeFilename(slug, source2) {
|
|
180
|
+
const candidate = `${slug}.md`;
|
|
181
|
+
const candidatePath2 = path2.join(SOURCES_DIR, candidate);
|
|
182
|
+
let existing;
|
|
183
|
+
try {
|
|
184
|
+
existing = await readFile2(candidatePath2, "utf-8");
|
|
185
|
+
} catch (err) {
|
|
186
|
+
const e = err;
|
|
187
|
+
if (e.code === "ENOENT") return candidate;
|
|
188
|
+
throw err;
|
|
189
|
+
}
|
|
190
|
+
const { meta } = parseFrontmatter(existing);
|
|
191
|
+
if (typeof meta.source === "string" && meta.source === source2) {
|
|
192
|
+
return candidate;
|
|
193
|
+
}
|
|
194
|
+
return `${slug}-${shortHashOfSource(source2)}.md`;
|
|
195
|
+
}
|
|
196
|
+
async function saveSource(title, document, source2) {
|
|
197
|
+
const slug = slugify(title);
|
|
198
|
+
if (!slug) {
|
|
199
|
+
throw new Error(
|
|
200
|
+
`Could not derive a filename from title "${title}". The title contains no letter or number characters. Rename the source file to one with at least one letter or digit.`
|
|
201
|
+
);
|
|
202
|
+
}
|
|
203
|
+
await mkdir2(SOURCES_DIR, { recursive: true });
|
|
204
|
+
const filename = await resolveCollisionFreeFilename(slug, source2);
|
|
205
|
+
const destPath = path2.join(SOURCES_DIR, filename);
|
|
206
|
+
await writeFile2(destPath, document, "utf-8");
|
|
207
|
+
return destPath;
|
|
208
|
+
}
|
|
209
|
+
|
|
172
210
|
// src/utils/output.ts
|
|
173
211
|
var RESET = "\x1B[0m";
|
|
174
212
|
var BOLD = "\x1B[1m";
|
|
@@ -244,13 +282,13 @@ async function ingestWeb(url) {
|
|
|
244
282
|
}
|
|
245
283
|
|
|
246
284
|
// src/ingest/file.ts
|
|
247
|
-
import { readFile as
|
|
248
|
-
import
|
|
285
|
+
import { readFile as readFile3 } from "fs/promises";
|
|
286
|
+
import path4 from "path";
|
|
249
287
|
|
|
250
288
|
// src/ingest/shared.ts
|
|
251
|
-
import
|
|
289
|
+
import path3 from "path";
|
|
252
290
|
function titleFromFilename(filePath) {
|
|
253
|
-
const basename =
|
|
291
|
+
const basename = path3.basename(filePath, path3.extname(filePath));
|
|
254
292
|
return basename.replace(/[-_]+/g, " ").trim();
|
|
255
293
|
}
|
|
256
294
|
|
|
@@ -262,20 +300,20 @@ ${text}
|
|
|
262
300
|
\`\`\``;
|
|
263
301
|
}
|
|
264
302
|
async function ingestFile(filePath) {
|
|
265
|
-
const ext =
|
|
303
|
+
const ext = path4.extname(filePath).toLowerCase();
|
|
266
304
|
if (!SUPPORTED_EXTENSIONS.has(ext)) {
|
|
267
305
|
throw new Error(
|
|
268
306
|
`Unsupported file type "${ext}". Only .md and .txt files are supported.`
|
|
269
307
|
);
|
|
270
308
|
}
|
|
271
|
-
const raw = await
|
|
309
|
+
const raw = await readFile3(filePath, "utf-8");
|
|
272
310
|
const title = titleFromFilename(filePath);
|
|
273
311
|
const content = ext === ".md" ? raw : wrapPlainText(raw);
|
|
274
312
|
return { title, content };
|
|
275
313
|
}
|
|
276
314
|
|
|
277
315
|
// src/ingest/pdf.ts
|
|
278
|
-
import { readFile as
|
|
316
|
+
import { readFile as readFile4 } from "fs/promises";
|
|
279
317
|
function resolveTitle(filePath, info2) {
|
|
280
318
|
if (info2 && typeof info2 === "object") {
|
|
281
319
|
const titleField = info2["Title"];
|
|
@@ -287,7 +325,7 @@ function resolveTitle(filePath, info2) {
|
|
|
287
325
|
}
|
|
288
326
|
async function ingestPdf(filePath) {
|
|
289
327
|
const { PDFParse } = await import("pdf-parse");
|
|
290
|
-
const buffer = await
|
|
328
|
+
const buffer = await readFile4(filePath);
|
|
291
329
|
const parser = new PDFParse({ data: new Uint8Array(buffer) });
|
|
292
330
|
try {
|
|
293
331
|
const textResult = await parser.getText();
|
|
@@ -301,8 +339,8 @@ async function ingestPdf(filePath) {
|
|
|
301
339
|
}
|
|
302
340
|
|
|
303
341
|
// src/ingest/image.ts
|
|
304
|
-
import { readFile as
|
|
305
|
-
import
|
|
342
|
+
import { readFile as readFile5 } from "fs/promises";
|
|
343
|
+
import path6 from "path";
|
|
306
344
|
import Anthropic2 from "@anthropic-ai/sdk";
|
|
307
345
|
|
|
308
346
|
// src/providers/anthropic.ts
|
|
@@ -419,7 +457,7 @@ var AnthropicProvider = class {
|
|
|
419
457
|
// src/utils/claude-settings.ts
|
|
420
458
|
import { readFileSync } from "fs";
|
|
421
459
|
import { homedir } from "os";
|
|
422
|
-
import
|
|
460
|
+
import path5 from "path";
|
|
423
461
|
var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
|
|
424
462
|
function isRecord(value) {
|
|
425
463
|
return typeof value === "object" && value !== null;
|
|
@@ -430,7 +468,7 @@ function normalize(value) {
|
|
|
430
468
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
431
469
|
}
|
|
432
470
|
function resolveClaudeSettingsPath(env) {
|
|
433
|
-
return env[CLAUDE_SETTINGS_PATH_ENV] ??
|
|
471
|
+
return env[CLAUDE_SETTINGS_PATH_ENV] ?? path5.join(homedir(), ".claude", "settings.json");
|
|
434
472
|
}
|
|
435
473
|
function readClaudeSettingsFile(settingsPath) {
|
|
436
474
|
try {
|
|
@@ -563,9 +601,9 @@ async function ingestImage(filePath) {
|
|
|
563
601
|
`Image ingest requires the Anthropic provider (vision). Current provider: "${providerName}". Set LLMWIKI_PROVIDER=anthropic and ANTHROPIC_API_KEY to use image ingest.`
|
|
564
602
|
);
|
|
565
603
|
}
|
|
566
|
-
const ext =
|
|
604
|
+
const ext = path6.extname(filePath).toLowerCase();
|
|
567
605
|
const mimeType = mimeTypeForExtension(ext);
|
|
568
|
-
const imageBuffer = await
|
|
606
|
+
const imageBuffer = await readFile5(filePath);
|
|
569
607
|
const imageData = imageBuffer.toString("base64");
|
|
570
608
|
const client = buildClient();
|
|
571
609
|
const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
|
|
@@ -575,10 +613,9 @@ async function ingestImage(filePath) {
|
|
|
575
613
|
}
|
|
576
614
|
|
|
577
615
|
// src/ingest/transcript.ts
|
|
578
|
-
import { readFile as
|
|
579
|
-
import
|
|
580
|
-
import { YoutubeTranscript
|
|
581
|
-
var YoutubeTranscript = YoutubeTranscriptUntyped;
|
|
616
|
+
import { readFile as readFile6 } from "fs/promises";
|
|
617
|
+
import path7 from "path";
|
|
618
|
+
import { YoutubeTranscript } from "youtube-transcript";
|
|
582
619
|
var YOUTUBE_URL_PATTERN = /^https?:\/\/(www\.)?(youtube\.com\/watch|youtu\.be\/)/;
|
|
583
620
|
var SRT_SEQUENCE_PATTERN = /^\d+$/;
|
|
584
621
|
var TIMESTAMP_PATTERN = /\d{2}:\d{2}[:.]\d{2}/;
|
|
@@ -662,8 +699,8 @@ async function ingestTranscript(source2) {
|
|
|
662
699
|
if (isYoutubeUrl(source2)) {
|
|
663
700
|
return fetchYoutubeTranscript(source2);
|
|
664
701
|
}
|
|
665
|
-
const ext =
|
|
666
|
-
const raw = await
|
|
702
|
+
const ext = path7.extname(source2).toLowerCase();
|
|
703
|
+
const raw = await readFile6(source2, "utf-8");
|
|
667
704
|
if (ext === ".vtt") return parseVtt(raw, source2);
|
|
668
705
|
if (ext === ".srt") return parseSrt(raw, source2);
|
|
669
706
|
if (ext === ".txt") return parsePlainTranscript(raw, source2);
|
|
@@ -702,7 +739,7 @@ function hasSpeakerDialoguePattern(sample) {
|
|
|
702
739
|
return hasEnoughSpeakers && hasRepeatedSpeaker;
|
|
703
740
|
}
|
|
704
741
|
async function looksLikeTxtTranscript(filePath) {
|
|
705
|
-
const raw = await
|
|
742
|
+
const raw = await readFile7(filePath, "utf-8");
|
|
706
743
|
const sample = raw.slice(0, TXT_SNIFF_BYTES);
|
|
707
744
|
if (hasSpeakerDialoguePattern(sample)) return true;
|
|
708
745
|
const timestampMatches = sample.match(new RegExp(TIMESTAMP_PATTERN2.source, "gm"));
|
|
@@ -742,7 +779,7 @@ function enforceMinContent(content) {
|
|
|
742
779
|
}
|
|
743
780
|
async function detectSourceType(source2) {
|
|
744
781
|
if (!isUrl(source2)) {
|
|
745
|
-
const ext =
|
|
782
|
+
const ext = path8.extname(source2).toLowerCase();
|
|
746
783
|
if (ext === ".pdf") return "pdf";
|
|
747
784
|
if (IMAGE_EXTENSIONS.has(ext)) return "image";
|
|
748
785
|
if (TRANSCRIPT_EXTENSIONS.has(ext)) return "transcript";
|
|
@@ -788,13 +825,6 @@ async function fetchContent(source2, sourceType) {
|
|
|
788
825
|
return ingestFile(source2);
|
|
789
826
|
}
|
|
790
827
|
}
|
|
791
|
-
async function saveSource(title, document) {
|
|
792
|
-
const filename = `${slugify(title)}.md`;
|
|
793
|
-
const destPath = path7.join(SOURCES_DIR, filename);
|
|
794
|
-
await mkdir2(SOURCES_DIR, { recursive: true });
|
|
795
|
-
await writeFile2(destPath, document, "utf-8");
|
|
796
|
-
return destPath;
|
|
797
|
-
}
|
|
798
828
|
async function ingestSource(source2) {
|
|
799
829
|
const sourceType = await detectSourceType(source2);
|
|
800
830
|
status("*", info(`Ingesting [${sourceType}]: ${source2}`));
|
|
@@ -802,9 +832,9 @@ async function ingestSource(source2) {
|
|
|
802
832
|
const result = enforceCharLimit(content);
|
|
803
833
|
enforceMinContent(result.content);
|
|
804
834
|
const document = buildDocument(title, source2, result, sourceType);
|
|
805
|
-
const savedPath = await saveSource(title, document);
|
|
835
|
+
const savedPath = await saveSource(title, document, source2);
|
|
806
836
|
return {
|
|
807
|
-
filename:
|
|
837
|
+
filename: path8.basename(savedPath),
|
|
808
838
|
charCount: result.content.length,
|
|
809
839
|
truncated: result.truncated,
|
|
810
840
|
source: source2,
|
|
@@ -813,7 +843,7 @@ async function ingestSource(source2) {
|
|
|
813
843
|
}
|
|
814
844
|
async function ingest(source2) {
|
|
815
845
|
const result = await ingestSource(source2);
|
|
816
|
-
const savedPath =
|
|
846
|
+
const savedPath = path8.join(SOURCES_DIR, result.filename);
|
|
817
847
|
status(
|
|
818
848
|
"+",
|
|
819
849
|
success(`Saved ${bold(result.filename)} \u2192 ${source(savedPath)}`)
|
|
@@ -821,27 +851,390 @@ async function ingest(source2) {
|
|
|
821
851
|
status("\u2192", dim("Next: llmwiki compile"));
|
|
822
852
|
}
|
|
823
853
|
|
|
854
|
+
// src/commands/ingest-session.ts
|
|
855
|
+
import path12 from "path";
|
|
856
|
+
import { readdir, stat } from "fs/promises";
|
|
857
|
+
|
|
858
|
+
// src/adapters/claude.ts
|
|
859
|
+
import { readFile as readFile8 } from "fs/promises";
|
|
860
|
+
import path9 from "path";
|
|
861
|
+
|
|
862
|
+
// src/adapters/utils.ts
|
|
863
|
+
var MAX_TITLE_CHARS = 80;
|
|
864
|
+
function truncateTitle(text) {
|
|
865
|
+
const trimmed = text.trim();
|
|
866
|
+
return trimmed.length > MAX_TITLE_CHARS ? trimmed.slice(0, MAX_TITLE_CHARS).trimEnd() + "\u2026" : trimmed;
|
|
867
|
+
}
|
|
868
|
+
function resolveSessionTitle(rawTitle, firstUserContent, defaultTitle) {
|
|
869
|
+
if (rawTitle && rawTitle.trim().length > 0) return truncateTitle(rawTitle);
|
|
870
|
+
if (firstUserContent) {
|
|
871
|
+
const firstLine = firstUserContent.split("\n")[0];
|
|
872
|
+
if (firstLine.trim().length > 0) return truncateTitle(firstLine);
|
|
873
|
+
}
|
|
874
|
+
return defaultTitle;
|
|
875
|
+
}
|
|
876
|
+
function parseJsonOrThrow(raw, filePath) {
|
|
877
|
+
try {
|
|
878
|
+
return JSON.parse(raw);
|
|
879
|
+
} catch {
|
|
880
|
+
throw new Error(`Invalid JSON in session file: ${filePath}`);
|
|
881
|
+
}
|
|
882
|
+
}
|
|
883
|
+
|
|
884
|
+
// src/adapters/claude.ts
|
|
885
|
+
var CLAUDE_EXTENSION = ".jsonl";
|
|
886
|
+
var CLAUDE_TYPE_MARKERS = /* @__PURE__ */ new Set(["user", "assistant", "system", "tool_use", "tool_result"]);
|
|
887
|
+
function extractText(content) {
|
|
888
|
+
if (typeof content === "string") return content;
|
|
889
|
+
return content.filter((b) => b.type === "text" && typeof b.text === "string").map((b) => b.text).join("\n");
|
|
890
|
+
}
|
|
891
|
+
function titleFromFirstUserMessage(turns) {
|
|
892
|
+
const firstUser = turns.find((t) => t.role === "user" && t.content.trim().length > 0);
|
|
893
|
+
return resolveSessionTitle(void 0, firstUser?.content, "Claude Session");
|
|
894
|
+
}
|
|
895
|
+
function parseLine(line) {
|
|
896
|
+
try {
|
|
897
|
+
return JSON.parse(line);
|
|
898
|
+
} catch {
|
|
899
|
+
return null;
|
|
900
|
+
}
|
|
901
|
+
}
|
|
902
|
+
function eventToTurn(event) {
|
|
903
|
+
if (!event.message || !event.message.role) return null;
|
|
904
|
+
const role = event.message.role;
|
|
905
|
+
if (role !== "user" && role !== "assistant") return null;
|
|
906
|
+
const content = extractText(event.message.content);
|
|
907
|
+
if (content.trim().length === 0) return null;
|
|
908
|
+
return { role, content, timestamp: event.timestamp };
|
|
909
|
+
}
|
|
910
|
+
var claudeAdapter = {
|
|
911
|
+
name: "claude",
|
|
912
|
+
async detect(filePath) {
|
|
913
|
+
if (path9.extname(filePath).toLowerCase() !== CLAUDE_EXTENSION) return false;
|
|
914
|
+
const raw = await readFile8(filePath, "utf-8").catch(() => "");
|
|
915
|
+
const firstLine = raw.split("\n")[0].trim();
|
|
916
|
+
if (!firstLine.startsWith("{")) return false;
|
|
917
|
+
try {
|
|
918
|
+
const obj = JSON.parse(firstLine);
|
|
919
|
+
return typeof obj.type === "string" && CLAUDE_TYPE_MARKERS.has(obj.type);
|
|
920
|
+
} catch {
|
|
921
|
+
return false;
|
|
922
|
+
}
|
|
923
|
+
},
|
|
924
|
+
async parse(filePath) {
|
|
925
|
+
const raw = await readFile8(filePath, "utf-8");
|
|
926
|
+
const lines = raw.split("\n").filter((l) => l.trim().length > 0);
|
|
927
|
+
if (lines.length === 0) {
|
|
928
|
+
throw new Error(`Claude session file is empty: ${filePath}`);
|
|
929
|
+
}
|
|
930
|
+
const turns = [];
|
|
931
|
+
const timestamps = [];
|
|
932
|
+
for (const [index, line] of lines.entries()) {
|
|
933
|
+
const event = parseLine(line);
|
|
934
|
+
if (event === null) {
|
|
935
|
+
throw new Error(
|
|
936
|
+
`Malformed JSON on line ${index + 1} of Claude session: ${filePath}`
|
|
937
|
+
);
|
|
938
|
+
}
|
|
939
|
+
if (event.timestamp) timestamps.push(event.timestamp);
|
|
940
|
+
const turn = eventToTurn(event);
|
|
941
|
+
if (turn) turns.push(turn);
|
|
942
|
+
}
|
|
943
|
+
const title = titleFromFirstUserMessage(turns);
|
|
944
|
+
return {
|
|
945
|
+
title,
|
|
946
|
+
adapter: "claude",
|
|
947
|
+
startedAt: timestamps[0],
|
|
948
|
+
endedAt: timestamps[timestamps.length - 1],
|
|
949
|
+
participantIdentity: "Claude Code",
|
|
950
|
+
turns
|
|
951
|
+
};
|
|
952
|
+
}
|
|
953
|
+
};
|
|
954
|
+
|
|
955
|
+
// src/adapters/codex.ts
|
|
956
|
+
import { readFile as readFile9 } from "fs/promises";
|
|
957
|
+
import path10 from "path";
|
|
958
|
+
var CODEX_EXTENSION = ".json";
|
|
959
|
+
function unixToIso(ts) {
|
|
960
|
+
return new Date(ts * 1e3).toISOString();
|
|
961
|
+
}
|
|
962
|
+
function extractTurns(mapping) {
|
|
963
|
+
const turns = [];
|
|
964
|
+
for (const node of Object.values(mapping)) {
|
|
965
|
+
const msg = node.message;
|
|
966
|
+
if (!msg) continue;
|
|
967
|
+
const role = msg.author?.role;
|
|
968
|
+
if (role !== "user" && role !== "assistant") continue;
|
|
969
|
+
const content = (msg.content?.parts ?? []).join("\n").trim();
|
|
970
|
+
if (content.length === 0) continue;
|
|
971
|
+
turns.push({
|
|
972
|
+
role,
|
|
973
|
+
content,
|
|
974
|
+
timestamp: msg.create_time != null ? unixToIso(msg.create_time) : void 0
|
|
975
|
+
});
|
|
976
|
+
}
|
|
977
|
+
turns.sort((a, b) => {
|
|
978
|
+
if (!a.timestamp || !b.timestamp) return 0;
|
|
979
|
+
return a.timestamp.localeCompare(b.timestamp);
|
|
980
|
+
});
|
|
981
|
+
return turns;
|
|
982
|
+
}
|
|
983
|
+
function isCodexExport(value) {
|
|
984
|
+
return Array.isArray(value) && value.length > 0 && typeof value[0].mapping === "object";
|
|
985
|
+
}
|
|
986
|
+
var codexAdapter = {
|
|
987
|
+
name: "codex",
|
|
988
|
+
async detect(filePath) {
|
|
989
|
+
if (path10.extname(filePath).toLowerCase() !== CODEX_EXTENSION) return false;
|
|
990
|
+
const raw = await readFile9(filePath, "utf-8").catch(() => "");
|
|
991
|
+
if (raw.trimStart()[0] !== "[") return false;
|
|
992
|
+
try {
|
|
993
|
+
return isCodexExport(JSON.parse(raw));
|
|
994
|
+
} catch {
|
|
995
|
+
return false;
|
|
996
|
+
}
|
|
997
|
+
},
|
|
998
|
+
async parse(filePath) {
|
|
999
|
+
const raw = await readFile9(filePath, "utf-8");
|
|
1000
|
+
const parsed = parseJsonOrThrow(raw, filePath);
|
|
1001
|
+
if (!isCodexExport(parsed)) {
|
|
1002
|
+
throw new Error(
|
|
1003
|
+
`Codex session file does not contain a conversation array: ${filePath}`
|
|
1004
|
+
);
|
|
1005
|
+
}
|
|
1006
|
+
const conv = parsed[0];
|
|
1007
|
+
const turns = extractTurns(conv.mapping ?? {});
|
|
1008
|
+
const firstUser = turns.find((t) => t.role === "user");
|
|
1009
|
+
return {
|
|
1010
|
+
title: resolveSessionTitle(conv.title, firstUser?.content, "Codex Session"),
|
|
1011
|
+
adapter: "codex",
|
|
1012
|
+
startedAt: conv.create_time != null ? unixToIso(conv.create_time) : void 0,
|
|
1013
|
+
endedAt: conv.update_time != null ? unixToIso(conv.update_time) : void 0,
|
|
1014
|
+
participantIdentity: "OpenAI Codex",
|
|
1015
|
+
turns
|
|
1016
|
+
};
|
|
1017
|
+
}
|
|
1018
|
+
};
|
|
1019
|
+
|
|
1020
|
+
// src/adapters/cursor.ts
|
|
1021
|
+
import { readFile as readFile10 } from "fs/promises";
|
|
1022
|
+
import path11 from "path";
|
|
1023
|
+
var CURSOR_EXTENSION = ".json";
|
|
1024
|
+
function isTabsExport(value) {
|
|
1025
|
+
return typeof value === "object" && value !== null && "tabs" in value && Array.isArray(value.tabs);
|
|
1026
|
+
}
|
|
1027
|
+
function isFlatExport(value) {
|
|
1028
|
+
return typeof value === "object" && value !== null && "messages" in value && Array.isArray(value.messages);
|
|
1029
|
+
}
|
|
1030
|
+
function extractMessagesAndTitle(data) {
|
|
1031
|
+
if (isTabsExport(data)) {
|
|
1032
|
+
const tab = data.tabs[0];
|
|
1033
|
+
return { messages: tab?.messages ?? [], title: tab?.title };
|
|
1034
|
+
}
|
|
1035
|
+
return { messages: data.messages, title: data.title };
|
|
1036
|
+
}
|
|
1037
|
+
function toTurns(messages) {
|
|
1038
|
+
const turns = [];
|
|
1039
|
+
for (const msg of messages) {
|
|
1040
|
+
const role = msg.role;
|
|
1041
|
+
if (role !== "user" && role !== "assistant") continue;
|
|
1042
|
+
const content = (msg.content ?? "").trim();
|
|
1043
|
+
if (content.length === 0) continue;
|
|
1044
|
+
turns.push({ role, content, timestamp: msg.timestamp });
|
|
1045
|
+
}
|
|
1046
|
+
return turns;
|
|
1047
|
+
}
|
|
1048
|
+
var cursorAdapter = {
|
|
1049
|
+
name: "cursor",
|
|
1050
|
+
async detect(filePath) {
|
|
1051
|
+
if (path11.extname(filePath).toLowerCase() !== CURSOR_EXTENSION) return false;
|
|
1052
|
+
const raw = await readFile10(filePath, "utf-8").catch(() => "");
|
|
1053
|
+
if (raw.trimStart()[0] !== "{") return false;
|
|
1054
|
+
try {
|
|
1055
|
+
const parsed = JSON.parse(raw);
|
|
1056
|
+
return isTabsExport(parsed) || isFlatExport(parsed);
|
|
1057
|
+
} catch {
|
|
1058
|
+
return false;
|
|
1059
|
+
}
|
|
1060
|
+
},
|
|
1061
|
+
async parse(filePath) {
|
|
1062
|
+
const raw = await readFile10(filePath, "utf-8");
|
|
1063
|
+
const parsed = parseJsonOrThrow(raw, filePath);
|
|
1064
|
+
if (!isTabsExport(parsed) && !isFlatExport(parsed)) {
|
|
1065
|
+
throw new Error(
|
|
1066
|
+
`Cursor session file does not match a known Cursor export schema: ${filePath}`
|
|
1067
|
+
);
|
|
1068
|
+
}
|
|
1069
|
+
const { messages, title: rawTitle } = extractMessagesAndTitle(parsed);
|
|
1070
|
+
const turns = toTurns(messages);
|
|
1071
|
+
const firstUser = turns.find((t) => t.role === "user");
|
|
1072
|
+
const timestamps = turns.filter((t) => t.timestamp != null).map((t) => t.timestamp);
|
|
1073
|
+
return {
|
|
1074
|
+
title: resolveSessionTitle(rawTitle, firstUser?.content, "Cursor Session"),
|
|
1075
|
+
adapter: "cursor",
|
|
1076
|
+
startedAt: timestamps[0],
|
|
1077
|
+
endedAt: timestamps[timestamps.length - 1],
|
|
1078
|
+
participantIdentity: "Cursor AI",
|
|
1079
|
+
turns
|
|
1080
|
+
};
|
|
1081
|
+
}
|
|
1082
|
+
};
|
|
1083
|
+
|
|
1084
|
+
// src/adapters/registry.ts
|
|
1085
|
+
var ADAPTERS = [claudeAdapter, codexAdapter, cursorAdapter];
|
|
1086
|
+
async function detectAdapter(filePath) {
|
|
1087
|
+
for (const adapter of ADAPTERS) {
|
|
1088
|
+
if (await adapter.detect(filePath)) return adapter;
|
|
1089
|
+
}
|
|
1090
|
+
return null;
|
|
1091
|
+
}
|
|
1092
|
+
async function parseSessionFile(filePath) {
|
|
1093
|
+
const adapter = await detectAdapter(filePath);
|
|
1094
|
+
if (!adapter) {
|
|
1095
|
+
throw new Error(
|
|
1096
|
+
`No session adapter recognised the file: ${filePath}
|
|
1097
|
+
Supported formats: ${ADAPTERS.map((a) => a.name).join(", ")}`
|
|
1098
|
+
);
|
|
1099
|
+
}
|
|
1100
|
+
const session = await adapter.parse(filePath);
|
|
1101
|
+
assertSessionHasUsableTurns(session, filePath);
|
|
1102
|
+
return session;
|
|
1103
|
+
}
|
|
1104
|
+
function assertSessionHasUsableTurns(session, filePath) {
|
|
1105
|
+
const hasUsableTurn = session.turns.some(
|
|
1106
|
+
(t) => (t.role === "user" || t.role === "assistant") && t.content.trim().length > 0
|
|
1107
|
+
);
|
|
1108
|
+
if (!hasUsableTurn) {
|
|
1109
|
+
throw new Error(
|
|
1110
|
+
`${session.adapter} session has no usable turns: ${filePath}
|
|
1111
|
+
The file matches the ${session.adapter} export shape, but no user or assistant message with content was found. Re-export the session or delete the file if it is empty.`
|
|
1112
|
+
);
|
|
1113
|
+
}
|
|
1114
|
+
}
|
|
1115
|
+
function formatSessionAsMarkdown(session) {
|
|
1116
|
+
const lines = [];
|
|
1117
|
+
for (const turn of session.turns) {
|
|
1118
|
+
const label = turn.role === "user" ? "User" : session.participantIdentity ?? "Assistant";
|
|
1119
|
+
const heading = turn.timestamp ? `### ${label} _(${turn.timestamp})_` : `### ${label}`;
|
|
1120
|
+
lines.push(heading);
|
|
1121
|
+
lines.push("");
|
|
1122
|
+
lines.push(turn.content);
|
|
1123
|
+
lines.push("");
|
|
1124
|
+
}
|
|
1125
|
+
return lines.join("\n").trimEnd();
|
|
1126
|
+
}
|
|
1127
|
+
|
|
1128
|
+
// src/commands/ingest-session.ts
|
|
1129
|
+
function buildSessionFrontmatter(session, sourcePath) {
|
|
1130
|
+
const meta = {
|
|
1131
|
+
title: session.title,
|
|
1132
|
+
source: sourcePath,
|
|
1133
|
+
adapter: session.adapter,
|
|
1134
|
+
ingestedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
1135
|
+
};
|
|
1136
|
+
if (session.startedAt) meta.sessionStartedAt = session.startedAt;
|
|
1137
|
+
if (session.endedAt) meta.sessionEndedAt = session.endedAt;
|
|
1138
|
+
if (session.participantIdentity) meta.participant = session.participantIdentity;
|
|
1139
|
+
return buildFrontmatter(meta);
|
|
1140
|
+
}
|
|
1141
|
+
async function saveSessionSource(session, sourcePath) {
|
|
1142
|
+
const frontmatter = buildSessionFrontmatter(session, sourcePath);
|
|
1143
|
+
const body = formatSessionAsMarkdown(session);
|
|
1144
|
+
const document = `${frontmatter}
|
|
1145
|
+
|
|
1146
|
+
${body}
|
|
1147
|
+
`;
|
|
1148
|
+
return saveSource(session.title, document, sourcePath);
|
|
1149
|
+
}
|
|
1150
|
+
async function ingestSessionFile(filePath) {
|
|
1151
|
+
status("*", info(`Ingesting session: ${filePath}`));
|
|
1152
|
+
const session = await parseSessionFile(filePath);
|
|
1153
|
+
const savedPath = await saveSessionSource(session, filePath);
|
|
1154
|
+
status(
|
|
1155
|
+
"+",
|
|
1156
|
+
success(
|
|
1157
|
+
`Saved ${bold(path12.basename(savedPath))} [${session.adapter}] \u2192 ${source(savedPath)}`
|
|
1158
|
+
)
|
|
1159
|
+
);
|
|
1160
|
+
return {
|
|
1161
|
+
filename: path12.basename(savedPath),
|
|
1162
|
+
adapter: session.adapter,
|
|
1163
|
+
title: session.title,
|
|
1164
|
+
source: filePath
|
|
1165
|
+
};
|
|
1166
|
+
}
|
|
1167
|
+
async function listDirectoryFiles(dirPath) {
|
|
1168
|
+
const entries = await readdir(dirPath);
|
|
1169
|
+
const files = [];
|
|
1170
|
+
for (const entry of entries) {
|
|
1171
|
+
const full = path12.join(dirPath, entry);
|
|
1172
|
+
const info2 = await stat(full);
|
|
1173
|
+
if (info2.isFile()) files.push(full);
|
|
1174
|
+
}
|
|
1175
|
+
return files;
|
|
1176
|
+
}
|
|
1177
|
+
async function ingestDirectory(dirPath) {
|
|
1178
|
+
const files = await listDirectoryFiles(dirPath);
|
|
1179
|
+
if (files.length === 0) {
|
|
1180
|
+
throw new Error(`No files found in directory: ${dirPath}`);
|
|
1181
|
+
}
|
|
1182
|
+
status("*", info(`Scanning ${files.length} file(s) in: ${dirPath}`));
|
|
1183
|
+
let imported = 0;
|
|
1184
|
+
let skipped = 0;
|
|
1185
|
+
for (const file of files) {
|
|
1186
|
+
try {
|
|
1187
|
+
await ingestSessionFile(file);
|
|
1188
|
+
imported++;
|
|
1189
|
+
} catch (err) {
|
|
1190
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
1191
|
+
status("!", warn(`Skipped ${path12.basename(file)}: ${message}`));
|
|
1192
|
+
skipped++;
|
|
1193
|
+
}
|
|
1194
|
+
}
|
|
1195
|
+
if (imported === 0) {
|
|
1196
|
+
throw new Error(
|
|
1197
|
+
`No sessions imported from ${dirPath} (${skipped} file(s) skipped). Check that at least one file is in a supported session format.`
|
|
1198
|
+
);
|
|
1199
|
+
}
|
|
1200
|
+
status(
|
|
1201
|
+
"\u2192",
|
|
1202
|
+
dim(`Imported ${imported} session(s), skipped ${skipped}.`)
|
|
1203
|
+
);
|
|
1204
|
+
}
|
|
1205
|
+
async function ingestSession(targetPath) {
|
|
1206
|
+
const info2 = await stat(targetPath).catch(() => {
|
|
1207
|
+
throw new Error(`Path not found: ${targetPath}`);
|
|
1208
|
+
});
|
|
1209
|
+
if (info2.isDirectory()) {
|
|
1210
|
+
await ingestDirectory(targetPath);
|
|
1211
|
+
} else {
|
|
1212
|
+
await ingestSessionFile(targetPath);
|
|
1213
|
+
}
|
|
1214
|
+
status("\u2192", dim("Next: llmwiki compile"));
|
|
1215
|
+
}
|
|
1216
|
+
|
|
824
1217
|
// src/commands/compile.ts
|
|
825
1218
|
import { existsSync as existsSync7 } from "fs";
|
|
826
1219
|
|
|
827
1220
|
// src/compiler/index.ts
|
|
828
|
-
import { readFile as
|
|
829
|
-
import
|
|
1221
|
+
import { readFile as readFile18 } from "fs/promises";
|
|
1222
|
+
import path26 from "path";
|
|
830
1223
|
|
|
831
1224
|
// src/utils/state.ts
|
|
832
|
-
import { readFile as
|
|
1225
|
+
import { readFile as readFile11, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
|
|
833
1226
|
import { existsSync } from "fs";
|
|
834
|
-
import
|
|
1227
|
+
import path13 from "path";
|
|
835
1228
|
function emptyState() {
|
|
836
1229
|
return { version: 1, indexHash: "", sources: {} };
|
|
837
1230
|
}
|
|
838
1231
|
async function readState(root) {
|
|
839
|
-
const filePath =
|
|
1232
|
+
const filePath = path13.join(root, STATE_FILE);
|
|
840
1233
|
if (!existsSync(filePath)) {
|
|
841
1234
|
return emptyState();
|
|
842
1235
|
}
|
|
843
1236
|
try {
|
|
844
|
-
const raw = await
|
|
1237
|
+
const raw = await readFile11(filePath, "utf-8");
|
|
845
1238
|
return JSON.parse(raw);
|
|
846
1239
|
} catch {
|
|
847
1240
|
const bakPath = filePath + ".bak";
|
|
@@ -851,9 +1244,9 @@ async function readState(root) {
|
|
|
851
1244
|
}
|
|
852
1245
|
}
|
|
853
1246
|
async function writeState(root, state) {
|
|
854
|
-
const dir =
|
|
1247
|
+
const dir = path13.join(root, LLMWIKI_DIR);
|
|
855
1248
|
await mkdir3(dir, { recursive: true });
|
|
856
|
-
const filePath =
|
|
1249
|
+
const filePath = path13.join(root, STATE_FILE);
|
|
857
1250
|
const tmpPath = filePath + ".tmp";
|
|
858
1251
|
await writeFile3(tmpPath, JSON.stringify(state, null, 2), "utf-8");
|
|
859
1252
|
await rename2(tmpPath, filePath);
|
|
@@ -870,18 +1263,18 @@ async function removeSourceState(root, sourceFile) {
|
|
|
870
1263
|
}
|
|
871
1264
|
|
|
872
1265
|
// src/compiler/source-state.ts
|
|
873
|
-
import
|
|
1266
|
+
import path15 from "path";
|
|
874
1267
|
|
|
875
1268
|
// src/compiler/hasher.ts
|
|
876
|
-
import { createHash } from "crypto";
|
|
877
|
-
import { readFile as
|
|
878
|
-
import
|
|
1269
|
+
import { createHash as createHash2 } from "crypto";
|
|
1270
|
+
import { readFile as readFile12, readdir as readdir2 } from "fs/promises";
|
|
1271
|
+
import path14 from "path";
|
|
879
1272
|
async function hashFile(filePath) {
|
|
880
|
-
const content = await
|
|
881
|
-
return
|
|
1273
|
+
const content = await readFile12(filePath, "utf-8");
|
|
1274
|
+
return createHash2("sha256").update(content).digest("hex");
|
|
882
1275
|
}
|
|
883
1276
|
async function detectChanges(root, prevState) {
|
|
884
|
-
const sourcesPath =
|
|
1277
|
+
const sourcesPath = path14.join(root, SOURCES_DIR);
|
|
885
1278
|
const currentFiles = await listSourceFiles(sourcesPath);
|
|
886
1279
|
const changes = [];
|
|
887
1280
|
for (const file of currentFiles) {
|
|
@@ -894,14 +1287,14 @@ async function detectChanges(root, prevState) {
|
|
|
894
1287
|
}
|
|
895
1288
|
/**
 * Lists the entries of a sources directory whose names end in ".md".
 *
 * Any error thrown while reading the directory is caught and reported as an
 * empty list rather than propagated.
 *
 * @param {string} sourcesPath - Directory to list.
 * @returns {Promise<string[]>} Entry names ending in ".md", or [] on failure.
 */
async function listSourceFiles(sourcesPath) {
|
|
896
1289
|
try {
|
|
897
|
-
const entries = await
|
|
|
1290
|
+
const entries = await readdir2(sourcesPath);
|
|
898
1291
|
return entries.filter((f) => f.endsWith(".md"));
|
|
899
1292
|
} catch {
|
|
900
1293
|
return [];
|
|
901
1294
|
}
|
|
902
1295
|
}
|
|
903
1296
|
async function classifyFile(root, file, prevState) {
|
|
904
|
-
const filePath =
|
|
1297
|
+
const filePath = path14.join(root, SOURCES_DIR, file);
|
|
905
1298
|
const hash = await hashFile(filePath);
|
|
906
1299
|
const prev = prevState.sources[file];
|
|
907
1300
|
if (!prev) return "new";
|
|
@@ -924,7 +1317,7 @@ async function buildExtractionSourceStates(root, extractions) {
|
|
|
924
1317
|
return snapshot;
|
|
925
1318
|
}
|
|
926
1319
|
async function buildEntry(root, result, compiledAt) {
|
|
927
|
-
const filePath =
|
|
1320
|
+
const filePath = path15.join(root, SOURCES_DIR, result.sourceFile);
|
|
928
1321
|
const hash = await hashFile(filePath);
|
|
929
1322
|
return {
|
|
930
1323
|
hash,
|
|
@@ -1162,8 +1555,8 @@ async function callClaude(options) {
|
|
|
1162
1555
|
}
|
|
1163
1556
|
|
|
1164
1557
|
// src/utils/lock.ts
|
|
1165
|
-
import { open, readFile as
|
|
1166
|
-
import
|
|
1558
|
+
import { open, readFile as readFile13, unlink, mkdir as mkdir4 } from "fs/promises";
|
|
1559
|
+
import path16 from "path";
|
|
1167
1560
|
var RECLAIM_SUFFIX = ".reclaim";
|
|
1168
1561
|
var MAX_ACQUIRE_ATTEMPTS = 2;
|
|
1169
1562
|
function isProcessAlive(pid) {
|
|
@@ -1175,8 +1568,8 @@ function isProcessAlive(pid) {
|
|
|
1175
1568
|
}
|
|
1176
1569
|
}
|
|
1177
1570
|
async function acquireLock(root) {
|
|
1178
|
-
const lockPath =
|
|
1179
|
-
await mkdir4(
|
|
1571
|
+
const lockPath = path16.join(root, LOCK_FILE);
|
|
1572
|
+
await mkdir4(path16.join(root, LLMWIKI_DIR), { recursive: true });
|
|
1180
1573
|
for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
|
|
1181
1574
|
const created = await tryCreateLock(lockPath);
|
|
1182
1575
|
if (created) return true;
|
|
@@ -1239,7 +1632,7 @@ async function tryCreateLock(lockPath) {
|
|
|
1239
1632
|
}
|
|
1240
1633
|
async function isLockStale(lockPath) {
|
|
1241
1634
|
try {
|
|
1242
|
-
const content = await
|
|
1635
|
+
const content = await readFile13(lockPath, "utf-8");
|
|
1243
1636
|
const pid = parseInt(content.trim(), 10);
|
|
1244
1637
|
if (isNaN(pid)) return true;
|
|
1245
1638
|
return !isProcessAlive(pid);
|
|
@@ -1248,14 +1641,32 @@ async function isLockStale(lockPath) {
|
|
|
1248
1641
|
}
|
|
1249
1642
|
}
|
|
1250
1643
|
/**
 * Removes the lock file located at LOCK_FILE under the given root.
 *
 * A failed unlink is swallowed by the empty catch block, so an unlink error
 * does not cause this function to reject.
 *
 * @param {string} root - Project root that LOCK_FILE is joined onto.
 * @returns {Promise<void>}
 */
async function releaseLock(root) {
|
|
1251
|
-
const lockPath =
|
|
|
1644
|
+
const lockPath = path16.join(root, LOCK_FILE);
|
|
1252
1645
|
try {
|
|
1253
1646
|
await unlink(lockPath);
|
|
1254
1647
|
} catch {
|
|
1255
1648
|
}
|
|
1256
1649
|
}
|
|
1257
1650
|
|
|
1651
|
+
// src/utils/output-language.ts
|
|
1652
|
+
var LANG_ENV_VAR = "LLMWIKI_OUTPUT_LANG";
|
|
1653
|
+
/**
 * Reads the requested output language from the environment.
 *
 * Looks up the variable named by LANG_ENV_VAR and trims surrounding
 * whitespace from its value.
 *
 * @returns {string|null} The trimmed value, or null when the variable is
 *   unset, empty, or contains only whitespace.
 */
function getOutputLanguage() {
  const configured = process.env[LANG_ENV_VAR]?.trim();
  return configured ? configured : null;
}
|
|
1659
|
+
/**
 * Builds the prompt sentence that asks for output in the configured language.
 *
 * @returns {string} "Write the output in <lang>." when getOutputLanguage()
 *   yields a language, otherwise the empty string.
 */
function languageDirective() {
  const lang = getOutputLanguage();
  return lang ? `Write the output in ${lang}.` : "";
}
|
|
1664
|
+
|
|
1258
1665
|
// src/compiler/prompts.ts
|
|
1666
|
+
/**
 * Appends the language directive (if any) to a list of prompt lines.
 *
 * @param {...string} lines - Prompt lines, in order.
 * @returns {string[]} The same `lines` array when no directive is configured,
 *   otherwise a new array with the directive added as the last element.
 */
function withLangLine(...lines) {
  const directive = languageDirective();
  if (!directive) {
    return lines;
  }
  return lines.concat([directive]);
}
|
|
1259
1670
|
var PROVENANCE_STATE_VALUES = [
|
|
1260
1671
|
"extracted",
|
|
1261
1672
|
"merged",
|
|
@@ -1310,10 +1721,6 @@ var CONCEPT_EXTRACTION_TOOL = {
|
|
|
1310
1721
|
required: ["slug"]
|
|
1311
1722
|
},
|
|
1312
1723
|
description: "Slugs of other concepts whose evidence contradicts this one."
|
|
1313
|
-
},
|
|
1314
|
-
inferred_paragraphs: {
|
|
1315
|
-
type: "integer",
|
|
1316
|
-
description: "Estimated number of paragraphs in the page that will be inferred rather than directly cited."
|
|
1317
1724
|
}
|
|
1318
1725
|
},
|
|
1319
1726
|
required: ["concept", "summary", "is_new"]
|
|
@@ -1330,11 +1737,13 @@ Here is the existing wiki index \u2014 avoid duplicating concepts already covere
|
|
|
1330
1737
|
|
|
1331
1738
|
${existingIndex}` : "\n\nNo existing wiki pages yet.";
|
|
1332
1739
|
return [
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1740
|
+
...withLangLine(
|
|
1741
|
+
"You are a knowledge extraction engine. Analyze the following source document",
|
|
1742
|
+
"and identify 3-8 distinct, meaningful concepts worth documenting as wiki pages.",
|
|
1743
|
+
"Each concept should be a standalone topic that someone might look up.",
|
|
1744
|
+
"Focus on key ideas, techniques, patterns, or entities \u2014 not trivial details.",
|
|
1745
|
+
"Use the extract_concepts tool to return your findings."
|
|
1746
|
+
),
|
|
1338
1747
|
"",
|
|
1339
1748
|
"For every concept, emit provenance metadata so downstream tools can reason",
|
|
1340
1749
|
"about reliability:",
|
|
@@ -1344,8 +1753,6 @@ ${existingIndex}` : "\n\nNo existing wiki pages yet.";
|
|
|
1344
1753
|
" or 'ambiguous' if the source is contradictory or unclear.",
|
|
1345
1754
|
" - contradicted_by: slugs of other concepts (in this batch or the index)",
|
|
1346
1755
|
" whose evidence conflicts with this one.",
|
|
1347
|
-
" - inferred_paragraphs: estimated number of paragraphs in the resulting",
|
|
1348
|
-
" page that will be inferred rather than directly citable.",
|
|
1349
1756
|
indexSection,
|
|
1350
1757
|
"\n\n--- SOURCE DOCUMENT ---\n\n",
|
|
1351
1758
|
sourceContent
|
|
@@ -1363,11 +1770,13 @@ Related wiki pages for cross-referencing:
|
|
|
1363
1770
|
|
|
1364
1771
|
${relatedPages}` : "";
|
|
1365
1772
|
return [
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1773
|
+
...withLangLine(
|
|
1774
|
+
`You are a wiki author. Write a clear, well-structured markdown page about "${concept}".`,
|
|
1775
|
+
"Draw facts only from the provided source material.",
|
|
1776
|
+
"Include a ## Sources section at the end listing the source document.",
|
|
1777
|
+
"Suggest [[wikilinks]] to related concepts where appropriate.",
|
|
1778
|
+
"Write in a neutral, informative tone. Be concise but thorough."
|
|
1779
|
+
),
|
|
1371
1780
|
"",
|
|
1372
1781
|
"Source attribution: at the end of each prose paragraph, append a citation",
|
|
1373
1782
|
"marker showing which source file(s) the paragraph drew from.",
|
|
@@ -1384,7 +1793,7 @@ ${relatedPages}` : "";
|
|
|
1384
1793
|
"",
|
|
1385
1794
|
"If a paragraph is your inference rather than a direct extraction, leave it",
|
|
1386
1795
|
"uncited \u2014 downstream lint rules will count uncited paragraphs as 'inferred'",
|
|
1387
|
-
"
|
|
1796
|
+
"so lint can surface excess-inferred-paragraphs warnings on review.",
|
|
1388
1797
|
existingSection,
|
|
1389
1798
|
relatedSection,
|
|
1390
1799
|
"\n\n--- SOURCE MATERIAL ---\n\n",
|
|
@@ -1416,20 +1825,21 @@ function mapRawConcept(c) {
|
|
|
1416
1825
|
tags: Array.isArray(c.tags) ? c.tags : void 0,
|
|
1417
1826
|
confidence: typeof c.confidence === "number" ? c.confidence : void 0,
|
|
1418
1827
|
provenanceState: provenance,
|
|
1419
|
-
contradictedBy: coerceContradictedBy(c.contradicted_by)
|
|
1420
|
-
inferredParagraphs: typeof c.inferred_paragraphs === "number" && Number.isInteger(c.inferred_paragraphs) && c.inferred_paragraphs >= 0 ? c.inferred_paragraphs : void 0
|
|
1828
|
+
contradictedBy: coerceContradictedBy(c.contradicted_by)
|
|
1421
1829
|
};
|
|
1422
1830
|
}
|
|
1423
1831
|
function buildSeedPagePrompt(seed, rule, relatedPagesContent) {
|
|
1424
1832
|
const minLinks = rule.minWikilinks;
|
|
1425
1833
|
const linkExpectation = minLinks > 0 ? `Include at least ${minLinks} [[wikilinks]] to related pages.` : "Use [[wikilinks]] when referencing other pages.";
|
|
1426
1834
|
return [
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1835
|
+
...withLangLine(
|
|
1836
|
+
`You are a wiki author. Write a ${seed.kind} page titled "${seed.title}".`,
|
|
1837
|
+
`Page-kind guidance: ${rule.description}`,
|
|
1838
|
+
`Summary line for context: ${seed.summary}`,
|
|
1839
|
+
"Draw facts only from the related wiki pages provided below.",
|
|
1840
|
+
linkExpectation,
|
|
1841
|
+
"Write in a neutral, informative tone. Be concise but thorough."
|
|
1842
|
+
),
|
|
1433
1843
|
"\n\n--- RELATED PAGES ---\n\n",
|
|
1434
1844
|
relatedPagesContent
|
|
1435
1845
|
].join("\n");
|
|
@@ -1491,8 +1901,8 @@ function buildDefaultSchema() {
|
|
|
1491
1901
|
|
|
1492
1902
|
// src/schema/loader.ts
|
|
1493
1903
|
import { existsSync as existsSync2 } from "fs";
|
|
1494
|
-
import { readFile as
|
|
1495
|
-
import
|
|
1904
|
+
import { readFile as readFile14 } from "fs/promises";
|
|
1905
|
+
import path17 from "path";
|
|
1496
1906
|
import yaml2 from "js-yaml";
|
|
1497
1907
|
var SCHEMA_CANDIDATE_PATHS = [
|
|
1498
1908
|
".llmwiki/schema.json",
|
|
@@ -1503,7 +1913,7 @@ var SCHEMA_CANDIDATE_PATHS = [
|
|
|
1503
1913
|
];
|
|
1504
1914
|
function findSchemaPath(root) {
|
|
1505
1915
|
for (const candidate of SCHEMA_CANDIDATE_PATHS) {
|
|
1506
|
-
const absolute =
|
|
1916
|
+
const absolute = path17.join(root, candidate);
|
|
1507
1917
|
if (existsSync2(absolute)) return absolute;
|
|
1508
1918
|
}
|
|
1509
1919
|
return null;
|
|
@@ -1556,12 +1966,12 @@ async function loadSchema(root) {
|
|
|
1556
1966
|
const defaults = buildDefaultSchema();
|
|
1557
1967
|
const schemaPath = findSchemaPath(root);
|
|
1558
1968
|
if (!schemaPath) return defaults;
|
|
1559
|
-
const raw = await
|
|
1969
|
+
const raw = await readFile14(schemaPath, "utf-8");
|
|
1560
1970
|
const parsed = parseSchemaFile(schemaPath, raw);
|
|
1561
1971
|
return applyOverrides(defaults, parsed, schemaPath);
|
|
1562
1972
|
}
|
|
1563
1973
|
/**
 * Returns the first entry of SCHEMA_CANDIDATE_PATHS joined onto the root.
 *
 * NOTE(review): judging by the name this is the location used when creating a
 * new schema file — confirm against the caller.
 *
 * @param {string} root - Project root directory.
 * @returns {string} Path of the first schema candidate under `root`.
 */
function defaultSchemaInitPath(root) {
|
|
1564
|
-
return
|
|
|
1974
|
+
return path17.join(root, SCHEMA_CANDIDATE_PATHS[0]);
|
|
1565
1975
|
}
|
|
1566
1976
|
|
|
1567
1977
|
// src/schema/helpers.ts
|
|
@@ -1733,7 +2143,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
|
|
|
1733
2143
|
}
|
|
1734
2144
|
|
|
1735
2145
|
// src/compiler/orphan.ts
|
|
1736
|
-
import
|
|
2146
|
+
import path18 from "path";
|
|
1737
2147
|
async function markOrphaned(root, sourceFile, state) {
|
|
1738
2148
|
const sourceEntry = state.sources[sourceFile];
|
|
1739
2149
|
if (!sourceEntry) return;
|
|
@@ -1759,7 +2169,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
|
|
|
1759
2169
|
}
|
|
1760
2170
|
}
|
|
1761
2171
|
async function orphanPage(root, slug, reason) {
|
|
1762
|
-
const pagePath =
|
|
2172
|
+
const pagePath = path18.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
1763
2173
|
const content = await safeReadFile(pagePath);
|
|
1764
2174
|
if (!content) return;
|
|
1765
2175
|
const { meta } = parseFrontmatter(content);
|
|
@@ -1770,18 +2180,18 @@ async function orphanPage(root, slug, reason) {
|
|
|
1770
2180
|
}
|
|
1771
2181
|
|
|
1772
2182
|
// src/compiler/resolver.ts
|
|
1773
|
-
import { readdir as
|
|
1774
|
-
import
|
|
2183
|
+
import { readdir as readdir3, readFile as readFile15 } from "fs/promises";
|
|
2184
|
+
import path19 from "path";
|
|
1775
2185
|
import { existsSync as existsSync3 } from "fs";
|
|
1776
2186
|
async function buildTitleIndex(root) {
|
|
1777
|
-
const conceptsDir =
|
|
2187
|
+
const conceptsDir = path19.join(root, CONCEPTS_DIR);
|
|
1778
2188
|
if (!existsSync3(conceptsDir)) return [];
|
|
1779
|
-
const files = await
|
|
2189
|
+
const files = await readdir3(conceptsDir);
|
|
1780
2190
|
const pages = [];
|
|
1781
2191
|
for (const file of files) {
|
|
1782
2192
|
if (!file.endsWith(".md")) continue;
|
|
1783
|
-
const filePath =
|
|
1784
|
-
const content = await
|
|
2193
|
+
const filePath = path19.join(conceptsDir, file);
|
|
2194
|
+
const content = await readFile15(filePath, "utf-8");
|
|
1785
2195
|
const { meta } = parseFrontmatter(content);
|
|
1786
2196
|
if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
|
|
1787
2197
|
pages.push({
|
|
@@ -1867,7 +2277,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
|
|
|
1867
2277
|
let count = 0;
|
|
1868
2278
|
for (const page of titleIndex) {
|
|
1869
2279
|
if (newSlugs.includes(page.slug)) continue;
|
|
1870
|
-
const content = await
|
|
2280
|
+
const content = await readFile15(page.filePath, "utf-8");
|
|
1871
2281
|
const { body } = parseFrontmatter(content);
|
|
1872
2282
|
const linked = addWikilinks(body, newTitles, page.title);
|
|
1873
2283
|
if (linked !== body) {
|
|
@@ -1879,7 +2289,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
|
|
|
1879
2289
|
return count;
|
|
1880
2290
|
}
|
|
1881
2291
|
async function linkPage(page, titleIndex) {
|
|
1882
|
-
const content = await
|
|
2292
|
+
const content = await readFile15(page.filePath, "utf-8");
|
|
1883
2293
|
const { body } = parseFrontmatter(content);
|
|
1884
2294
|
const linked = addWikilinks(body, titleIndex, page.title);
|
|
1885
2295
|
if (linked === body) return false;
|
|
@@ -1889,18 +2299,18 @@ async function linkPage(page, titleIndex) {
|
|
|
1889
2299
|
}
|
|
1890
2300
|
|
|
1891
2301
|
// src/compiler/indexgen.ts
|
|
1892
|
-
import { readdir as
|
|
1893
|
-
import
|
|
2302
|
+
import { readdir as readdir4 } from "fs/promises";
|
|
2303
|
+
import path20 from "path";
|
|
1894
2304
|
async function generateIndex(root) {
|
|
1895
2305
|
status("*", info("Generating index..."));
|
|
1896
|
-
const conceptsPath =
|
|
1897
|
-
const queriesPath =
|
|
2306
|
+
const conceptsPath = path20.join(root, CONCEPTS_DIR);
|
|
2307
|
+
const queriesPath = path20.join(root, QUERIES_DIR);
|
|
1898
2308
|
const concepts = await collectPageSummaries(conceptsPath);
|
|
1899
2309
|
const queries = await collectPageSummaries(queriesPath);
|
|
1900
2310
|
concepts.sort((a, b) => a.title.localeCompare(b.title));
|
|
1901
2311
|
queries.sort((a, b) => a.title.localeCompare(b.title));
|
|
1902
2312
|
const indexContent = buildIndexContent(concepts, queries);
|
|
1903
|
-
const indexPath =
|
|
2313
|
+
const indexPath = path20.join(root, INDEX_FILE);
|
|
1904
2314
|
await atomicWrite(indexPath, indexContent);
|
|
1905
2315
|
const total = concepts.length + queries.length;
|
|
1906
2316
|
status("+", success(`Index updated with ${total} pages.`));
|
|
@@ -1908,13 +2318,13 @@ async function generateIndex(root) {
|
|
|
1908
2318
|
async function scanWikiPages(dirPath) {
|
|
1909
2319
|
let files;
|
|
1910
2320
|
try {
|
|
1911
|
-
files = await
|
|
2321
|
+
files = await readdir4(dirPath);
|
|
1912
2322
|
} catch {
|
|
1913
2323
|
return [];
|
|
1914
2324
|
}
|
|
1915
2325
|
const scanned = [];
|
|
1916
2326
|
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
1917
|
-
const content = await safeReadFile(
|
|
2327
|
+
const content = await safeReadFile(path20.join(dirPath, file));
|
|
1918
2328
|
const { meta } = parseFrontmatter(content);
|
|
1919
2329
|
scanned.push({ slug: file.replace(/\.md$/, ""), meta });
|
|
1920
2330
|
}
|
|
@@ -1949,9 +2359,45 @@ function buildIndexContent(concepts, queries) {
|
|
|
1949
2359
|
return lines.join("\n");
|
|
1950
2360
|
}
|
|
1951
2361
|
|
|
2362
|
+
// src/compiler/prompt-budget.ts
|
|
2363
|
+
var TRUNCATION_MARKER = "\n\n[\u2026truncated for prompt budget \u2014 see #39\u2026]";
|
|
2364
|
+
/**
 * Resolves the prompt budget, in characters, for combined source content.
 *
 * Reads the environment variable named by PROMPT_BUDGET_ENV_VAR. The default
 * DEFAULT_PROMPT_BUDGET_CHARS is used when the variable is unset, empty, or
 * not a plain positive base-10 integer.
 *
 * @returns {number} Positive budget in characters.
 */
function resolvePromptBudgetChars() {
  const raw = process.env[PROMPT_BUDGET_ENV_VAR];
  if (!raw) return DEFAULT_PROMPT_BUDGET_CHARS;
  const trimmed = raw.trim();
  // parseInt() stops at the first non-digit, so "1e6" would parse as 1 and
  // "32k" as 32, silently shrinking the budget to almost nothing. Only accept
  // values that are digits end to end; anything else falls back to the default.
  if (!/^\+?\d+$/.test(trimmed)) return DEFAULT_PROMPT_BUDGET_CHARS;
  const parsed = Number.parseInt(trimmed, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_PROMPT_BUDGET_CHARS;
  return parsed;
}
|
|
2371
|
+
/**
 * Formats the source slices for a concept into one prompt section, truncating
 * them when their combined length exceeds the prompt budget.
 *
 * When the total content length fits the budget the slices are formatted
 * unchanged. Otherwise the budget is split evenly across the slices, a
 * warning is emitted, and every slice longer than its share is cut to that
 * share and suffixed with TRUNCATION_MARKER. The marker is added on top of the
 * share, so the final text can exceed the budget slightly.
 *
 * @param {string} concept - Concept title, used only in the warning.
 * @param {Array<{file: string, content: string}>} slices - Source slices.
 * @returns {string} Formatted combined content produced by formatSlices().
 */
function buildBudgetedCombinedContent(concept, slices) {
  const budget = resolvePromptBudgetChars();
  const totalRaw = slices.reduce((sum, s) => sum + s.content.length, 0);
  if (totalRaw <= budget) {
    return formatSlices(slices);
  }
  // At least one character per source, even when the budget is smaller than
  // the number of sources.
  const perSource = Math.max(1, Math.floor(budget / slices.length));
  warnTruncation(concept, totalRaw, slices.length, perSource, budget);
  const trimmed = slices.map(
    (s) => s.content.length > perSource ? { ...s, content: truncateAtCodePointBoundary(s.content, perSource) + TRUNCATION_MARKER } : s
  );
  return formatSlices(trimmed);
}

/**
 * Returns the first `maxUnits` UTF-16 code units of `text`, backing off by one
 * unit when the cut would land between the two halves of a surrogate pair, so
 * the result never ends in a lone high surrogate created by the cut.
 */
function truncateAtCodePointBoundary(text, maxUnits) {
  const head = text.slice(0, maxUnits);
  const lastUnit = head.charCodeAt(head.length - 1);
  const nextUnit = text.charCodeAt(maxUnits);
  const splitsPair = lastUnit >= 0xd800 && lastUnit <= 0xdbff && nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
  return splitsPair ? head.slice(0, -1) : head;
}
|
|
2384
|
+
/**
 * Renders source slices as labelled sections of a prompt.
 *
 * Each slice becomes a "--- SOURCE: <file> ---" header, a blank line, and the
 * slice content; sections are separated by one blank line.
 *
 * @param {Array<{file: string, content: string}>} slices - Slices to render.
 * @returns {string} The joined sections ("" for an empty array).
 */
function formatSlices(slices) {
  const sections = slices.map(({ file, content }) => {
    const header = `--- SOURCE: ${file} ---`;
    return `${header}\n\n${content}`;
  });
  return sections.join("\n\n");
}
|
|
2389
|
+
/**
 * Emits a "!" status warning that a concept's combined source content is being
 * truncated to fit the prompt budget.
 *
 * @param {string} concept - Concept title shown in the message.
 * @param {number} totalRaw - Total characters across all sources.
 * @param {number} sourceCount - Number of sources being combined.
 * @param {number} perSource - Approximate characters kept per source.
 * @param {number} budget - Prompt budget in characters.
 * @returns {void}
 */
function warnTruncation(concept, totalRaw, sourceCount, perSource, budget) {
  const message = `Combined source content for "${concept}" (${totalRaw.toLocaleString()} chars across ${sourceCount} sources) exceeds the ${budget.toLocaleString()}-char prompt budget; truncating each source to ~${perSource.toLocaleString()} chars. Raise via ${PROMPT_BUDGET_ENV_VAR} when running against larger-context models.`;
  status("!", warn(message));
}
|
|
2397
|
+
|
|
1952
2398
|
// src/compiler/obsidian.ts
|
|
1953
|
-
import { readdir as
|
|
1954
|
-
import
|
|
2399
|
+
import { readdir as readdir5 } from "fs/promises";
|
|
2400
|
+
import path21 from "path";
|
|
1955
2401
|
var ABBREVIATION_MIN_WORDS = 3;
|
|
1956
2402
|
var SWAP_CONJUNCTIONS = [" and ", " or "];
|
|
1957
2403
|
function addObsidianMeta(frontmatter, conceptTitle, tags) {
|
|
@@ -1993,23 +2439,23 @@ function generateAbbreviation(title) {
|
|
|
1993
2439
|
return abbreviation;
|
|
1994
2440
|
}
|
|
1995
2441
|
/**
 * Regenerates the MOC file under the given root.
 *
 * Loads the concept pages from CONCEPTS_DIR, groups them by tag, builds the
 * MOC content from those groups and writes it to MOC_FILE via atomicWrite().
 *
 * NOTE(review): "MOC" presumably stands for an Obsidian "map of content" —
 * confirm.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<void>}
 */
async function generateMOC(root) {
|
|
1996
|
-
const conceptsPath =
|
|
|
2442
|
+
const conceptsPath = path21.join(root, CONCEPTS_DIR);
|
|
1997
2443
|
const pages = await loadConceptPages(conceptsPath);
|
|
1998
2444
|
const tagGroups = groupPagesByTag(pages);
|
|
1999
2445
|
const content = buildMOCContent(tagGroups);
|
|
2000
|
-
await atomicWrite(
|
|
|
2446
|
+
await atomicWrite(path21.join(root, MOC_FILE), content);
|
|
2001
2447
|
}
|
|
2002
2448
|
async function loadConceptPages(conceptsPath) {
|
|
2003
2449
|
let files;
|
|
2004
2450
|
try {
|
|
2005
|
-
files = await
|
|
2451
|
+
files = await readdir5(conceptsPath);
|
|
2006
2452
|
} catch {
|
|
2007
2453
|
return [];
|
|
2008
2454
|
}
|
|
2009
2455
|
const pages = [];
|
|
2010
2456
|
for (const file of files) {
|
|
2011
2457
|
if (!file.endsWith(".md")) continue;
|
|
2012
|
-
const content = await safeReadFile(
|
|
2458
|
+
const content = await safeReadFile(path21.join(conceptsPath, file));
|
|
2013
2459
|
if (!content) continue;
|
|
2014
2460
|
const { meta } = parseFrontmatter(content);
|
|
2015
2461
|
if (meta.orphaned) continue;
|
|
@@ -2060,14 +2506,14 @@ function buildMOCContent(tagGroups) {
|
|
|
2060
2506
|
}
|
|
2061
2507
|
|
|
2062
2508
|
// src/utils/embeddings.ts
|
|
2063
|
-
import { readFile as
|
|
2509
|
+
import { readFile as readFile16, readdir as readdir6 } from "fs/promises";
|
|
2064
2510
|
import { existsSync as existsSync4 } from "fs";
|
|
2065
|
-
import
|
|
2511
|
+
import path22 from "path";
|
|
2066
2512
|
|
|
2067
2513
|
// src/utils/retrieval.ts
|
|
2068
|
-
import { createHash as
|
|
2514
|
+
import { createHash as createHash3 } from "crypto";
|
|
2069
2515
|
/**
 * Returns a short content hash for a chunk of text: the first 16 hex
 * characters of the SHA-256 digest of `text` encoded as UTF-8.
 *
 * @param {string} text - Chunk text to hash.
 * @returns {string} 16-character hex prefix of the SHA-256 digest.
 */
function hashChunkText(text) {
|
|
2070
|
-
return
|
|
|
2516
|
+
return createHash3("sha256").update(text, "utf8").digest("hex").slice(0, 16);
|
|
2071
2517
|
}
|
|
2072
2518
|
function splitIntoChunks(body) {
|
|
2073
2519
|
const paragraphs = extractParagraphs(body);
|
|
@@ -2227,13 +2673,13 @@ function findTopKChunks(queryVec, chunks, k) {
|
|
|
2227
2673
|
return scored.slice(0, k);
|
|
2228
2674
|
}
|
|
2229
2675
|
/**
 * Loads the embedding store from EMBEDDINGS_FILE under the given root.
 *
 * Returns null when the file does not exist. Otherwise the file is read as
 * UTF-8 and parsed as JSON; a file that is not valid JSON makes JSON.parse
 * throw, which rejects the returned promise.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object|null>} Parsed store, or null if the file is absent.
 */
async function readEmbeddingStore(root) {
|
|
2230
|
-
const filePath =
|
|
|
2676
|
+
const filePath = path22.join(root, EMBEDDINGS_FILE);
|
|
2231
2677
|
if (!existsSync4(filePath)) return null;
|
|
2232
|
-
const raw = await
|
|
|
2678
|
+
const raw = await readFile16(filePath, "utf-8");
|
|
2233
2679
|
return JSON.parse(raw);
|
|
2234
2680
|
}
|
|
2235
2681
|
/**
 * Persists the embedding store to EMBEDDINGS_FILE under the given root.
 *
 * The store is serialized as JSON with 2-space indentation and handed to
 * atomicWrite().
 *
 * @param {string} root - Project root directory.
 * @param {object} store - Embedding store to serialize.
 * @returns {Promise<void>}
 */
async function writeEmbeddingStore(root, store) {
|
|
2236
|
-
const filePath =
|
|
|
2682
|
+
const filePath = path22.join(root, EMBEDDINGS_FILE);
|
|
2237
2683
|
await atomicWrite(filePath, JSON.stringify(store, null, 2));
|
|
2238
2684
|
}
|
|
2239
2685
|
async function findRelevantPages(root, question) {
|
|
@@ -2265,10 +2711,10 @@ async function loadActiveStore(root, hasContent) {
|
|
|
2265
2711
|
async function collectPageRecords(root) {
|
|
2266
2712
|
const records = [];
|
|
2267
2713
|
for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
|
|
2268
|
-
const absDir =
|
|
2714
|
+
const absDir = path22.join(root, dir);
|
|
2269
2715
|
let files;
|
|
2270
2716
|
try {
|
|
2271
|
-
files = await
|
|
2717
|
+
files = await readdir6(absDir);
|
|
2272
2718
|
} catch {
|
|
2273
2719
|
continue;
|
|
2274
2720
|
}
|
|
@@ -2280,7 +2726,7 @@ async function collectPageRecords(root) {
|
|
|
2280
2726
|
return records;
|
|
2281
2727
|
}
|
|
2282
2728
|
async function readPageRecord(absDir, file) {
|
|
2283
|
-
const content = await safeReadFile(
|
|
2729
|
+
const content = await safeReadFile(path22.join(absDir, file));
|
|
2284
2730
|
const { meta, body } = parseFrontmatter(content);
|
|
2285
2731
|
if (meta.orphaned || typeof meta.title !== "string") return null;
|
|
2286
2732
|
return {
|
|
@@ -2442,9 +2888,9 @@ function shouldRunEmbedding(modelChanged, toEmbed, previousEntries, previousChun
|
|
|
2442
2888
|
}
|
|
2443
2889
|
|
|
2444
2890
|
// src/compiler/candidates.ts
|
|
2445
|
-
import { readdir as
|
|
2891
|
+
import { readdir as readdir7, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
|
|
2446
2892
|
import { existsSync as existsSync5 } from "fs";
|
|
2447
|
-
import
|
|
2893
|
+
import path23 from "path";
|
|
2448
2894
|
import { randomBytes } from "crypto";
|
|
2449
2895
|
var ID_SUFFIX_BYTES = 4;
|
|
2450
2896
|
var CANDIDATE_EXT = ".json";
|
|
@@ -2453,10 +2899,10 @@ function buildCandidateId(slug) {
|
|
|
2453
2899
|
return `${slug}-${suffix}`;
|
|
2454
2900
|
}
|
|
2455
2901
|
/**
 * Builds the path of a candidate file: `<id>` plus CANDIDATE_EXT inside
 * CANDIDATES_DIR under the given root.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate identifier (used as the file's base name).
 * @returns {string} Path of the candidate file.
 */
function candidatePath(root, id) {
|
|
2456
|
-
return
|
|
|
2902
|
+
return path23.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
|
|
2457
2903
|
}
|
|
2458
2904
|
/**
 * Builds the path of an archived candidate file: `<id>` plus CANDIDATE_EXT
 * inside CANDIDATES_ARCHIVE_DIR under the given root.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate identifier (used as the file's base name).
 * @returns {string} Path of the archived candidate file.
 */
function archivePath(root, id) {
|
|
2459
|
-
return
|
|
|
2905
|
+
return path23.join(root, CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_EXT}`);
|
|
2460
2906
|
}
|
|
2461
2907
|
async function writeCandidate(root, draft) {
|
|
2462
2908
|
const candidate = {
|
|
@@ -2468,7 +2914,8 @@ async function writeCandidate(root, draft) {
|
|
|
2468
2914
|
body: draft.body,
|
|
2469
2915
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2470
2916
|
...draft.sourceStates ? { sourceStates: draft.sourceStates } : {},
|
|
2471
|
-
...draft.schemaViolations ? { schemaViolations: draft.schemaViolations } : {}
|
|
2917
|
+
...draft.schemaViolations ? { schemaViolations: draft.schemaViolations } : {},
|
|
2918
|
+
...draft.provenanceViolations ? { provenanceViolations: draft.provenanceViolations } : {}
|
|
2472
2919
|
};
|
|
2473
2920
|
await atomicWrite(candidatePath(root, candidate.id), JSON.stringify(candidate, null, 2));
|
|
2474
2921
|
return candidate;
|
|
@@ -2507,9 +2954,9 @@ function isValidCandidate(value) {
|
|
|
2507
2954
|
return typeof candidate.id === "string" && typeof candidate.title === "string" && typeof candidate.slug === "string" && typeof candidate.body === "string" && Array.isArray(candidate.sources);
|
|
2508
2955
|
}
|
|
2509
2956
|
async function listCandidates(root) {
|
|
2510
|
-
const dir =
|
|
2957
|
+
const dir = path23.join(root, CANDIDATES_DIR);
|
|
2511
2958
|
if (!existsSync5(dir)) return [];
|
|
2512
|
-
const entries = await
|
|
2959
|
+
const entries = await readdir7(dir, { withFileTypes: true });
|
|
2513
2960
|
const candidates = [];
|
|
2514
2961
|
for (const entry of entries) {
|
|
2515
2962
|
if (!entry.isFile() || !entry.name.endsWith(CANDIDATE_EXT)) continue;
|
|
@@ -2534,7 +2981,7 @@ async function archiveCandidate(root, id) {
|
|
|
2534
2981
|
const sourcePath = candidatePath(root, id);
|
|
2535
2982
|
if (!existsSync5(sourcePath)) return false;
|
|
2536
2983
|
const target = archivePath(root, id);
|
|
2537
|
-
await mkdir5(
|
|
2984
|
+
await mkdir5(path23.dirname(target), { recursive: true });
|
|
2538
2985
|
try {
|
|
2539
2986
|
await rename3(sourcePath, target);
|
|
2540
2987
|
} catch {
|
|
@@ -2546,9 +2993,9 @@ async function archiveCandidate(root, id) {
|
|
|
2546
2993
|
}
|
|
2547
2994
|
|
|
2548
2995
|
// src/linter/rules.ts
|
|
2549
|
-
import { readdir as
|
|
2996
|
+
import { readdir as readdir8, readFile as readFile17 } from "fs/promises";
|
|
2550
2997
|
import { existsSync as existsSync6 } from "fs";
|
|
2551
|
-
import
|
|
2998
|
+
import path24 from "path";
|
|
2552
2999
|
var MIN_BODY_LENGTH = 50;
|
|
2553
3000
|
var WIKILINK_PATTERN2 = /\[\[([^\]]+)\]\]/g;
|
|
2554
3001
|
var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
|
|
@@ -2565,26 +3012,26 @@ function findMatchesInContent(content, pattern) {
|
|
|
2565
3012
|
}
|
|
2566
3013
|
/**
 * Reads every ".md" file directly inside a directory.
 *
 * Returns [] when the directory does not exist. The files are read
 * concurrently through Promise.all, so a single failed read rejects the whole
 * call.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {Promise<Array<{filePath: string, content: string}>>} One record
 *   per markdown file, holding its path and UTF-8 content.
 */
async function readMarkdownFiles(dirPath) {
|
|
2567
3014
|
if (!existsSync6(dirPath)) return [];
|
|
2568
|
-
const entries = await
|
|
|
3015
|
+
const entries = await readdir8(dirPath);
|
|
2569
3016
|
const mdFiles = entries.filter((f) => f.endsWith(".md"));
|
|
2570
3017
|
const results = await Promise.all(
|
|
2571
3018
|
mdFiles.map(async (fileName) => {
|
|
2572
|
-
const filePath =
|
|
|
2573
|
-
const content = await
|
|
|
3019
|
+
const filePath = path24.join(dirPath, fileName);
|
|
3020
|
+
const content = await readFile17(filePath, "utf-8");
|
|
2574
3021
|
return { filePath, content };
|
|
2575
3022
|
})
|
|
2576
3023
|
);
|
|
2577
3024
|
return results;
|
|
2578
3025
|
}
|
|
2579
3026
|
/**
 * Collects the markdown pages of the wiki: everything readMarkdownFiles()
 * returns for CONCEPTS_DIR, followed by everything it returns for QUERIES_DIR.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<Array<{filePath: string, content: string}>>} Concept pages
 *   first, then query pages.
 */
async function collectAllPages(root) {
|
|
2580
|
-
const conceptPages = await readMarkdownFiles(
|
|
|
2581
|
-
const queryPages = await readMarkdownFiles(
|
|
|
3027
|
+
const conceptPages = await readMarkdownFiles(path24.join(root, CONCEPTS_DIR));
|
|
3028
|
+
const queryPages = await readMarkdownFiles(path24.join(root, QUERIES_DIR));
|
|
2582
3029
|
return [...conceptPages, ...queryPages];
|
|
2583
3030
|
}
|
|
2584
3031
|
function buildPageSlugSet(pages) {
|
|
2585
3032
|
const slugs = /* @__PURE__ */ new Set();
|
|
2586
3033
|
for (const page of pages) {
|
|
2587
|
-
const baseName =
|
|
3034
|
+
const baseName = path24.basename(page.filePath, ".md");
|
|
2588
3035
|
slugs.add(baseName.toLowerCase());
|
|
2589
3036
|
}
|
|
2590
3037
|
return slugs;
|
|
@@ -2731,9 +3178,8 @@ async function checkInferredWithoutCitations(root) {
|
|
|
2731
3178
|
const pages = await collectAllPages(root);
|
|
2732
3179
|
const results = [];
|
|
2733
3180
|
for (const page of pages) {
|
|
2734
|
-
const {
|
|
2735
|
-
const
|
|
2736
|
-
const inferred = provenance.inferredParagraphs ?? countUncitedProseParagraphs(body);
|
|
3181
|
+
const { body } = parseFrontmatter(page.content);
|
|
3182
|
+
const inferred = countUncitedProseParagraphs(body);
|
|
2737
3183
|
if (inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS) continue;
|
|
2738
3184
|
results.push({
|
|
2739
3185
|
rule: "excess-inferred-paragraphs",
|
|
@@ -2744,7 +3190,7 @@ async function checkInferredWithoutCitations(root) {
|
|
|
2744
3190
|
}
|
|
2745
3191
|
return results;
|
|
2746
3192
|
}
|
|
2747
|
-
var PROSE_PARAGRAPH_LEAD =
|
|
3193
|
+
var PROSE_PARAGRAPH_LEAD = new RegExp("^\\p{L}", "u");
|
|
2748
3194
|
function countUncitedProseParagraphs(body) {
|
|
2749
3195
|
const paragraphs = body.split(/\n\s*\n/);
|
|
2750
3196
|
let count = 0;
|
|
@@ -2767,18 +3213,7 @@ async function checkSchemaCrossLinks(root, schema) {
|
|
|
2767
3213
|
const pages = await collectAllPages(root);
|
|
2768
3214
|
const results = [];
|
|
2769
3215
|
for (const page of pages) {
|
|
2770
|
-
|
|
2771
|
-
const kind = resolvePageKind(meta.kind, schema);
|
|
2772
|
-
const rule = schema.kinds[kind];
|
|
2773
|
-
if (rule.minWikilinks <= 0) continue;
|
|
2774
|
-
const linkCount = countWikilinks(body);
|
|
2775
|
-
if (linkCount >= rule.minWikilinks) continue;
|
|
2776
|
-
results.push({
|
|
2777
|
-
rule: "schema-cross-link-minimum",
|
|
2778
|
-
severity: "warning",
|
|
2779
|
-
file: page.filePath,
|
|
2780
|
-
message: `Page kind "${kind}" requires at least ${rule.minWikilinks} [[wikilinks]] but only ${linkCount} found.`
|
|
2781
|
-
});
|
|
3216
|
+
results.push(...checkPageCrossLinks(page.content, page.filePath, schema));
|
|
2782
3217
|
}
|
|
2783
3218
|
return results;
|
|
2784
3219
|
}
|
|
@@ -2819,13 +3254,24 @@ function countLines(content) {
|
|
|
2819
3254
|
}
|
|
2820
3255
|
/**
 * Runs the broken-citation check over every page returned by
 * collectAllPages().
 *
 * Each page is passed to checkPageBrokenCitations() together with the sources
 * directory (SOURCES_DIR under `root`) and a single Map that is shared across
 * all pages as the line-count cache. Findings are concatenated in page order.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} All findings reported for all pages.
 */
async function checkBrokenCitations(root) {
|
|
2821
3256
|
const pages = await collectAllPages(root);
|
|
2822
|
-
const sourcesDir =
|
|
|
3257
|
+
const sourcesDir = path24.join(root, SOURCES_DIR);
|
|
2823
3258
|
const results = [];
|
|
2824
3259
|
const lineCountCache = /* @__PURE__ */ new Map();
|
|
2825
3260
|
for (const page of pages) {
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
3261
|
+
const pageFindings = await checkPageBrokenCitations(
|
|
3262
|
+
page.content,
|
|
3263
|
+
page.filePath,
|
|
3264
|
+
sourcesDir,
|
|
3265
|
+
lineCountCache
|
|
3266
|
+
);
|
|
3267
|
+
results.push(...pageFindings);
|
|
3268
|
+
}
|
|
3269
|
+
return results;
|
|
3270
|
+
}
|
|
3271
|
+
/**
 * Checks the citation markers of a single page for broken references.
 *
 * Every CITATION_PATTERN match in `content` is handed, one at a time and in
 * order, to collectBrokenForMarker(), which appends its findings to the
 * returned array.
 *
 * @param {string} content - Full text of the page.
 * @param {string} filePath - Path of the page, passed through for reporting.
 * @param {string} sourcesDir - Directory that cited source files live in.
 * @param {Map} [lineCountCache] - Cache passed through to
 *   collectBrokenForMarker(); a fresh Map is used when omitted.
 * @returns {Promise<object[]>} Findings collected for this page.
 */
async function checkPageBrokenCitations(content, filePath, sourcesDir, lineCountCache = /* @__PURE__ */ new Map()) {
  const findings = [];
  const markers = findMatchesInContent(content, CITATION_PATTERN);
  for (const marker of markers) {
    const { captured, line } = marker;
    await collectBrokenForMarker(captured, line, filePath, sourcesDir, lineCountCache, findings);
  }
  return findings;
}
|
|
@@ -2834,7 +3280,7 @@ async function collectBrokenForMarker(captured, line, pageFile, sourcesDir, line
|
|
|
2834
3280
|
const trimmed = part.trim();
|
|
2835
3281
|
if (trimmed.length === 0) continue;
|
|
2836
3282
|
const filename = stripSpanSuffix(trimmed);
|
|
2837
|
-
const citedPath =
|
|
3283
|
+
const citedPath = path24.join(sourcesDir, filename);
|
|
2838
3284
|
if (!existsSync6(citedPath)) {
|
|
2839
3285
|
out.push({
|
|
2840
3286
|
rule: "broken-citation",
|
|
@@ -2870,25 +3316,30 @@ async function checkMalformedClaimCitations(root) {
|
|
|
2870
3316
|
const pages = await collectAllPages(root);
|
|
2871
3317
|
const results = [];
|
|
2872
3318
|
for (const page of pages) {
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
2877
|
-
|
|
2878
|
-
|
|
2879
|
-
|
|
2880
|
-
|
|
2881
|
-
|
|
2882
|
-
|
|
2883
|
-
|
|
3319
|
+
results.push(...checkPageMalformedCitations(page.content, page.filePath));
|
|
3320
|
+
}
|
|
3321
|
+
return results;
|
|
3322
|
+
}
|
|
3323
|
+
/**
 * Reports malformed claim citations found in a single page.
 *
 * Each CITATION_PATTERN match is split on commas; every entry that
 * isMalformedCitationEntry() rejects produces one "malformed-claim-citation"
 * error. The message quotes the whole captured marker, not just the bad entry.
 *
 * @param {string} content - Full text of the page.
 * @param {string} filePath - Path of the page, recorded on each finding.
 * @returns {object[]} One finding per malformed citation entry.
 */
function checkPageMalformedCitations(content, filePath) {
  const findings = [];
  for (const marker of findMatchesInContent(content, CITATION_PATTERN)) {
    const { captured, line } = marker;
    const malformedParts = captured.split(",").filter((part) => isMalformedCitationEntry(part));
    for (let i = 0; i < malformedParts.length; i += 1) {
      findings.push({
        rule: "malformed-claim-citation",
        severity: "error",
        file: filePath,
        message: `Malformed claim citation ^[${captured}] \u2014 expected file.md, file.md:N-N, or file.md#LN-LN`,
        line
      });
    }
  }
  return findings;
}
|
|
2888
3339
|
|
|
2889
3340
|
// src/compiler/page-renderer.ts
|
|
2890
|
-
import { readdir as
|
|
2891
|
-
import
|
|
3341
|
+
import { readdir as readdir9 } from "fs/promises";
|
|
3342
|
+
import path25 from "path";
|
|
2892
3343
|
|
|
2893
3344
|
// src/compiler/provenance.ts
|
|
2894
3345
|
function addProvenanceMeta(fields, concept) {
|
|
@@ -2901,9 +3352,6 @@ function addProvenanceMeta(fields, concept) {
|
|
|
2901
3352
|
if (concept.contradictedBy && concept.contradictedBy.length > 0) {
|
|
2902
3353
|
fields.contradictedBy = concept.contradictedBy;
|
|
2903
3354
|
}
|
|
2904
|
-
if (typeof concept.inferredParagraphs === "number") {
|
|
2905
|
-
fields.inferredParagraphs = concept.inferredParagraphs;
|
|
2906
|
-
}
|
|
2907
3355
|
}
|
|
2908
3356
|
function reportContradictionWarnings(conceptTitle, concept) {
|
|
2909
3357
|
const refs = concept.contradictedBy;
|
|
@@ -2918,7 +3366,7 @@ function reportContradictionWarnings(conceptTitle, concept) {
|
|
|
2918
3366
|
// src/compiler/page-renderer.ts
|
|
2919
3367
|
var RELATED_PAGE_CONTEXT_LIMIT = 5;
|
|
2920
3368
|
async function renderMergedPageContent(root, entry, schema) {
|
|
2921
|
-
const pagePath =
|
|
3369
|
+
const pagePath = path25.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
|
|
2922
3370
|
const existingPage = await safeReadFile(pagePath);
|
|
2923
3371
|
const relatedPages = await loadRelatedPages(root, entry.slug);
|
|
2924
3372
|
const system = buildPagePrompt(
|
|
@@ -2957,17 +3405,17 @@ function buildMergedFrontmatter(entry, existingPage, schema) {
|
|
|
2957
3405
|
return buildFrontmatter(frontmatterFields);
|
|
2958
3406
|
}
|
|
2959
3407
|
async function loadRelatedPages(root, excludeSlug) {
|
|
2960
|
-
const conceptsPath =
|
|
3408
|
+
const conceptsPath = path25.join(root, CONCEPTS_DIR);
|
|
2961
3409
|
let files;
|
|
2962
3410
|
try {
|
|
2963
|
-
files = await
|
|
3411
|
+
files = await readdir9(conceptsPath);
|
|
2964
3412
|
} catch {
|
|
2965
3413
|
return "";
|
|
2966
3414
|
}
|
|
2967
3415
|
const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, RELATED_PAGE_CONTEXT_LIMIT);
|
|
2968
3416
|
const contents = [];
|
|
2969
3417
|
for (const f of related) {
|
|
2970
|
-
const content = await safeReadFile(
|
|
3418
|
+
const content = await safeReadFile(path25.join(conceptsPath, f));
|
|
2971
3419
|
if (!content) continue;
|
|
2972
3420
|
const { meta } = parseFrontmatter(content);
|
|
2973
3421
|
if (meta.orphaned) continue;
|
|
@@ -3021,7 +3469,7 @@ async function generatePagesPhase(root, extractions, frozenSlugs, schema, option
|
|
|
3021
3469
|
return entry;
|
|
3022
3470
|
}))
|
|
3023
3471
|
);
|
|
3024
|
-
return { pages, errors, candidates };
|
|
3472
|
+
return { pages, errors, candidates, seedSlugs: [] };
|
|
3025
3473
|
}
|
|
3026
3474
|
async function persistExtractionStates(root, extractions) {
|
|
3027
3475
|
for (const result of extractions) {
|
|
@@ -3047,12 +3495,13 @@ function summarizeCompile(buckets, generation, extractions, options) {
|
|
|
3047
3495
|
errors.push(`No concepts extracted from ${result.sourceFile}`);
|
|
3048
3496
|
}
|
|
3049
3497
|
}
|
|
3498
|
+
const conceptSlugs = generation.pages.map((entry) => entry.slug);
|
|
3050
3499
|
const baseResult = {
|
|
3051
3500
|
compiled: buckets.toCompile.length,
|
|
3052
3501
|
skipped: buckets.unchanged.length,
|
|
3053
3502
|
deleted: buckets.deleted.length,
|
|
3054
3503
|
concepts: generation.pages.map((entry) => entry.concept.concept),
|
|
3055
|
-
pages: generation.
|
|
3504
|
+
pages: [...conceptSlugs, ...generation.seedSlugs],
|
|
3056
3505
|
errors
|
|
3057
3506
|
};
|
|
3058
3507
|
if (options.review) {
|
|
@@ -3070,12 +3519,21 @@ async function runCompilePipeline(root, options) {
|
|
|
3070
3519
|
if (buckets.toCompile.length === 0 && buckets.deleted.length === 0) {
|
|
3071
3520
|
status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
|
|
3072
3521
|
if (!options.review) {
|
|
3073
|
-
const emptyGeneration = {
|
|
3522
|
+
const emptyGeneration = {
|
|
3523
|
+
pages: [],
|
|
3524
|
+
errors: [],
|
|
3525
|
+
candidates: [],
|
|
3526
|
+
seedSlugs: []
|
|
3527
|
+
};
|
|
3074
3528
|
await generateSeedPages(root, schema, emptyGeneration);
|
|
3075
|
-
await finalizeWiki(root, emptyGeneration.pages);
|
|
3529
|
+
await finalizeWiki(root, emptyGeneration.pages, emptyGeneration.seedSlugs);
|
|
3076
3530
|
return {
|
|
3077
3531
|
...emptyCompileResult(),
|
|
3078
3532
|
skipped: buckets.unchanged.length,
|
|
3533
|
+
// Surface seed-page slugs alongside any errors so downstream
|
|
3534
|
+
// consumers (MCP, embeddings, programmatic callers) can see what
|
|
3535
|
+
// landed even on the no-source-changes early-return path.
|
|
3536
|
+
pages: [...emptyGeneration.seedSlugs],
|
|
3079
3537
|
errors: emptyGeneration.errors
|
|
3080
3538
|
};
|
|
3081
3539
|
}
|
|
@@ -3099,7 +3557,7 @@ async function runCompilePipeline(root, options) {
|
|
|
3099
3557
|
}
|
|
3100
3558
|
await persistFrozenSlugs(root, frozenSlugs, extractions);
|
|
3101
3559
|
await generateSeedPages(root, schema, generation);
|
|
3102
|
-
await finalizeWiki(root, generation.pages);
|
|
3560
|
+
await finalizeWiki(root, generation.pages, generation.seedSlugs);
|
|
3103
3561
|
}
|
|
3104
3562
|
return summarizeCompile(buckets, generation, extractions, options);
|
|
3105
3563
|
}
|
|
@@ -3136,9 +3594,11 @@ async function runExtractionPhases(root, toCompile, state, allChanges) {
|
|
|
3136
3594
|
}
|
|
3137
3595
|
return extractions;
|
|
3138
3596
|
}
|
|
3139
|
-
async function finalizeWiki(root, pages) {
|
|
3140
|
-
const
|
|
3141
|
-
const
|
|
3597
|
+
async function finalizeWiki(root, pages, seedSlugs = []) {
|
|
3598
|
+
const conceptChangedSlugs = pages.map((entry) => entry.slug);
|
|
3599
|
+
const conceptNewSlugs = pages.filter((entry) => entry.concept.is_new).map((entry) => entry.slug);
|
|
3600
|
+
const allChangedSlugs = [...conceptChangedSlugs, ...seedSlugs];
|
|
3601
|
+
const allNewSlugs = [...conceptNewSlugs, ...seedSlugs];
|
|
3142
3602
|
if (allChangedSlugs.length > 0) {
|
|
3143
3603
|
status("\u{1F517}", info("Resolving interlinks..."));
|
|
3144
3604
|
await resolveLinks(root, allChangedSlugs, allNewSlugs);
|
|
@@ -3168,9 +3628,9 @@ function printChangesSummary(changes) {
|
|
|
3168
3628
|
}
|
|
3169
3629
|
async function extractForSource(root, sourceFile) {
|
|
3170
3630
|
status("*", info(`Extracting: ${sourceFile}`));
|
|
3171
|
-
const sourcePath =
|
|
3172
|
-
const sourceContent = await
|
|
3173
|
-
const existingIndex = await safeReadFile(
|
|
3631
|
+
const sourcePath = path26.join(root, SOURCES_DIR, sourceFile);
|
|
3632
|
+
const sourceContent = await readFile18(sourcePath, "utf-8");
|
|
3633
|
+
const existingIndex = await safeReadFile(path26.join(root, INDEX_FILE));
|
|
3174
3634
|
const concepts = await extractConcepts(sourceContent, existingIndex);
|
|
3175
3635
|
if (concepts.length > 0) {
|
|
3176
3636
|
const names = concepts.map((c) => c.concept).join(", ");
|
|
@@ -3193,13 +3653,11 @@ function reconcileConceptMetadata(existing, incoming) {
|
|
|
3193
3653
|
}
|
|
3194
3654
|
}
|
|
3195
3655
|
reconciled.contradictedBy = refs.length > 0 ? refs : void 0;
|
|
3196
|
-
if (typeof incoming.inferredParagraphs === "number") {
|
|
3197
|
-
reconciled.inferredParagraphs = typeof existing.inferredParagraphs === "number" ? Math.max(existing.inferredParagraphs, incoming.inferredParagraphs) : incoming.inferredParagraphs;
|
|
3198
|
-
}
|
|
3199
3656
|
return reconciled;
|
|
3200
3657
|
}
|
|
3201
3658
|
function mergeExtractions(extractions, frozenSlugs) {
|
|
3202
3659
|
const bySlug = /* @__PURE__ */ new Map();
|
|
3660
|
+
const slicesBySlug = /* @__PURE__ */ new Map();
|
|
3203
3661
|
for (const result of extractions) {
|
|
3204
3662
|
if (result.concepts.length === 0) continue;
|
|
3205
3663
|
for (const concept of result.concepts) {
|
|
@@ -3209,23 +3667,28 @@ function mergeExtractions(extractions, frozenSlugs) {
|
|
|
3209
3667
|
if (existing) {
|
|
3210
3668
|
existing.concept = reconcileConceptMetadata(existing.concept, concept);
|
|
3211
3669
|
existing.sourceFiles.push(result.sourceFile);
|
|
3212
|
-
existing.combinedContent += `
|
|
3213
|
-
|
|
3214
|
-
--- SOURCE: ${result.sourceFile} ---
|
|
3215
|
-
|
|
3216
|
-
${result.sourceContent}`;
|
|
3217
3670
|
} else {
|
|
3218
3671
|
bySlug.set(slug, {
|
|
3219
3672
|
slug,
|
|
3220
3673
|
concept,
|
|
3221
3674
|
sourceFiles: [result.sourceFile],
|
|
3222
|
-
combinedContent:
|
|
3223
|
-
|
|
3224
|
-
${result.sourceContent}`
|
|
3675
|
+
combinedContent: ""
|
|
3225
3676
|
});
|
|
3677
|
+
slicesBySlug.set(slug, []);
|
|
3226
3678
|
}
|
|
3679
|
+
slicesBySlug.get(slug).push({
|
|
3680
|
+
file: result.sourceFile,
|
|
3681
|
+
content: result.sourceContent
|
|
3682
|
+
});
|
|
3227
3683
|
}
|
|
3228
3684
|
}
|
|
3685
|
+
for (const merged of bySlug.values()) {
|
|
3686
|
+
const slices = slicesBySlug.get(merged.slug) ?? [];
|
|
3687
|
+
merged.combinedContent = buildBudgetedCombinedContent(
|
|
3688
|
+
merged.concept.concept,
|
|
3689
|
+
slices
|
|
3690
|
+
);
|
|
3691
|
+
}
|
|
3229
3692
|
return Array.from(bySlug.values());
|
|
3230
3693
|
}
|
|
3231
3694
|
async function generateMergedPage(root, entry, schema, options, sourceStates) {
|
|
@@ -3233,13 +3696,18 @@ async function generateMergedPage(root, entry, schema, options, sourceStates) {
|
|
|
3233
3696
|
if (options.review) {
|
|
3234
3697
|
return await persistReviewCandidate(root, entry, fullPage, sourceStates, schema);
|
|
3235
3698
|
}
|
|
3236
|
-
const pagePath =
|
|
3699
|
+
const pagePath = path26.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
|
|
3237
3700
|
const error2 = await writePageIfValid(pagePath, fullPage, entry.concept.concept);
|
|
3238
3701
|
return { error: error2 ?? void 0 };
|
|
3239
3702
|
}
|
|
3240
3703
|
async function persistReviewCandidate(root, entry, fullPage, sourceStates, schema) {
|
|
3241
3704
|
const virtualPath = `wiki/concepts/${entry.slug}.md`;
|
|
3242
|
-
const
|
|
3705
|
+
const schemaViolations = checkPageCrossLinks(fullPage, virtualPath, schema);
|
|
3706
|
+
const provenanceViolations = await collectCandidateProvenanceViolations(
|
|
3707
|
+
root,
|
|
3708
|
+
fullPage,
|
|
3709
|
+
virtualPath
|
|
3710
|
+
);
|
|
3243
3711
|
const candidate = await writeCandidate(root, {
|
|
3244
3712
|
title: entry.concept.concept,
|
|
3245
3713
|
slug: entry.slug,
|
|
@@ -3247,21 +3715,35 @@ async function persistReviewCandidate(root, entry, fullPage, sourceStates, schem
|
|
|
3247
3715
|
sources: entry.sourceFiles,
|
|
3248
3716
|
body: fullPage,
|
|
3249
3717
|
sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles),
|
|
3250
|
-
schemaViolations:
|
|
3718
|
+
schemaViolations: schemaViolations.length > 0 ? schemaViolations : void 0,
|
|
3719
|
+
provenanceViolations: provenanceViolations.length > 0 ? provenanceViolations : void 0
|
|
3251
3720
|
});
|
|
3252
3721
|
status("?", info(`Candidate ready: ${candidate.id} (${entry.slug})`));
|
|
3253
3722
|
return { candidateId: candidate.id };
|
|
3254
3723
|
}
|
|
3724
|
+
async function collectCandidateProvenanceViolations(root, fullPage, virtualPath) {
|
|
3725
|
+
const malformed = checkPageMalformedCitations(fullPage, virtualPath);
|
|
3726
|
+
const broken = await checkPageBrokenCitations(
|
|
3727
|
+
fullPage,
|
|
3728
|
+
virtualPath,
|
|
3729
|
+
path26.join(root, SOURCES_DIR)
|
|
3730
|
+
);
|
|
3731
|
+
return [...malformed, ...broken];
|
|
3732
|
+
}
|
|
3255
3733
|
async function generateSeedPages(root, schema, generation) {
|
|
3256
3734
|
if (schema.seedPages.length === 0) return;
|
|
3257
3735
|
for (const seed of schema.seedPages) {
|
|
3258
|
-
const
|
|
3259
|
-
if (
|
|
3736
|
+
const result = await generateSingleSeedPage(root, schema, seed);
|
|
3737
|
+
if (result.error) {
|
|
3738
|
+
generation.errors.push(result.error);
|
|
3739
|
+
continue;
|
|
3740
|
+
}
|
|
3741
|
+
generation.seedSlugs.push(result.slug);
|
|
3260
3742
|
}
|
|
3261
3743
|
}
|
|
3262
3744
|
async function generateSingleSeedPage(root, schema, seed) {
|
|
3263
3745
|
const slug = slugify(seed.title);
|
|
3264
|
-
const pagePath =
|
|
3746
|
+
const pagePath = path26.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
3265
3747
|
const relatedContent = await loadSeedRelatedPages(root, seed.relatedSlugs ?? []);
|
|
3266
3748
|
const rule = schema.kinds[seed.kind];
|
|
3267
3749
|
const system = buildSeedPagePrompt(seed, rule, relatedContent);
|
|
@@ -3284,16 +3766,17 @@ async function generateSingleSeedPage(root, schema, seed) {
|
|
|
3284
3766
|
const frontmatterFields = { ...typedFields };
|
|
3285
3767
|
addObsidianMeta(frontmatterFields, seed.title, []);
|
|
3286
3768
|
const frontmatter = buildFrontmatter(frontmatterFields);
|
|
3287
|
-
|
|
3769
|
+
const error2 = await writePageIfValid(pagePath, `${frontmatter}
|
|
3288
3770
|
|
|
3289
3771
|
${pageBody}
|
|
3290
3772
|
`, seed.title);
|
|
3773
|
+
return error2 ? { slug, error: error2 } : { slug };
|
|
3291
3774
|
}
|
|
3292
3775
|
async function loadSeedRelatedPages(root, slugs) {
|
|
3293
3776
|
if (slugs.length === 0) return "";
|
|
3294
3777
|
const contents = [];
|
|
3295
3778
|
for (const slug of slugs) {
|
|
3296
|
-
const pagePath =
|
|
3779
|
+
const pagePath = path26.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
3297
3780
|
const content = await safeReadFile(pagePath);
|
|
3298
3781
|
if (content) contents.push(content);
|
|
3299
3782
|
}
|
|
@@ -3348,7 +3831,7 @@ async function compileCommand(options = {}) {
|
|
|
3348
3831
|
|
|
3349
3832
|
// src/commands/query.ts
|
|
3350
3833
|
import { existsSync as existsSync8 } from "fs";
|
|
3351
|
-
import
|
|
3834
|
+
import path27 from "path";
|
|
3352
3835
|
var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
|
|
3353
3836
|
var PAGE_SELECTION_TOOL = {
|
|
3354
3837
|
name: "select_pages",
|
|
@@ -3405,7 +3888,7 @@ async function selectRelevantPages(root, question, debug) {
|
|
|
3405
3888
|
const { pages: rawPages2, reasoning: reasoning2 } = await selectPages(question, filteredIndex);
|
|
3406
3889
|
return { pages: rawPages2, rawPages: rawPages2, reasoning: reasoning2, chunks: [] };
|
|
3407
3890
|
}
|
|
3408
|
-
const indexContent = await safeReadFile(
|
|
3891
|
+
const indexContent = await safeReadFile(path27.join(root, INDEX_FILE));
|
|
3409
3892
|
const { pages: rawPages, reasoning } = await selectPages(question, indexContent);
|
|
3410
3893
|
return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning, chunks: [] };
|
|
3411
3894
|
}
|
|
@@ -3497,7 +3980,7 @@ async function loadSelectedPages(root, slugs) {
|
|
|
3497
3980
|
for (const slug of slugs) {
|
|
3498
3981
|
let content = "";
|
|
3499
3982
|
for (const dir of PAGE_DIRS) {
|
|
3500
|
-
const candidate = await safeReadFile(
|
|
3983
|
+
const candidate = await safeReadFile(path27.join(root, dir, `${slug}.md`));
|
|
3501
3984
|
if (!candidate) continue;
|
|
3502
3985
|
const { meta } = parseFrontmatter(candidate);
|
|
3503
3986
|
if (meta.orphaned) continue;
|
|
@@ -3513,7 +3996,11 @@ ${content}`);
|
|
|
3513
3996
|
}
|
|
3514
3997
|
return sections.join("\n\n");
|
|
3515
3998
|
}
|
|
3516
|
-
var
|
|
3999
|
+
var ANSWER_SYSTEM_PROMPT_BASE = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";
|
|
4000
|
+
function buildAnswerSystemPrompt() {
|
|
4001
|
+
const lang = languageDirective();
|
|
4002
|
+
return lang ? `${ANSWER_SYSTEM_PROMPT_BASE} ${lang}` : ANSWER_SYSTEM_PROMPT_BASE;
|
|
4003
|
+
}
|
|
3517
4004
|
async function callAnswerLLM(question, pagesContent, chunks, onToken) {
|
|
3518
4005
|
const provenance = chunks.length > 0 ? buildChunkProvenance(chunks) : "";
|
|
3519
4006
|
const userMessage = `Question: ${question}
|
|
@@ -3521,7 +4008,7 @@ async function callAnswerLLM(question, pagesContent, chunks, onToken) {
|
|
|
3521
4008
|
Relevant wiki pages:
|
|
3522
4009
|
${pagesContent}${provenance}`;
|
|
3523
4010
|
return callClaude({
|
|
3524
|
-
system:
|
|
4011
|
+
system: buildAnswerSystemPrompt(),
|
|
3525
4012
|
messages: [{ role: "user", content: userMessage }],
|
|
3526
4013
|
stream: Boolean(onToken),
|
|
3527
4014
|
onToken
|
|
@@ -3544,7 +4031,7 @@ function summarizeAnswer(answer) {
|
|
|
3544
4031
|
}
|
|
3545
4032
|
async function saveQueryPage(root, question, answer) {
|
|
3546
4033
|
const slug = slugify(question);
|
|
3547
|
-
const filePath =
|
|
4034
|
+
const filePath = path27.join(root, QUERIES_DIR, `${slug}.md`);
|
|
3548
4035
|
const frontmatter = buildFrontmatter({
|
|
3549
4036
|
title: question,
|
|
3550
4037
|
summary: summarizeAnswer(answer),
|
|
@@ -3570,7 +4057,7 @@ ${answer}
|
|
|
3570
4057
|
return slug;
|
|
3571
4058
|
}
|
|
3572
4059
|
async function generateAnswer(root, question, options = {}) {
|
|
3573
|
-
if (!existsSync8(
|
|
4060
|
+
if (!existsSync8(path27.join(root, INDEX_FILE))) {
|
|
3574
4061
|
throw new Error("Wiki index not found. Run `llmwiki compile` first.");
|
|
3575
4062
|
}
|
|
3576
4063
|
const selection = await selectRelevantPages(root, question, Boolean(options.debug));
|
|
@@ -3598,7 +4085,7 @@ function buildEmptyResult(selection) {
|
|
|
3598
4085
|
};
|
|
3599
4086
|
}
|
|
3600
4087
|
async function queryCommand(root, question, options) {
|
|
3601
|
-
if (!existsSync8(
|
|
4088
|
+
if (!existsSync8(path27.join(root, INDEX_FILE))) {
|
|
3602
4089
|
status("!", error("Wiki index not found. Run `llmwiki compile` first."));
|
|
3603
4090
|
return;
|
|
3604
4091
|
}
|
|
@@ -3649,10 +4136,10 @@ var DEBUG_CHUNK_PREVIEW_CHARS = 120;
|
|
|
3649
4136
|
// src/commands/watch.ts
|
|
3650
4137
|
import { watch as chokidarWatch } from "chokidar";
|
|
3651
4138
|
import { existsSync as existsSync9 } from "fs";
|
|
3652
|
-
import
|
|
4139
|
+
import path28 from "path";
|
|
3653
4140
|
var DEBOUNCE_MS = 500;
|
|
3654
4141
|
async function watchCommand() {
|
|
3655
|
-
const sourcesPath =
|
|
4142
|
+
const sourcesPath = path28.resolve(SOURCES_DIR);
|
|
3656
4143
|
if (!existsSync9(sourcesPath)) {
|
|
3657
4144
|
status(
|
|
3658
4145
|
"!",
|
|
@@ -3687,7 +4174,7 @@ async function watchCommand() {
|
|
|
3687
4174
|
const scheduleCompile = (eventPath, event) => {
|
|
3688
4175
|
status(
|
|
3689
4176
|
"~",
|
|
3690
|
-
dim(`${event}: ${
|
|
4177
|
+
dim(`${event}: ${path28.basename(eventPath)}`)
|
|
3691
4178
|
);
|
|
3692
4179
|
if (debounceTimer) clearTimeout(debounceTimer);
|
|
3693
4180
|
debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
|
|
@@ -3771,10 +4258,388 @@ async function lintCommand() {
|
|
|
3771
4258
|
}
|
|
3772
4259
|
}
|
|
3773
4260
|
|
|
4261
|
+
// src/commands/export.ts
|
|
4262
|
+
import path30 from "path";
|
|
4263
|
+
import { createRequire } from "module";
|
|
4264
|
+
|
|
4265
|
+
// src/export/collect.ts
|
|
4266
|
+
import { readdir as readdir10, readFile as readFile19 } from "fs/promises";
|
|
4267
|
+
import path29 from "path";
|
|
4268
|
+
var WIKILINK_RE = /\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g;
|
|
4269
|
+
function extractWikilinkSlugs(body) {
|
|
4270
|
+
const slugs = /* @__PURE__ */ new Set();
|
|
4271
|
+
let match;
|
|
4272
|
+
while ((match = WIKILINK_RE.exec(body)) !== null) {
|
|
4273
|
+
slugs.add(slugify(match[1].trim()));
|
|
4274
|
+
}
|
|
4275
|
+
return [...slugs];
|
|
4276
|
+
}
|
|
4277
|
+
async function parsePageFile(filePath, slug, pageDirectory) {
|
|
4278
|
+
let raw;
|
|
4279
|
+
try {
|
|
4280
|
+
raw = await readFile19(filePath, "utf-8");
|
|
4281
|
+
} catch {
|
|
4282
|
+
return null;
|
|
4283
|
+
}
|
|
4284
|
+
const { meta, body } = parseFrontmatter(raw);
|
|
4285
|
+
if (!meta.title || typeof meta.title !== "string") return null;
|
|
4286
|
+
if (meta.orphaned === true) return null;
|
|
4287
|
+
return {
|
|
4288
|
+
title: meta.title,
|
|
4289
|
+
slug,
|
|
4290
|
+
pageDirectory,
|
|
4291
|
+
summary: typeof meta.summary === "string" ? meta.summary : "",
|
|
4292
|
+
sources: Array.isArray(meta.sources) ? meta.sources.filter((s) => typeof s === "string") : [],
|
|
4293
|
+
tags: Array.isArray(meta.tags) ? meta.tags.filter((t) => typeof t === "string") : [],
|
|
4294
|
+
createdAt: typeof meta.createdAt === "string" ? meta.createdAt : (/* @__PURE__ */ new Date()).toISOString(),
|
|
4295
|
+
updatedAt: typeof meta.updatedAt === "string" ? meta.updatedAt : (/* @__PURE__ */ new Date()).toISOString(),
|
|
4296
|
+
links: extractWikilinkSlugs(body),
|
|
4297
|
+
body
|
|
4298
|
+
};
|
|
4299
|
+
}
|
|
4300
|
+
async function collectFromDir(dirPath, pageDirectory) {
|
|
4301
|
+
let files;
|
|
4302
|
+
try {
|
|
4303
|
+
files = await readdir10(dirPath);
|
|
4304
|
+
} catch {
|
|
4305
|
+
return [];
|
|
4306
|
+
}
|
|
4307
|
+
const pages = [];
|
|
4308
|
+
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
4309
|
+
const slug = file.replace(/\.md$/, "");
|
|
4310
|
+
const page = await parsePageFile(path29.join(dirPath, file), slug, pageDirectory);
|
|
4311
|
+
if (page) pages.push(page);
|
|
4312
|
+
}
|
|
4313
|
+
return pages;
|
|
4314
|
+
}
|
|
4315
|
+
async function collectExportPages(root) {
|
|
4316
|
+
const conceptsPath = path29.join(root, CONCEPTS_DIR);
|
|
4317
|
+
const queriesPath = path29.join(root, QUERIES_DIR);
|
|
4318
|
+
const [concepts, queries] = await Promise.all([
|
|
4319
|
+
collectFromDir(conceptsPath, "concepts"),
|
|
4320
|
+
collectFromDir(queriesPath, "queries")
|
|
4321
|
+
]);
|
|
4322
|
+
const all = [...concepts, ...queries];
|
|
4323
|
+
all.sort((a, b) => a.title.localeCompare(b.title));
|
|
4324
|
+
return all;
|
|
4325
|
+
}
|
|
4326
|
+
|
|
4327
|
+
// src/export/llms-txt.ts
|
|
4328
|
+
function pageRelativePath(page) {
|
|
4329
|
+
return `wiki/${page.pageDirectory}/${page.slug}.md`;
|
|
4330
|
+
}
|
|
4331
|
+
function buildEntryNote(page) {
|
|
4332
|
+
const parts = [];
|
|
4333
|
+
if (page.summary) parts.push(page.summary);
|
|
4334
|
+
if (page.tags.length > 0) parts.push(`tags: ${page.tags.join(", ")}`);
|
|
4335
|
+
if (page.sources.length > 0) parts.push(`sources: ${page.sources.join(", ")}`);
|
|
4336
|
+
parts.push(`created: ${page.createdAt}`);
|
|
4337
|
+
parts.push(`updated: ${page.updatedAt}`);
|
|
4338
|
+
return parts.join(" | ");
|
|
4339
|
+
}
|
|
4340
|
+
function formatPageEntry(page) {
|
|
4341
|
+
const note = buildEntryNote(page);
|
|
4342
|
+
return `- [${page.title}](${pageRelativePath(page)}): ${note}`;
|
|
4343
|
+
}
|
|
4344
|
+
function buildSection(heading, pages) {
|
|
4345
|
+
if (pages.length === 0) return [];
|
|
4346
|
+
return [`## ${heading}`, "", ...pages.map(formatPageEntry), ""];
|
|
4347
|
+
}
|
|
4348
|
+
function buildLlmsTxt(pages, projectTitle) {
|
|
4349
|
+
const concepts = pages.filter((p) => p.pageDirectory === "concepts");
|
|
4350
|
+
const queries = pages.filter((p) => p.pageDirectory === "queries");
|
|
4351
|
+
const lines = [
|
|
4352
|
+
`# ${projectTitle}`,
|
|
4353
|
+
"",
|
|
4354
|
+
`> ${pages.length} pages \u2014 exported ${(/* @__PURE__ */ new Date()).toISOString()}`,
|
|
4355
|
+
"",
|
|
4356
|
+
...buildSection("Concepts", concepts),
|
|
4357
|
+
...buildSection("Saved Queries", queries)
|
|
4358
|
+
];
|
|
4359
|
+
return lines.join("\n");
|
|
4360
|
+
}
|
|
4361
|
+
function buildLlmsFullTxt(pages, projectTitle) {
|
|
4362
|
+
const sections = [buildLlmsTxt(pages, projectTitle)];
|
|
4363
|
+
for (const page of pages) {
|
|
4364
|
+
const tags = page.tags.length > 0 ? `
|
|
4365
|
+
Tags: ${page.tags.join(", ")}` : "";
|
|
4366
|
+
const sources = page.sources.length > 0 ? `
|
|
4367
|
+
Sources: ${page.sources.join(", ")}` : "";
|
|
4368
|
+
const header2 = [
|
|
4369
|
+
"---",
|
|
4370
|
+
`## ${page.title}`,
|
|
4371
|
+
`> ${page.summary}${tags}${sources}`,
|
|
4372
|
+
`Created: ${page.createdAt} | Updated: ${page.updatedAt}`,
|
|
4373
|
+
""
|
|
4374
|
+
].join("\n");
|
|
4375
|
+
sections.push(`${header2}
|
|
4376
|
+
${page.body.trim()}
|
|
4377
|
+
`);
|
|
4378
|
+
}
|
|
4379
|
+
return sections.join("\n");
|
|
4380
|
+
}
|
|
4381
|
+
|
|
4382
|
+
// src/export/json-export.ts
|
|
4383
|
+
function buildJsonExport(pages) {
|
|
4384
|
+
const doc = {
|
|
4385
|
+
exportedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
4386
|
+
pageCount: pages.length,
|
|
4387
|
+
pages
|
|
4388
|
+
};
|
|
4389
|
+
return JSON.stringify(doc, null, 2);
|
|
4390
|
+
}
|
|
4391
|
+
|
|
4392
|
+
// src/export/json-ld.ts
|
|
4393
|
+
var LOCAL_BASE = "urn:llmwiki:";
|
|
4394
|
+
function pageIri(slug) {
|
|
4395
|
+
return `${LOCAL_BASE}${slug}`;
|
|
4396
|
+
}
|
|
4397
|
+
function pageToJsonLd(page) {
|
|
4398
|
+
const node = {
|
|
4399
|
+
"@id": pageIri(page.slug),
|
|
4400
|
+
"@type": "Article",
|
|
4401
|
+
name: page.title,
|
|
4402
|
+
description: page.summary,
|
|
4403
|
+
dateCreated: page.createdAt,
|
|
4404
|
+
dateModified: page.updatedAt
|
|
4405
|
+
};
|
|
4406
|
+
if (page.tags.length > 0) {
|
|
4407
|
+
node["keywords"] = page.tags;
|
|
4408
|
+
}
|
|
4409
|
+
if (page.sources.length > 0) {
|
|
4410
|
+
node["isBasedOn"] = page.sources;
|
|
4411
|
+
}
|
|
4412
|
+
if (page.links.length > 0) {
|
|
4413
|
+
node["mentions"] = page.links.map((slug) => ({ "@id": pageIri(slug) }));
|
|
4414
|
+
}
|
|
4415
|
+
return node;
|
|
4416
|
+
}
|
|
4417
|
+
function buildJsonLd(pages) {
|
|
4418
|
+
const doc = {
|
|
4419
|
+
"@context": "https://schema.org",
|
|
4420
|
+
"@graph": pages.map(pageToJsonLd)
|
|
4421
|
+
};
|
|
4422
|
+
return JSON.stringify(doc, null, 2);
|
|
4423
|
+
}
|
|
4424
|
+
|
|
4425
|
+
// src/export/graphml.ts
|
|
4426
|
+
var XML_ESCAPES = {
|
|
4427
|
+
"&": "&amp;",
|
|
4428
|
+
"<": "&lt;",
|
|
4429
|
+
">": "&gt;",
|
|
4430
|
+
'"': "&quot;",
|
|
4431
|
+
"'": "&apos;"
|
|
4432
|
+
};
|
|
4433
|
+
function escapeXml(value) {
|
|
4434
|
+
return value.replace(/[&<>"']/g, (ch) => XML_ESCAPES[ch] ?? ch);
|
|
4435
|
+
}
|
|
4436
|
+
var KEY_DEFS = [
|
|
4437
|
+
'<key id="title" for="node" attr.name="title" attr.type="string"/>',
|
|
4438
|
+
'<key id="summary" for="node" attr.name="summary" attr.type="string"/>',
|
|
4439
|
+
'<key id="tags" for="node" attr.name="tags" attr.type="string"/>',
|
|
4440
|
+
'<key id="sources" for="node" attr.name="sources" attr.type="string"/>',
|
|
4441
|
+
'<key id="createdAt" for="node" attr.name="createdAt" attr.type="string"/>',
|
|
4442
|
+
'<key id="updatedAt" for="node" attr.name="updatedAt" attr.type="string"/>'
|
|
4443
|
+
].join("\n ");
|
|
4444
|
+
function pageToNode(page) {
|
|
4445
|
+
const tags = page.tags.join(", ");
|
|
4446
|
+
const sources = page.sources.join(", ");
|
|
4447
|
+
return [
|
|
4448
|
+
` <node id="${escapeXml(page.slug)}">`,
|
|
4449
|
+
` <data key="title">${escapeXml(page.title)}</data>`,
|
|
4450
|
+
` <data key="summary">${escapeXml(page.summary)}</data>`,
|
|
4451
|
+
` <data key="tags">${escapeXml(tags)}</data>`,
|
|
4452
|
+
` <data key="sources">${escapeXml(sources)}</data>`,
|
|
4453
|
+
` <data key="createdAt">${escapeXml(page.createdAt)}</data>`,
|
|
4454
|
+
` <data key="updatedAt">${escapeXml(page.updatedAt)}</data>`,
|
|
4455
|
+
` </node>`
|
|
4456
|
+
].join("\n");
|
|
4457
|
+
}
|
|
4458
|
+
function pageToEdges(page, knownSlugs) {
|
|
4459
|
+
return page.links.filter((slug) => knownSlugs.has(slug)).map(
|
|
4460
|
+
(slug) => ` <edge source="${escapeXml(page.slug)}" target="${escapeXml(slug)}"/>`
|
|
4461
|
+
);
|
|
4462
|
+
}
|
|
4463
|
+
function buildGraphml(pages) {
|
|
4464
|
+
const knownSlugs = new Set(pages.map((p) => p.slug));
|
|
4465
|
+
const nodes = pages.map(pageToNode).join("\n");
|
|
4466
|
+
const edges = pages.flatMap((p) => pageToEdges(p, knownSlugs)).join("\n");
|
|
4467
|
+
return [
|
|
4468
|
+
'<?xml version="1.0" encoding="UTF-8"?>',
|
|
4469
|
+
'<graphml xmlns="http://graphml.graphdrawing.org/graphml"',
|
|
4470
|
+
' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
|
|
4471
|
+
' xsi:schemaLocation="http://graphml.graphdrawing.org/graphml',
|
|
4472
|
+
' http://graphml.graphdrawing.org/graphml/1.0/graphml.xsd">',
|
|
4473
|
+
` ${KEY_DEFS}`,
|
|
4474
|
+
' <graph id="wiki" edgedefault="directed">',
|
|
4475
|
+
nodes,
|
|
4476
|
+
edges,
|
|
4477
|
+
" </graph>",
|
|
4478
|
+
"</graphml>",
|
|
4479
|
+
""
|
|
4480
|
+
].join("\n");
|
|
4481
|
+
}
|
|
4482
|
+
|
|
4483
|
+
// src/export/marp.ts
|
|
4484
|
+
var SLIDE_BODY_MAX_CHARS = 300;
|
|
4485
|
+
function extractFirstParagraph(body) {
|
|
4486
|
+
const trimmed = body.trim();
|
|
4487
|
+
const firstBlock = trimmed.split(/\n\s*\n/)[0] ?? "";
|
|
4488
|
+
const stripped = firstBlock.replace(/^#{1,6}\s+/gm, "").replace(/^[-*+]\s+/gm, "").trim();
|
|
4489
|
+
if (stripped.length <= SLIDE_BODY_MAX_CHARS) return stripped;
|
|
4490
|
+
return `${stripped.slice(0, SLIDE_BODY_MAX_CHARS)}\u2026`;
|
|
4491
|
+
}
|
|
4492
|
+
function buildSpeakerNotes(page) {
|
|
4493
|
+
const parts = [`created: ${page.createdAt}`, `updated: ${page.updatedAt}`];
|
|
4494
|
+
if (page.sources.length > 0) parts.push(`sources: ${page.sources.join(", ")}`);
|
|
4495
|
+
return `<!-- ${parts.join(" | ")} -->`;
|
|
4496
|
+
}
|
|
4497
|
+
function pageToSlide(page) {
|
|
4498
|
+
const tagLine = page.tags.length > 0 ? `
|
|
4499
|
+
_Tags: ${page.tags.join(", ")}_` : "";
|
|
4500
|
+
const excerpt = extractFirstParagraph(page.body);
|
|
4501
|
+
const notes = buildSpeakerNotes(page);
|
|
4502
|
+
return [
|
|
4503
|
+
`## ${page.title}`,
|
|
4504
|
+
"",
|
|
4505
|
+
`> ${page.summary}${tagLine}`,
|
|
4506
|
+
"",
|
|
4507
|
+
excerpt,
|
|
4508
|
+
"",
|
|
4509
|
+
notes
|
|
4510
|
+
].join("\n");
|
|
4511
|
+
}
|
|
4512
|
+
function filterBySource(pages, source2) {
|
|
4513
|
+
if (source2 === "all") return pages;
|
|
4514
|
+
return pages.filter((p) => p.pageDirectory === source2);
|
|
4515
|
+
}
|
|
4516
|
+
function buildMarp(pages, projectTitle, source2 = "all") {
|
|
4517
|
+
const filtered = filterBySource(pages, source2);
|
|
4518
|
+
const frontmatter = [
|
|
4519
|
+
"---",
|
|
4520
|
+
"marp: true",
|
|
4521
|
+
"theme: default",
|
|
4522
|
+
"paginate: true",
|
|
4523
|
+
`title: "${projectTitle}"`,
|
|
4524
|
+
"---"
|
|
4525
|
+
].join("\n");
|
|
4526
|
+
const titleSlide = [
|
|
4527
|
+
"",
|
|
4528
|
+
`# ${projectTitle}`,
|
|
4529
|
+
"",
|
|
4530
|
+
`${filtered.length} pages | ${(/* @__PURE__ */ new Date()).toISOString()}`
|
|
4531
|
+
].join("\n");
|
|
4532
|
+
const slides = filtered.map((p) => `---
|
|
4533
|
+
|
|
4534
|
+
${pageToSlide(p)}`);
|
|
4535
|
+
return [frontmatter, titleSlide, ...slides, ""].join("\n\n");
|
|
4536
|
+
}
|
|
4537
|
+
|
|
4538
|
+
// src/export/types.ts
|
|
4539
|
+
var MARP_SOURCES = ["concepts", "queries", "all"];
|
|
4540
|
+
var EXPORT_TARGETS = [
|
|
4541
|
+
"llms-txt",
|
|
4542
|
+
"llms-full-txt",
|
|
4543
|
+
"json",
|
|
4544
|
+
"json-ld",
|
|
4545
|
+
"graphml",
|
|
4546
|
+
"marp"
|
|
4547
|
+
];
|
|
4548
|
+
|
|
4549
|
+
// src/commands/export.ts
|
|
4550
|
+
var require2 = createRequire(import.meta.url);
|
|
4551
|
+
var EXPORT_DIR = "dist/exports";
|
|
4552
|
+
var TARGET_FILENAMES = {
|
|
4553
|
+
"llms-txt": "llms.txt",
|
|
4554
|
+
"llms-full-txt": "llms-full.txt",
|
|
4555
|
+
json: "wiki.json",
|
|
4556
|
+
"json-ld": "wiki.jsonld",
|
|
4557
|
+
graphml: "wiki.graphml",
|
|
4558
|
+
marp: "wiki.md"
|
|
4559
|
+
};
|
|
4560
|
+
function resolveProjectTitle(root) {
|
|
4561
|
+
try {
|
|
4562
|
+
const pkg = require2(path30.join(root, "package.json"));
|
|
4563
|
+
return typeof pkg.name === "string" ? pkg.name : "Knowledge Wiki";
|
|
4564
|
+
} catch {
|
|
4565
|
+
return "Knowledge Wiki";
|
|
4566
|
+
}
|
|
4567
|
+
}
|
|
4568
|
+
function isValidTarget(value) {
|
|
4569
|
+
return EXPORT_TARGETS.includes(value);
|
|
4570
|
+
}
|
|
4571
|
+
function isValidMarpSource(value) {
|
|
4572
|
+
return MARP_SOURCES.includes(value);
|
|
4573
|
+
}
|
|
4574
|
+
function resolveMarpSource(rawSource) {
|
|
4575
|
+
if (!rawSource) return "all";
|
|
4576
|
+
if (!isValidMarpSource(rawSource)) {
|
|
4577
|
+
throw new Error(
|
|
4578
|
+
`Unknown --source value "${rawSource}". Valid values: ${MARP_SOURCES.join(", ")}`
|
|
4579
|
+
);
|
|
4580
|
+
}
|
|
4581
|
+
return rawSource;
|
|
4582
|
+
}
|
|
4583
|
+
/**
 * Produce the export payload for a single target by dispatching to the
 * matching builder.
 *
 * @param {string} target - Export target identifier (one of EXPORT_TARGETS).
 * @param {Array} pages - Collected wiki pages, passed through to the builder.
 * @param {string} projectTitle - Title passed to the llms-txt, llms-full-txt and marp builders.
 * @param {string} marpSource - Page-source selection; only the marp builder receives it.
 * @returns {*} Whatever the selected builder returns (runExport writes it to disk).
 * @throws {Error} When `target` matches none of the known cases. Without this the
 *   function fell through and returned `undefined`, which the caller would then
 *   try to write out as file content.
 */
function buildContent(target, pages, projectTitle, marpSource) {
  switch (target) {
    case "llms-txt":
      return buildLlmsTxt(pages, projectTitle);
    case "llms-full-txt":
      return buildLlmsFullTxt(pages, projectTitle);
    case "json":
      return buildJsonExport(pages);
    case "json-ld":
      return buildJsonLd(pages);
    case "graphml":
      return buildGraphml(pages);
    case "marp":
      return buildMarp(pages, projectTitle, marpSource);
    default:
      // Callers validate targets first, so this guards against a target being
      // added to the list without a matching case here.
      throw new Error(`Unsupported export target "${target}"`);
  }
}
|
|
4599
|
+
/**
 * Work out the page count to report after an export run.
 *
 * When marp is the only target and a specific source was selected, only pages
 * whose `pageDirectory` equals that source are counted. In every other case
 * the full number of pages is reported.
 *
 * @param {Array<{pageDirectory: string}>} pages - All collected pages.
 * @param {string[]} targets - Targets that were exported.
 * @param {string} marpSource - Resolved marp source ("all" or a directory name).
 * @returns {number} Number of pages to report.
 */
function computeReportedPageCount(pages, targets, marpSource) {
  const isMarpOnly = targets.length === 1 && targets[0] === "marp";
  if (!isMarpOnly || marpSource === "all") {
    return pages.length;
  }
  let matching = 0;
  for (const page of pages) {
    if (page.pageDirectory === marpSource) {
      matching += 1;
    }
  }
  return matching;
}
|
|
4606
|
+
/**
 * Export the wiki under `root` to the requested target format(s).
 *
 * Pages are collected first, then the options are validated. Each target is
 * built and written in turn (sequentially) to
 * `<root>/dist/exports/<file name for target>`, with a status line printed
 * after each write.
 *
 * @param {string} root - Project root directory.
 * @param {{target?: string, source?: string}} [options] - Optional single
 *   target and marp page-source selection.
 * @returns {Promise<{written: string[], pageCount: number}>} Paths written, in
 *   order, and the page count to report.
 * @throws {Error} When `options.target` or `options.source` is not recognised.
 */
async function runExport(root, options = {}) {
  const pages = await collectExportPages(root);
  const projectTitle = resolveProjectTitle(root);
  const targets = resolveTargets(options.target);
  const marpSource = resolveMarpSource(options.source);

  const written = [];
  for (const target of targets) {
    const payload = buildContent(target, pages, projectTitle, marpSource);
    const fileName = TARGET_FILENAMES[target];
    const destination = path30.join(root, EXPORT_DIR, fileName);
    await atomicWrite(destination, payload);
    written.push(destination);
    const message = `Exported ${target} \u2192 ${source(destination)}`;
    status("+", success(message));
  }

  const pageCount = computeReportedPageCount(pages, targets, marpSource);
  return { written, pageCount };
}
|
|
4621
|
+
/**
 * Turn the raw `--target` option into the list of targets to export.
 *
 * @param {string | undefined} rawTarget - Value supplied by the caller, if any.
 * @returns {string[]} A fresh copy of every known target when nothing was
 *   supplied, otherwise a one-element list holding the validated target.
 * @throws {Error} When a value was supplied that is not a known export target.
 */
function resolveTargets(rawTarget) {
  if (rawTarget) {
    if (isValidTarget(rawTarget)) {
      return [rawTarget];
    }
    const validTargets = EXPORT_TARGETS.join(", ");
    throw new Error(
      `Unknown export target "${rawTarget}". Valid targets: ${validTargets}`
    );
  }
  // Copy so callers cannot mutate the shared list.
  return [...EXPORT_TARGETS];
}
|
|
4630
|
+
/**
 * CLI entry point for `export`: prints a header, runs the export and then
 * prints a one-line summary of pages and files.
 *
 * @param {string} root - Project root directory.
 * @param {{target?: string, source?: string}} options - Options forwarded to runExport.
 * @returns {Promise<void>}
 */
async function exportCommand(root, options) {
  header("Exporting wiki");
  const result = await runExport(root, options);
  const fileCount = result.written.length;
  const summary = `Done \u2014 ${result.pageCount} pages exported to ${fileCount} file(s).`;
  status("\u2713", success(summary));
}
|
|
4638
|
+
|
|
3774
4639
|
// src/commands/schema.ts
|
|
3775
4640
|
import { existsSync as existsSync10 } from "fs";
|
|
3776
4641
|
import { mkdir as mkdir6, writeFile as writeFile5 } from "fs/promises";
|
|
3777
|
-
import
|
|
4642
|
+
import path31 from "path";
|
|
3778
4643
|
async function schemaInitCommand() {
|
|
3779
4644
|
const root = process.cwd();
|
|
3780
4645
|
const defaults = buildDefaultSchema();
|
|
@@ -3783,7 +4648,7 @@ async function schemaInitCommand() {
|
|
|
3783
4648
|
status("!", warn(`Schema file already exists at ${targetPath}`));
|
|
3784
4649
|
return;
|
|
3785
4650
|
}
|
|
3786
|
-
await mkdir6(
|
|
4651
|
+
await mkdir6(path31.dirname(targetPath), { recursive: true });
|
|
3787
4652
|
const serializable = {
|
|
3788
4653
|
version: defaults.version,
|
|
3789
4654
|
defaultKind: defaults.defaultKind,
|
|
@@ -3839,10 +4704,17 @@ async function reviewShowCommand(id) {
|
|
|
3839
4704
|
status("!", warn(`[${v.severity}] ${v.message}`));
|
|
3840
4705
|
}
|
|
3841
4706
|
}
|
|
4707
|
+
if (candidate.provenanceViolations && candidate.provenanceViolations.length > 0) {
|
|
4708
|
+
console.log();
|
|
4709
|
+
header("Provenance violations");
|
|
4710
|
+
for (const v of candidate.provenanceViolations) {
|
|
4711
|
+
status("!", warn(`[${v.severity}] ${v.message}`));
|
|
4712
|
+
}
|
|
4713
|
+
}
|
|
3842
4714
|
}
|
|
3843
4715
|
|
|
3844
4716
|
// src/commands/review-approve.ts
|
|
3845
|
-
import
|
|
4717
|
+
import path32 from "path";
|
|
3846
4718
|
|
|
3847
4719
|
// src/commands/review-helpers.ts
|
|
3848
4720
|
async function runReviewUnderLock(id, underLock) {
|
|
@@ -3874,7 +4746,7 @@ async function approveUnderLock(root, id) {
|
|
|
3874
4746
|
process.exitCode = 1;
|
|
3875
4747
|
return;
|
|
3876
4748
|
}
|
|
3877
|
-
const pagePath =
|
|
4749
|
+
const pagePath = path32.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
|
|
3878
4750
|
await atomicWrite(pagePath, candidate.body);
|
|
3879
4751
|
status("+", success(`Approved \u2192 ${source(pagePath)}`));
|
|
3880
4752
|
await persistCandidateSourceStates(root, candidate);
|
|
@@ -3934,7 +4806,7 @@ import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js
|
|
|
3934
4806
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
3935
4807
|
|
|
3936
4808
|
// src/mcp/tools.ts
|
|
3937
|
-
import
|
|
4809
|
+
import path33 from "path";
|
|
3938
4810
|
import { z } from "zod";
|
|
3939
4811
|
|
|
3940
4812
|
// src/mcp/provider-check.ts
|
|
@@ -4070,7 +4942,7 @@ async function pickSearchSlugs(root, question) {
|
|
|
4070
4942
|
if (candidates.length > 0) return candidates.map((c) => c.slug);
|
|
4071
4943
|
} catch {
|
|
4072
4944
|
}
|
|
4073
|
-
const indexContent = await safeReadFile(
|
|
4945
|
+
const indexContent = await safeReadFile(path33.join(root, INDEX_FILE));
|
|
4074
4946
|
const { pages } = await selectPages(question, indexContent);
|
|
4075
4947
|
return pages;
|
|
4076
4948
|
}
|
|
@@ -4129,8 +5001,8 @@ function registerStatusTool(server, root) {
|
|
|
4129
5001
|
);
|
|
4130
5002
|
}
|
|
4131
5003
|
async function collectStatus(root) {
|
|
4132
|
-
const concepts = await collectPageSummaries(
|
|
4133
|
-
const queries = await collectPageSummaries(
|
|
5004
|
+
const concepts = await collectPageSummaries(path33.join(root, CONCEPTS_DIR));
|
|
5005
|
+
const queries = await collectPageSummaries(path33.join(root, QUERIES_DIR));
|
|
4134
5006
|
const state = await readState(root);
|
|
4135
5007
|
const changes = await detectChanges(root, state);
|
|
4136
5008
|
const orphans = await findOrphanedSlugs(root);
|
|
@@ -4147,7 +5019,7 @@ async function collectStatus(root) {
|
|
|
4147
5019
|
};
|
|
4148
5020
|
}
|
|
4149
5021
|
async function findOrphanedSlugs(root) {
|
|
4150
|
-
const scanned = await scanWikiPages(
|
|
5022
|
+
const scanned = await scanWikiPages(path33.join(root, CONCEPTS_DIR));
|
|
4151
5023
|
return scanned.filter(({ meta }) => meta.orphaned).map(({ slug }) => slug);
|
|
4152
5024
|
}
|
|
4153
5025
|
async function loadPageRecords(root, slugs) {
|
|
@@ -4160,7 +5032,7 @@ async function loadPageRecords(root, slugs) {
|
|
|
4160
5032
|
}
|
|
4161
5033
|
async function readPage(root, slug) {
|
|
4162
5034
|
for (const dir of PAGE_DIRS2) {
|
|
4163
|
-
const content = await safeReadFile(
|
|
5035
|
+
const content = await safeReadFile(path33.join(root, dir, `${slug}.md`));
|
|
4164
5036
|
if (!content) continue;
|
|
4165
5037
|
const { meta, body } = parseFrontmatter(content);
|
|
4166
5038
|
if (meta.orphaned) continue;
|
|
@@ -4175,8 +5047,8 @@ async function readPage(root, slug) {
|
|
|
4175
5047
|
}
|
|
4176
5048
|
|
|
4177
5049
|
// src/mcp/resources.ts
|
|
4178
|
-
import
|
|
4179
|
-
import { readdir as
|
|
5050
|
+
import path34 from "path";
|
|
5051
|
+
import { readdir as readdir11 } from "fs/promises";
|
|
4180
5052
|
import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
4181
5053
|
function jsonContent(uri, payload) {
|
|
4182
5054
|
return {
|
|
@@ -4209,7 +5081,7 @@ function registerIndexResource(server, root) {
|
|
|
4209
5081
|
mimeType: "text/markdown"
|
|
4210
5082
|
},
|
|
4211
5083
|
async (uri) => {
|
|
4212
|
-
const content = await safeReadFile(
|
|
5084
|
+
const content = await safeReadFile(path34.join(root, INDEX_FILE));
|
|
4213
5085
|
return { contents: [markdownContent(uri, content)] };
|
|
4214
5086
|
}
|
|
4215
5087
|
);
|
|
@@ -4276,23 +5148,23 @@ function registerQueryResource(server, root) {
|
|
|
4276
5148
|
);
|
|
4277
5149
|
}
|
|
4278
5150
|
async function listSources(root) {
|
|
4279
|
-
const sourcesPath =
|
|
5151
|
+
const sourcesPath = path34.join(root, SOURCES_DIR);
|
|
4280
5152
|
let files;
|
|
4281
5153
|
try {
|
|
4282
|
-
files = await
|
|
5154
|
+
files = await readdir11(sourcesPath);
|
|
4283
5155
|
} catch {
|
|
4284
5156
|
return [];
|
|
4285
5157
|
}
|
|
4286
5158
|
const records = [];
|
|
4287
5159
|
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
4288
|
-
const content = await safeReadFile(
|
|
5160
|
+
const content = await safeReadFile(path34.join(sourcesPath, file));
|
|
4289
5161
|
const { meta } = parseFrontmatter(content);
|
|
4290
5162
|
records.push({ filename: file, ...meta });
|
|
4291
5163
|
}
|
|
4292
5164
|
return records;
|
|
4293
5165
|
}
|
|
4294
5166
|
async function loadPageWithMeta(root, dir, slug) {
|
|
4295
|
-
const filePath =
|
|
5167
|
+
const filePath = path34.join(root, dir, `${slug}.md`);
|
|
4296
5168
|
const content = await safeReadFile(filePath);
|
|
4297
5169
|
if (!content) {
|
|
4298
5170
|
throw new Error(`Page not found: ${dir}/${slug}.md`);
|
|
@@ -4301,10 +5173,10 @@ async function loadPageWithMeta(root, dir, slug) {
|
|
|
4301
5173
|
return { slug, meta, body: body.trim() };
|
|
4302
5174
|
}
|
|
4303
5175
|
async function listPagesUnder(root, dir, scheme) {
|
|
4304
|
-
const pagesPath =
|
|
5176
|
+
const pagesPath = path34.join(root, dir);
|
|
4305
5177
|
let files;
|
|
4306
5178
|
try {
|
|
4307
|
-
files = await
|
|
5179
|
+
files = await readdir11(pagesPath);
|
|
4308
5180
|
} catch {
|
|
4309
5181
|
return { resources: [] };
|
|
4310
5182
|
}
|
|
@@ -4328,8 +5200,8 @@ async function startMCPServer(options) {
|
|
|
4328
5200
|
}
|
|
4329
5201
|
|
|
4330
5202
|
// src/cli.ts
|
|
4331
|
-
var
|
|
4332
|
-
var { version } =
|
|
5203
|
+
// Read the CLI's own version from the package.json one level above this module.
var require3 = createRequire2(import.meta.url);
var version = require3("../package.json").version;

// Root commander program; the subcommands below are registered on it.
var program = new Command();
program
  .name("llmwiki")
  .description("The knowledge compiler \u2014 raw sources in, interlinked wiki out")
  .version(version);
|
|
4335
5207
|
program.command("ingest <source>").description("Ingest a URL or local file into sources/").action(async (source2) => {
|
|
@@ -4340,11 +5212,23 @@ program.command("ingest <source>").description("Ingest a URL or local file into
|
|
|
4340
5212
|
process.exit(1);
|
|
4341
5213
|
}
|
|
4342
5214
|
});
|
|
5215
|
+
// `ingest-session <path>`: hand the given path to ingestSession(). On failure,
// print the error in red and exit with status 1.
program
  .command("ingest-session <path>")
  .description("Ingest a coding-agent session export (Claude, Codex, Cursor) into sources/")
  .action(async (sessionPath) => {
    try {
      await ingestSession(sessionPath);
    } catch (failure) {
      const detail = failure instanceof Error ? failure.message : failure;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
4343
5223
|
program.command("compile").description("Compile sources/ into an interlinked wiki").option(
|
|
4344
5224
|
"--review",
|
|
4345
5225
|
"Write generated pages as review candidates under .llmwiki/candidates/ instead of mutating wiki/. Orphan-marking for deleted sources is deferred until the next non-review compile."
|
|
5226
|
+
).option(
|
|
5227
|
+
"--lang <code>",
|
|
5228
|
+
'Target language for generated wiki content (e.g. "Chinese", "ja", "zh-CN"). Equivalent to setting LLMWIKI_OUTPUT_LANG.'
|
|
4346
5229
|
).action(async (options) => {
|
|
4347
5230
|
try {
|
|
5231
|
+
applyLanguageOption(options.lang);
|
|
4348
5232
|
requireProvider();
|
|
4349
5233
|
await compileCommand({ review: options.review });
|
|
4350
5234
|
} catch (err) {
|
|
@@ -4385,15 +5269,21 @@ reviewCommand.command("reject <id>").description("Reject a candidate and archive
|
|
|
4385
5269
|
process.exit(1);
|
|
4386
5270
|
}
|
|
4387
5271
|
});
|
|
4388
|
-
program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").option("--debug", "Print which pages and chunks were selected and their scores").
|
|
4389
|
-
|
|
4390
|
-
|
|
4391
|
-
|
|
4392
|
-
|
|
4393
|
-
|
|
4394
|
-
|
|
5272
|
+
// `query <question>`: apply the optional --lang override, make sure a provider
// is configured, then run the query against the wiki in the current directory.
// On failure, print the error in red and exit with status 1.
program
  .command("query <question>")
  .description("Ask a question against the wiki")
  .option("--save", "Save the answer as a wiki page")
  .option("--debug", "Print which pages and chunks were selected and their scores")
  .option(
    "--lang <code>",
    'Target language for the answer (e.g. "Chinese", "ja", "zh-CN"). Equivalent to setting LLMWIKI_OUTPUT_LANG.'
  )
  .action(async (question, options) => {
    try {
      applyLanguageOption(options.lang);
      requireProvider();
      await queryCommand(process.cwd(), question, options);
    } catch (failure) {
      const detail = failure instanceof Error ? failure.message : failure;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
4397
5287
|
program.command("watch").description("Watch sources/ and auto-recompile on changes").action(async () => {
|
|
4398
5288
|
try {
|
|
4399
5289
|
requireProvider();
|
|
@@ -4428,6 +5318,17 @@ schemaCmd.command("show").description("Print the resolved schema for this projec
|
|
|
4428
5318
|
process.exit(1);
|
|
4429
5319
|
}
|
|
4430
5320
|
});
|
|
5321
|
+
// `export`: write the wiki in the current directory to the portable formats.
// --target limits the run to one format; --source selects pages for marp.
// On failure, print the error in red and exit with status 1.
program
  .command("export")
  .description("Export wiki content to portable formats (llms.txt, JSON, GraphML, Marp, \u2026)")
  .option("--target <name>", "Limit export to a single target format")
  .option(
    "--source <kind>",
    "For marp target: which pages to include \u2014 concepts, queries, or all (default: all)"
  )
  .action(async (options) => {
    try {
      await exportCommand(process.cwd(), options);
    } catch (failure) {
      const detail = failure instanceof Error ? failure.message : failure;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
4431
5332
|
program.command("serve").description("Start an MCP server exposing wiki tools and resources over stdio").option("--root <dir>", "Project root directory", process.cwd()).action(async (options) => {
|
|
4432
5333
|
try {
|
|
4433
5334
|
await startMCPServer({ root: options.root, version });
|
|
@@ -4436,6 +5337,11 @@ program.command("serve").description("Start an MCP server exposing wiki tools an
|
|
|
4436
5337
|
process.exit(1);
|
|
4437
5338
|
}
|
|
4438
5339
|
});
|
|
5340
|
+
/**
 * Copy a `--lang` CLI value into the LLMWIKI_OUTPUT_LANG environment variable.
 *
 * The value is trimmed first. A missing, empty or whitespace-only value leaves
 * the environment variable untouched.
 *
 * @param {string | undefined} lang - Raw value of the `--lang` option.
 * @returns {void}
 */
function applyLanguageOption(lang) {
  const trimmed = lang ? lang.trim() : "";
  if (trimmed.length === 0) {
    return;
  }
  process.env.LLMWIKI_OUTPUT_LANG = trimmed;
}
|
|
4439
5345
|
var PROVIDER_KEY_VARS2 = {
|
|
4440
5346
|
anthropic: "ANTHROPIC_API_KEY",
|
|
4441
5347
|
openai: "OPENAI_API_KEY",
|